Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 65 additions & 21 deletions lib/avro/datum.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,68 @@ public function __construct($expected_schema, $datum)
}
}

/**
 * Zigzag + base-128 variable-length encoding and decoding of longs.
 * https://en.wikipedia.org/wiki/Variable-length_quantity#Zigzag_encoding
 *
 * @package Avro
 */
class Zigzag {

  const BYTE_SIZE = 8;
  const PLATFORM_BITS = PHP_INT_SIZE * self::BYTE_SIZE;

  /**
   * Logical (zero-filling) right shift, as PHP does not have the `>>>` operator.
   *
   * @param int $n value whose bit pattern is shifted
   * @param int $x number of bit positions to shift by; a count of
   *               PLATFORM_BITS or more yields 0
   *
   * @return int $n shifted right by $x bits with zeros shifted in from the left
   * @throws ArithmeticError if $x is negative (raised by PHP's `>>` operator)
   */
  public static function unsigned_right_shift(int $n, int $x): int
  {
    if ($x === 0) {
      // Nothing to shift; this also keeps the mask below from being built
      // with a negative shift count.
      return $n;
    }

    // `>>` copies the sign bit into the vacated high bits. The mask keeps only
    // the low (PLATFORM_BITS - $x) bits and collapses to 0 once
    // $x >= PLATFORM_BITS, so oversized counts give 0 instead of an error.
    return ($n >> $x) & (PHP_INT_MAX >> ($x - 1));
  }

  /**
   * Zigzag-encodes $n and serialises the result as a little-endian base-128
   * varint: seven payload bits per byte, high bit set on every byte but the last.
   *
   * @param int|string $n value to encode; it is cast to int first
   * @return string long $n encoded as bytes
   * @internal This relies on 64-bit PHP.
   */
  public static function encode_long($n): string
  {
    $n = (int) $n;
    // Zigzag step: move the sign into bit 0 so values of small magnitude,
    // positive or negative, end up with few significant bits.
    $n = ($n << 1) ^ ($n >> (self::PLATFORM_BITS - 1));

    $str = '';
    // From here $n is treated as an unsigned bit pattern, hence the mask test
    // (rather than a signed comparison) and the logical shift.
    while (($n & ~0x7F) !== 0) {
      $str .= chr(($n & 0x7F) | 0x80);
      $n = self::unsigned_right_shift($n, 7);
    }

    $str .= chr($n);
    return $str;
  }

  /**
   * Decodes a zigzag-encoded base-128 varint.
   *
   * Bytes are consumed from the front of $bytes until one without the
   * continuation bit (0x80) is found. If the array is empty or runs out while
   * the continuation bit is still set, array_shift() yields null, which the
   * bitwise operators treat as 0, so decoding stops with the bits gathered so far.
   *
   * @param array $bytes byte values, least-significant 7-bit group first
   *                     (expected to be integers - the elements are used
   *                     directly with bitwise operators)
   * @return int the decoded long
   */
  public static function decode_long(array $bytes): int
  {
    $n = 0;
    $shift = 0;
    do {
      $b = array_shift($bytes);
      $n |= ($b & 0x7f) << $shift;
      $shift += 7;
    } while (0 !== ($b & 0x80));

    // Undo the zigzag step: logical shift drops the sign bit, and XOR with
    // 0 or -1 restores the original sign.
    return self::unsigned_right_shift($n, 1) ^ -($n & 1);
  }
}

/**
* Exceptions arising from incompatibility between
* reader and writer schemas.
Expand Down Expand Up @@ -304,18 +366,9 @@ static function double_to_long_bits($double)
* @return string long $n encoded as bytes
* @internal This relies on 64-bit PHP.
*/
public static function encode_long($n): string
{
  // Delegate to Zigzag: it shifts the zigzagged value logically (unsigned),
  // whereas an arithmetic `>>= 7` keeps a negative value negative forever and
  // so never finishes for inputs such as PHP_INT_MIN.
  return Zigzag::encode_long($n);
}

/**
Expand Down Expand Up @@ -931,16 +984,7 @@ class AvroIOBinaryDecoder
*/
public static function decode_long_from_array($bytes)
{
  // Delegate to Zigzag: undoing the zigzag step needs a logical right shift;
  // an arithmetic `>> 1` gives the wrong value when bit 63 of the accumulated
  // varint is set (the largest-magnitude longs).
  return Zigzag::decode_long($bytes);
}

/**
Expand Down
45 changes: 43 additions & 2 deletions test/DatumIOTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,37 @@ function test_datum_round_trip($schema_json, $datum, $binary)
$this->assertEquals($datum, $read_datum);
}

/**
 * Checks Zigzag::unsigned_right_shift() against one [expected, n, x] row
 * supplied by the data provider.
 *
 * @dataProvider zigzag_unsigned_right_shift_provider
 */
function test_zigzag_unsigned_right_shift(int $expected, int $n, int $x) {
  $actual = Zigzag::unsigned_right_shift($n, $x);

  $this->assertEquals($expected, $actual);
}

/**
 * Data sets for test_zigzag_unsigned_right_shift().
 *
 * Each row is [expected, n, x]: the expected result of logically shifting
 * $n right by $x bits. The literals assume 64-bit integers.
 *
 * @return array
 */
public static function zigzag_unsigned_right_shift_provider(): array {
  return [
    // Small values, positive and negative.
    [4611686018427387902, -8, 2],
    [2, 8, 2],

    // Boundary shift counts: no shift, one bit, and word size minus one.
    [5, 5, 0],
    [-5, -5, 0],
    [0, 0, 7],
    [9223372036854775807, -1, 1],
    [1, -1, 63],

    // Repeated 7-bit shifts starting from -2, each result feeding the next row.
    [144115188075855871, -2, 7],
    [1125899906842623, 144115188075855871, 7],
    [8796093022207, 1125899906842623, 7],
    [68719476735, 8796093022207, 7],
    [536870911, 68719476735, 7],
    [4194303, 536870911, 7],
    [32767, 4194303, 7],
    [255, 32767, 7],
    [1, 255, 7],

    // Repeated 7-bit shifts starting from -2147483648; after the last row the
    // value is 536870911, which the chain above already continues from.
    [144115188059078656, -2147483648, 7],
    [1125899906711552, 144115188059078656, 7],
    [8796093021184, 1125899906711552, 7],
    [68719476728, 8796093021184, 7],
    [536870911, 68719476728, 7],
  ];
}

/**
* @return array
*/
Expand All @@ -67,11 +98,21 @@ function data_provider()
array('"int"', 1, "\002"),
array('"int"', 2147483647, "\xFE\xFF\xFF\xFF\x0F"),

// array('"long"', (int) -9223372036854775808, "\001"),
array('"long"', (int) -9223372036854775808, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01"),
array('"long"', -(1<<62), "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x7F"),
array('"long"', -4294967295, "\xFD\xFF\xFF\xFF\x1F"),
array('"long"', -10, "\x13"),
array('"long"', -3, "\005"),
array('"long"', -2, "\003"),
array('"long"', -1, "\001"),
array('"long"', 0, "\000"),
array('"long"', 1, "\002"),
// array('"long"', 9223372036854775807, "\002")
array('"long"', 2, "\004"),
array('"long"', 3, "\006"),
array('"long"', 10, "\x14"),
array('"long"', 4294967295, "\xFE\xFF\xFF\xFF\x1F"),
array('"long"', 1<<62, "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01"),
array('"long"', 9223372036854775807, "\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01"),

array('"float"', (float) -10.0, "\000\000 \301"),
array('"float"', (float) -1.0, "\000\000\200\277"),
Expand Down