Skip to content

Commit

Permalink
Merge pull request #24 from ghola/binary-data
Browse files Browse the repository at this point in the history
Added support for binary string
  • Loading branch information
jrbasso authored Jun 9, 2016
2 parents 68c6a72 + db65c9b commit 457e688
Show file tree
Hide file tree
Showing 4 changed files with 217 additions and 8 deletions.
34 changes: 32 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,25 @@ but the output is a string JSON encoded. You can also unserialize the JSON gener
PHP content back.

Supported features:

- Encode/Decode of scalar, null, array
- Encode/Decode of objects
- Encode/Decode of binary data
- Support nested serialization
- Support not declared properties on the original class definition (ie, properties in `stdClass`)
- Support object recursion
- Closures (via 3rd party library. See details below)

Unsupported serialization content:

- Resource (ie, `fopen()` response)
- NAN, INF constants

Limitations:
- Binary String or malformed UTF8 strings (ie, results from `SELECT AES_ENCRYPT(:content, :key) as encrypted`)
- These strings will need to be properly handled by converting to hex using `bin2hex` or `utf8_encode` in the `__sleep()` method

- Binary data containing null bytes (\u0000) as array keys cannot be properly decoded because of a json extension bug:
- https://github.com/remicollet/pecl-json-c/issues/7
- https://github.com/json-c/json-c/issues/108

This project should not be confused with the `JsonSerializable` interface added in PHP 5.4. That interface is used by
`json_encode` to encode objects. Unlike this project, it provides no unserialization.
Expand Down Expand Up @@ -60,6 +66,30 @@ Or add the `zumba/json-serializer` directly in your `composer.json` file.

If you are not using composer, you can just copy the files from `src` folder in your project.

## Serializing Binary Strings

Binary strings introduce two special identifiers in the final json: `@utf8encoded` and `@scalar`.
`@utf8encoded` is a map whose keys are the keys of the original data that had their value (or the key itself)
encoded from 8bit to UTF-8, and whose values are bit flags: 1 means the key was encoded, 2 means the value was encoded,
3 means both. This is how the serializer knows what to encode back from UTF-8 to 8bit when deserializing.
Example:

```php
$data = ['key' => '<binaryvalue>', 'anotherkey' => 'nonbinaryvalue'];
$serializer = new Zumba\JsonSerializer\JsonSerializer();
$json = $serializer->serialize($data);
// $json will contain the content {"key":"<utf8encodedbinaryvalue>","anotherkey":"nonbinaryvalue","@utf8encoded":{"key":2}}
```

`@scalar` is used only when the value to be encoded is not an array or an object but a binary string. Example:

```php
$data = '<binaryvalue>';
$serializer = new Zumba\JsonSerializer\JsonSerializer();
$json = $serializer->serialize($data);
// $json will contain the content {"@scalar":"<utf8encodedbinaryvalue>","@utf8encoded":2}
```


## Serializing Closures

For serializing PHP closures you have to pass an object implementing `SuperClosure\SerializerInterface`.
Expand Down
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
}
],
"require": {
"php": ">=5.4.0"
"php": ">=5.4.0",
"ext-mbstring": "*"
},
"suggest": {
"jeremeamia/superclosure": "Allow to serialize PHP closures"
Expand Down
113 changes: 111 additions & 2 deletions src/JsonSerializer/JsonSerializer.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@ class JsonSerializer

const CLASS_IDENTIFIER_KEY = '@type';
const CLOSURE_IDENTIFIER_KEY = '@closure';
// Reserved key added to the encoded payload when non-UTF-8 strings had to be re-encoded.
// For arrays it holds a map of (encoded) key => bit flags; for a wrapped scalar it holds the flag itself.
const UTF8ENCODED_IDENTIFIER_KEY = '@utf8encoded';
// Reserved key that wraps a top-level binary string, which cannot carry a marker on its own.
const SCALAR_IDENTIFIER_KEY = '@scalar';
const FLOAT_ADAPTER = 'JsonSerializerFloatAdapter';

// Bit flags stored under UTF8ENCODED_IDENTIFIER_KEY; they are OR-ed together when both apply.
// The array key itself was re-encoded from 8bit to UTF-8.
const KEY_UTF8ENCODED = 1;
// The value stored under the key was re-encoded from 8bit to UTF-8.
const VALUE_UTF8ENCODED = 2;

/**
* Storage for object
*
Expand Down Expand Up @@ -73,9 +78,19 @@ public function __construct(ClosureSerializerInterface $closureSerializer = null
public function serialize($value)
{
    $this->reset();

    // Keep the intermediate structure around: if the first encode attempt fails
    // because of malformed UTF-8 it is re-encoded and tried a second time.
    $serializedData = $this->serializeData($value);
    $encoded = json_encode($serializedData, $this->calculateEncodeOptions());
    if ($encoded === false || json_last_error() !== JSON_ERROR_NONE) {
        // Only invalid UTF-8 (binary strings) is recoverable; any other
        // failure is reported to the caller immediately.
        if (json_last_error() !== JSON_ERROR_UTF8) {
            throw new JsonSerializerException('Invalid data to encode to JSON. Error: ' . json_last_error());
        }

        // Convert the offending strings to UTF-8 and record which ones were
        // converted so that unserialize() can restore the original bytes.
        $serializedData = $this->encodeNonUtf8ToUtf8($serializedData);
        $encoded = json_encode($serializedData, $this->calculateEncodeOptions());

        if ($encoded === false || json_last_error() !== JSON_ERROR_NONE) {
            throw new JsonSerializerException('Invalid data to encode to JSON. Error: ' . json_last_error());
        }
    }

    return $this->processEncodedValue($encoded);
}
Expand All @@ -94,6 +109,53 @@ protected function calculateEncodeOptions()
return $options;
}

/**
 * Recursively re-encodes every string that is not valid UTF-8 (array keys and
 * string values) from '8bit' to UTF-8 so that json_encode() can accept it.
 *
 * For arrays, the keys that were touched are recorded under
 * UTF8ENCODED_IDENTIFIER_KEY as a map of (encoded) key => bit flags
 * (KEY_UTF8ENCODED and/or VALUE_UTF8ENCODED). A top-level binary string is
 * wrapped in an array holding SCALAR_IDENTIFIER_KEY plus the marker.
 *
 * @param mixed $serializedData Data about to be passed to json_encode()
 *
 * @return mixed The input with invalid UTF-8 strings re-encoded; strings that
 *               are already valid UTF-8 and non-array values are returned as is
 */
protected function encodeNonUtf8ToUtf8($serializedData)
{
    if (is_string($serializedData)) {
        if (!mb_check_encoding($serializedData, 'UTF-8')) {
            $serializedData = [
                static::SCALAR_IDENTIFIER_KEY => mb_convert_encoding($serializedData, 'UTF-8', '8bit'),
                static::UTF8ENCODED_IDENTIFIER_KEY => static::VALUE_UTF8ENCODED,
            ];
        }

        return $serializedData;
    }

    // Nothing to re-encode in other scalars or null; also avoids iterating a non-array.
    if (!is_array($serializedData)) {
        return $serializedData;
    }

    $encodedKeys = [];
    $encodedData = [];
    foreach ($serializedData as $key => $value) {
        if (is_array($value)) {
            $value = $this->encodeNonUtf8ToUtf8($value);
        }

        // Integer keys are always representable in JSON, so only string keys are checked.
        if (is_string($key) && !mb_check_encoding($key, 'UTF-8')) {
            $key = mb_convert_encoding($key, 'UTF-8', '8bit');
            $encodedKeys[$key] = (isset($encodedKeys[$key]) ? $encodedKeys[$key] : 0) | static::KEY_UTF8ENCODED;
        }

        if (is_string($value) && !mb_check_encoding($value, 'UTF-8')) {
            $value = mb_convert_encoding($value, 'UTF-8', '8bit');
            // Flags are indexed by the already-encoded key, which is what the decoder will see.
            $encodedKeys[$key] = (isset($encodedKeys[$key]) ? $encodedKeys[$key] : 0) | static::VALUE_UTF8ENCODED;
        }

        $encodedData[$key] = $value;
    }

    if ($encodedKeys) {
        // Late static binding, matching the lookup done when decoding.
        $encodedData[static::UTF8ENCODED_IDENTIFIER_KEY] = $encodedKeys;
    }

    return $encodedData;
}

/**
* Execute post-encoding actions
*
Expand Down Expand Up @@ -121,6 +183,11 @@ public function unserialize($value)
if ($data === null && json_last_error() != JSON_ERROR_NONE) {
throw new JsonSerializerException('Invalid JSON to unserialize.');
}

if (mb_strpos($value, static::UTF8ENCODED_IDENTIFIER_KEY) !== false) {
$data = $this->decodeNonUtf8FromUtf8($data);
}

return $this->unserializeData($data);
}

Expand Down Expand Up @@ -249,6 +316,48 @@ protected function unserializeData($value)
return array_map(array($this, __FUNCTION__), $value);
}

/**
 * Reverses encodeNonUtf8ToUtf8(): restores the original 8bit strings from
 * their UTF-8 representation using the markers stored in the decoded JSON.
 *
 * An array is treated as a wrapped scalar only when it carries
 * SCALAR_IDENTIFIER_KEY together with the scalar form of the marker (a
 * non-array value under UTF8ENCODED_IDENTIFIER_KEY). A regular array that
 * merely happens to contain a SCALAR_IDENTIFIER_KEY entry is decoded key by
 * key instead of being collapsed to a single string.
 *
 * @param mixed $serializedData Result of json_decode() (or a nested part of it)
 *
 * @return mixed The data with marked keys/values converted back to 8bit and
 *               the marker entry removed
 */
protected function decodeNonUtf8FromUtf8($serializedData)
{
    // Scalars and null carry no markers of their own.
    if (!is_array($serializedData)) {
        return $serializedData;
    }

    $marker = isset($serializedData[static::UTF8ENCODED_IDENTIFIER_KEY])
        ? $serializedData[static::UTF8ENCODED_IDENTIFIER_KEY]
        : null;

    // Wrapped top-level binary string: unwrap and restore the original bytes.
    if (isset($serializedData[static::SCALAR_IDENTIFIER_KEY]) && $marker !== null && !is_array($marker)) {
        return mb_convert_encoding($serializedData[static::SCALAR_IDENTIFIER_KEY], '8bit', 'UTF-8');
    }

    $encodedKeys = [];
    if ($marker !== null) {
        if (is_array($marker)) {
            $encodedKeys = $marker;
        }
        // The marker is bookkeeping only and must not leak into the result.
        unset($serializedData[static::UTF8ENCODED_IDENTIFIER_KEY]);
    }

    $decodedData = [];
    foreach ($serializedData as $key => $value) {
        if (is_array($value)) {
            $value = $this->decodeNonUtf8FromUtf8($value);
        }

        if (isset($encodedKeys[$key])) {
            // Read the flags before the key is converted: they are indexed by the encoded key.
            $flags = $encodedKeys[$key];
            if ($flags & static::KEY_UTF8ENCODED) {
                $key = mb_convert_encoding($key, '8bit', 'UTF-8');
            }
            if (($flags & static::VALUE_UTF8ENCODED) && is_string($value)) {
                $value = mb_convert_encoding($value, '8bit', 'UTF-8');
            }
        }

        $decodedData[$key] = $value;
    }

    return $decodedData;
}

/**
* Convert the serialized array into an object
*
Expand Down
75 changes: 72 additions & 3 deletions tests/JsonSerializerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -402,12 +402,81 @@ public function testUnserializeBadJSON()
}

/**
 * The test attempts to serialize an array containing a NAN, which cannot be
 * represented in JSON and must therefore raise a JsonSerializerException.
 */
public function testSerializeInvalidData()
{
    if (PHP_VERSION_ID < 50500) {
        $this->markTestSkipped('PHP 5.4 raises a warning when encoding NAN, which fails the test.');
    }

    $this->setExpectedException('Zumba\Exception\JsonSerializerException');
    $this->serializer->serialize(array(NAN));
}

/**
 * A plain string holding every byte value from 0 to 255 must come back
 * unchanged after a serialize/unserialize round trip.
 *
 * @return void
 */
public function testSerializeBinaryStringScalar()
{
    $allBytes = implode('', array_map('chr', range(0, 255)));

    $json = $this->serializer->serialize($allBytes);
    $roundTripped = $this->serializer->unserialize($json);

    $this->assertSame($allBytes, $roundTripped);
}

/**
 * A list whose values are binary strings (every byte value from 0 to 255)
 * must come back unchanged after a serialize/unserialize round trip.
 *
 * @return void
 */
public function testSerializeArrayWithBinaryStringsAsValues()
{
    $allBytes = implode('', array_map('chr', range(0, 255)));
    $expected = [$allBytes, "{$allBytes} 1", "{$allBytes} 2"];

    $json = $this->serializer->serialize($expected);
    $roundTripped = $this->serializer->unserialize($json);

    $this->assertSame($expected, $roundTripped);
}

/**
 * An array whose keys are binary strings must come back unchanged after a
 * serialize/unserialize round trip.
 *
 * The byte range starts at 1 rather than 0 because php cannot handle the nil
 * character (\u0000) in json keys as per:
 * https://github.com/remicollet/pecl-json-c/issues/7
 * https://github.com/json-c/json-c/issues/108
 *
 * @return void
 */
public function testSerializeArrayWithBinaryStringsAsKeys()
{
    $nonNullBytes = implode('', array_map('chr', range(1, 255)));
    $expected = [
        $nonNullBytes => $nonNullBytes,
        "{$nonNullBytes} 1" => 'something',
    ];

    $json = $this->serializer->serialize($expected);
    $roundTripped = $this->serializer->unserialize($json);

    $this->assertSame($expected, $roundTripped);
}

/**
 * An object property holding a binary string (every byte value from 0 to 255)
 * must survive a serialize/unserialize round trip, and the result must again
 * be a stdClass instance.
 *
 * @return void
 */
public function testSerializeObjectWithBinaryStrings()
{
    $data = '';
    for ($i = 0; $i <= 255; $i++) {
        $data .= chr($i);
    }

    $obj = new \stdClass();
    $obj->string = $data;
    $unserialized = $this->serializer->unserialize($this->serializer->serialize($obj));
    // Check the round-tripped value: asserting on $obj itself would always pass.
    $this->assertInstanceOf('stdClass', $unserialized);
    $this->assertSame($obj->string, $unserialized->string);
}

/*
Expand Down

0 comments on commit 457e688

Please sign in to comment.