diff --git a/README.md b/README.md index 7029d64..c311848 100644 --- a/README.md +++ b/README.md @@ -10,19 +10,25 @@ but the output is a string JSON encoded. You can also unserialize the JSON gener PHP content back. Supported features: + - Encode/Decode of scalar, null, array - Encode/Decode of objects +- Encode/Decode of binary data - Support nested serialization - Support not declared properties on the original class definition (ie, properties in `stdClass`) - Support object recursion - Closures (via 3rd party library. See details below) Unsupported serialization content: + - Resource (ie, `fopen()` response) +- NAN, INF constants Limitations: -- Binary String or malformed UTF8 strings (ie, resulsts from `SELECT AES_ENCRYPT(:content, :key) as encrypted`) - - These strings will need to be properly handled by converting to hex using `bin2hex` or `utf8_encode` in the `__sleep()` method + +- Binary data containing null bytes (\u0000) as array keys cannot be properly decoded because of a json extension bug: + - https://github.com/remicollet/pecl-json-c/issues/7 + - https://github.com/json-c/json-c/issues/108 This project should not be confused with `JsonSerializable` interface added on PHP 5.4. This interface is used on `json_encode` to encode the objects. There is no unserialization with this interface, differently from this project. @@ -60,6 +66,30 @@ Or add the `zumba/json-serializer` directly in your `composer.json` file. If you are not using composer, you can just copy the files from `src` folder in your project. +## Serializing Binary Strings + +Binary strings introduce two special identifiers in the final json: `@utf8encoded` and `@scalar`. +`@utf8encoded` is an array of keys from the original data which have their value (or the keys themselves) +encoded from 8bit to UTF-8. This is how the serializer knows what to encode back from UTF-8 to 8bit when deserializing. 
+Example: + +```php +$data = ['key' => $binaryString, 'anotherkey' => 'nonbinaryvalue']; +$serializer = new Zumba\JsonSerializer\JsonSerializer(); +$json = $serializer->serialize($data); +// $json will contain the content {"key":"(utf8-encoded binary)","anotherkey":"nonbinaryvalue","@utf8encoded":{"key":2}} (flag 1 = key encoded, 2 = value encoded, 3 = both) +``` + +`@scalar` is used only when the value to be encoded is not an array or an object but a binary string. Example: + +```php +$data = $binaryString; +$serializer = new Zumba\JsonSerializer\JsonSerializer(); +$json = $serializer->serialize($data); +// $json will contain the content {"@scalar":"(utf8-encoded binary)","@utf8encoded":2} +``` + + ## Serializing Closures For serializing PHP closures you have to pass an object implementing `SuperClosure\SerializerInterface`. diff --git a/composer.json b/composer.json index 7ced06b..1642998 100644 --- a/composer.json +++ b/composer.json @@ -16,7 +16,8 @@ } ], "require": { - "php": ">=5.4.0" + "php": ">=5.4.0", + "ext-mbstring": "*" }, "suggest": { "jeremeamia/superclosure": "Allow to serialize PHP closures" diff --git a/src/JsonSerializer/JsonSerializer.php b/src/JsonSerializer/JsonSerializer.php index f8dc0d4..1de9028 100644 --- a/src/JsonSerializer/JsonSerializer.php +++ b/src/JsonSerializer/JsonSerializer.php @@ -13,8 +13,13 @@ class JsonSerializer const CLASS_IDENTIFIER_KEY = '@type'; const CLOSURE_IDENTIFIER_KEY = '@closure'; + const UTF8ENCODED_IDENTIFIER_KEY = '@utf8encoded'; + const SCALAR_IDENTIFIER_KEY = '@scalar'; const FLOAT_ADAPTER = 'JsonSerializerFloatAdapter'; + const KEY_UTF8ENCODED = 1; + const VALUE_UTF8ENCODED = 2; + /** * Storage for object * @@ -73,9 +78,19 @@ public function __construct(ClosureSerializerInterface $closureSerializer = null public function serialize($value) { $this->reset(); - $encoded = json_encode($this->serializeData($value), $this->calculateEncodeOptions()); + $serializedData = $this->serializeData($value); + $encoded = json_encode($serializedData, $this->calculateEncodeOptions()); if ($encoded === false || json_last_error() != 
JSON_ERROR_NONE) { - throw new JsonSerializerException('Invalid data to encode to JSON. Error: ' . json_last_error()); + if (json_last_error() != JSON_ERROR_UTF8) { + throw new JsonSerializerException('Invalid data to encode to JSON. Error: ' . json_last_error()); + } + + $serializedData = $this->encodeNonUtf8ToUtf8($serializedData); + $encoded = json_encode($serializedData, $this->calculateEncodeOptions()); + + if ($encoded === false || json_last_error() != JSON_ERROR_NONE) { + throw new JsonSerializerException('Invalid data to encode to JSON. Error: ' . json_last_error()); + } } return $this->processEncodedValue($encoded); } @@ -94,6 +109,53 @@ protected function calculateEncodeOptions() return $options; } + /** + * @param mixed $serializedData + * + * @return array|string UTF-8-safe copy of the input; a string that is already valid UTF-8 is returned unchanged + */ + protected function encodeNonUtf8ToUtf8($serializedData) + { + if (is_string($serializedData)) { + if (!mb_check_encoding($serializedData, 'UTF-8')) { + $serializedData = [ + static::SCALAR_IDENTIFIER_KEY => mb_convert_encoding($serializedData, 'UTF-8', '8bit'), + static::UTF8ENCODED_IDENTIFIER_KEY => static::VALUE_UTF8ENCODED, + ]; + } + + return $serializedData; + } + + $encodedKeys = []; + $encodedData = []; + foreach ($serializedData as $key => $value) { + if (is_array($value)) { + $value = $this->encodeNonUtf8ToUtf8($value); + } + + if (!mb_check_encoding($key, 'UTF-8')) { + $key = mb_convert_encoding($key, 'UTF-8', '8bit'); + $encodedKeys[$key] = (isset($encodedKeys[$key]) ? $encodedKeys[$key] : 0) | static::KEY_UTF8ENCODED; + } + + if (is_string($value)) { + if (!mb_check_encoding($value, 'UTF-8')) { + $value = mb_convert_encoding($value, 'UTF-8', '8bit'); + $encodedKeys[$key] = (isset($encodedKeys[$key]) ? 
$encodedKeys[$key] : 0) | static::VALUE_UTF8ENCODED; + } + } + + $encodedData[$key] = $value; + } + + if ($encodedKeys) { + $encodedData[static::UTF8ENCODED_IDENTIFIER_KEY] = $encodedKeys; + } + + return $encodedData; + } + /** * Execute post-encoding actions * @@ -121,6 +183,11 @@ public function unserialize($value) if ($data === null && json_last_error() != JSON_ERROR_NONE) { throw new JsonSerializerException('Invalid JSON to unserialize.'); } + + if (mb_strpos($value, static::UTF8ENCODED_IDENTIFIER_KEY) !== false) { + $data = $this->decodeNonUtf8FromUtf8($data); + } + return $this->unserializeData($data); } @@ -249,6 +316,48 @@ protected function unserializeData($value) return array_map(array($this, __FUNCTION__), $value); } + /** + * @param mixed $serializedData + * + * @return mixed + */ + protected function decodeNonUtf8FromUtf8($serializedData) + { + if (is_array($serializedData) && isset($serializedData[static::SCALAR_IDENTIFIER_KEY])) { + $serializedData = mb_convert_encoding($serializedData[static::SCALAR_IDENTIFIER_KEY], '8bit', 'UTF-8'); + return $serializedData; + } elseif (is_scalar($serializedData) || $serializedData === null) { + return $serializedData; + } + + $encodedKeys = []; + if (isset($serializedData[static::UTF8ENCODED_IDENTIFIER_KEY])) { + $encodedKeys = $serializedData[static::UTF8ENCODED_IDENTIFIER_KEY]; + unset($serializedData[static::UTF8ENCODED_IDENTIFIER_KEY]); + } + + $decodedData = []; + foreach ($serializedData as $key => $value) { + if (is_array($value)) { + $value = $this->decodeNonUtf8FromUtf8($value); + } + + if (isset($encodedKeys[$key])) { + $originalKey = $key; + if ($encodedKeys[$key] & static::KEY_UTF8ENCODED) { + $key = mb_convert_encoding($key, '8bit', 'UTF-8'); + } + if ($encodedKeys[$originalKey] & static::VALUE_UTF8ENCODED) { + $value = mb_convert_encoding($value, '8bit', 'UTF-8'); + } + } + + $decodedData[$key] = $value; + } + + return $decodedData; + } + /** * Convert the serialized array into an object * diff --git 
a/tests/JsonSerializerTest.php b/tests/JsonSerializerTest.php index 57632cd..831da11 100644 --- a/tests/JsonSerializerTest.php +++ b/tests/JsonSerializerTest.php @@ -402,12 +402,81 @@ public function testUnserializeBadJSON() } /** - * The test attempts to serialize an array containing a non UTF8 encoded string + * The test attempts to serialize an array containing a NAN */ - public function testSerializeBadData() + public function testSerializeInvalidData() { + if (PHP_VERSION_ID < 50500) { + $this->markTestSkipped('PHP 5.4 raises a warning when encoding NAN, which fails the test.'); + } + $this->setExpectedException('Zumba\Exception\JsonSerializerException'); - $this->serializer->serialize(array(hex2bin('ba'))); + $this->serializer->serialize(array(NAN)); + } + + /** + * @return void + */ + public function testSerializeBinaryStringScalar() + { + $data = ''; + for ($i = 0; $i <= 255; $i++) { + $data .= chr($i); + } + + $unserialized = $this->serializer->unserialize($this->serializer->serialize($data)); + $this->assertSame($data, $unserialized); + } + + /** + * @return void + */ + public function testSerializeArrayWithBinaryStringsAsValues() + { + $data = ''; + for ($i = 0; $i <= 255; $i++) { + $data .= chr($i); + } + + $data = [$data, "$data 1", "$data 2"]; + $unserialized = $this->serializer->unserialize($this->serializer->serialize($data)); + $this->assertSame($data, $unserialized); + } + + /** + * Starting from 1 and not from 0 because php cannot handle the nil character (\u0000) in json keys as per: + * https://github.com/remicollet/pecl-json-c/issues/7 + * https://github.com/json-c/json-c/issues/108 + * + * @return void + */ + public function testSerializeArrayWithBinaryStringsAsKeys() + { + $data = ''; + for ($i = 1; $i <= 255; $i++) { + $data .= chr($i); + } + + $data = [$data => $data, "$data 1" => 'something']; + $unserialized = $this->serializer->unserialize($this->serializer->serialize($data)); + $this->assertSame($data, $unserialized); + } + + /** + * 
@return void + */ + public function testSerializeObjectWithBinaryStrings() + { + $data = ''; + for ($i = 0; $i <= 255; $i++) { + $data .= chr($i); + } + + $obj = new \stdClass(); + $obj->string = $data; + $unserialized = $this->serializer->unserialize($this->serializer->serialize($obj)); + $this->assertInstanceOf('stdClass', $unserialized); + $this->assertSame($obj->string, $unserialized->string); + } /*