From 988a303d646c4e149a882e09264fe9031001813c Mon Sep 17 00:00:00 2001 From: Neil Shweky Date: Thu, 23 Dec 2021 11:26:19 -0500 Subject: [PATCH] RUBY-2855 RUBY-2637 Fix dbref spec tests (#258) * RUBY-2855 dont raise an error on implicit parsing * RUBY-2855 raise error on explicit dbref parsing for invalid type * RUBY-2855 don't accept dbref with invalid db type * RUBY-2855 refactor, raise error on explicit invalid type for db in dbref * RUBY-2855 remove unnecessary condition * RUBY-2855 added fixed tests * RUBY-2855 add trailing space * RUBY-2855 add parse_hash to remainder of dbref hash * RUBY-2855 remove failing tests * RUBY-2855 update error messages * RUBY-2855 make from_bson use implicit decoding * RUBY-2855 fix MRI from_bson call * RUBY-2855 add and update tests * RUBY-2855 dbref nested inside dbref * RUBY-2855 add from_bson validation tests for nested and top-level * Ruby does not generally use is_ prefixes * revert the bypass * stop using renamed constants * DBRef parsing in C * do not convert DBRefs back to Documents * remove unused var * from_bson override is no longer needed * pass validating_keys * fix docstring * instantiate DBRefs under jruby Co-authored-by: Oleg Pudeyev --- ext/bson/bson-native.h | 2 + ext/bson/init.c | 9 ++ ext/bson/read.c | 29 ++++ lib/bson/dbref.rb | 51 +++---- lib/bson/document.rb | 13 ++ lib/bson/ext_json.rb | 23 +-- lib/bson/hash.rb | 12 +- spec/bson/dbref_spec.rb | 197 +++++++++++++++++++++++-- spec/spec_tests/data/corpus/dbref.json | 22 ++- spec/spec_tests/data/corpus/top.json | 8 - 10 files changed, 309 insertions(+), 57 deletions(-) diff --git a/ext/bson/bson-native.h b/ext/bson/bson-native.h index 6bbc809c9..04ac14a44 100644 --- a/ext/bson/bson-native.h +++ b/ext/bson/bson-native.h @@ -123,3 +123,5 @@ extern VALUE rb_bson_registry; extern VALUE rb_bson_illegal_key; extern const rb_data_type_t rb_byte_buffer_data_type; + +extern VALUE _ref_str, _id_str, _db_str; diff --git a/ext/bson/init.c b/ext/bson/init.c index 2d217d9fa..ce5ab2d61 100644 --- a/ext/bson/init.c +++ b/ext/bson/init.c @@ -30,6 +30,8 @@ const rb_data_type_t rb_byte_buffer_data_type = { { NULL, rb_bson_byte_buffer_free, rb_bson_byte_buffer_memsize } }; +VALUE _ref_str, _id_str, _db_str; + /** * Initialize the bson_native extension. */ @@ -37,6 +39,13 @@ void Init_bson_native() { char rb_bson_machine_id[256]; + _ref_str = rb_str_new_cstr("$ref"); + rb_gc_register_mark_object(_ref_str); + _id_str = rb_str_new_cstr("$id"); + rb_gc_register_mark_object(_id_str); + _db_str = rb_str_new_cstr("$db"); + rb_gc_register_mark_object(_db_str); + VALUE rb_bson_module = rb_define_module("BSON"); /* Document-class: BSON::ByteBuffer diff --git a/ext/bson/read.c b/ext/bson/read.c index d47009915..57d105889 100644 --- a/ext/bson/read.c +++ b/ext/bson/read.c @@ -356,6 +356,30 @@ VALUE rb_bson_byte_buffer_get_decimal128_bytes(VALUE self) return bytes; } +/** + * This duplicates the DBRef validation code in DBRef constructor. + */ +static int pvt_is_dbref(VALUE doc) { + VALUE ref, id, db; + + ref = rb_hash_aref(doc, _ref_str); + if (NIL_P(ref) || !RB_TYPE_P(ref, T_STRING)) { + return 0; + } + + id = rb_hash_aref(doc, _id_str); + if (NIL_P(id)) { + return 0; + } + + db = rb_hash_aref(doc, _db_str); + if (!NIL_P(db) && !RB_TYPE_P(db, T_STRING)) { + return 0; + } + + return 1; +} + VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){ VALUE doc = Qnil; byte_buffer_t *b = NULL; @@ -380,6 +404,11 @@ VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){ if (READ_PTR(b) - start_ptr != length) { pvt_raise_decode_error(rb_sprintf("Expected to read %d bytes for the hash but read %ld bytes", length, READ_PTR(b) - start_ptr)); } + + if (pvt_is_dbref(doc)) { + VALUE cDBRef = pvt_const_get_2("BSON", "DBRef"); + doc = rb_funcall(cDBRef, rb_intern("new"), 1, doc); + } return doc; } diff --git a/lib/bson/dbref.rb b/lib/bson/dbref.rb index c9fe72eb6..296baca34 100644 --- a/lib/bson/dbref.rb +++ b/lib/bson/dbref.rb @@ -22,27 +22,33 @@ class DBRef < Document include JSON # The constant for the collection reference field. + # + # @deprecated COLLECTION = '$ref'.freeze # The constant for the id field. + # + # @deprecated ID = '$id'.freeze # The constant for the database field. + # + # @deprecated DATABASE = '$db'.freeze # @return [ String ] collection The collection name. def collection - self[COLLECTION] + self['$ref'] end # @return [ BSON::ObjectId ] id The referenced document id. def id - self[ID] + self['$id'] end # @return [ String ] database The database name. def database - self[DATABASE] + self['$db'] end # Get the DBRef as a JSON document @@ -62,11 +68,22 @@ def as_json(*args) # # @param [ Hash ] hash the DBRef hash. It must contain $collection and $id. def initialize(hash) - [COLLECTION, ID].each do |key| + %w($ref $id).each do |key| unless hash[key] - raise ArgumentError, "DBRefs must have a #{key}" + raise ArgumentError, "DBRef must have #{key}: #{hash}" + end + end + + unless hash['$ref'].is_a?(String) + raise ArgumentError, "The value for key $ref must be a string, got: #{hash['$ref']}" + end + + if db = hash['$db'] + unless db.is_a?(String) + raise ArgumentError, "The value for key $db must be a string, got: #{hash['$db']}" end end + super(hash) end @@ -78,29 +95,9 @@ def initialize(hash) # @param [ BSON::ByteBuffer ] buffer The encoded BSON buffer to append to. # @param [ true, false ] validating_keys Whether keys should be validated when serializing. # - # @return [ String ] The raw BSON. + # @return [ BSON::ByteBuffer ] The buffer with the encoded object. def to_bson(buffer = ByteBuffer.new, validating_keys = Config.validating_keys?) - as_json.to_bson(buffer) - end - - module ClassMethods - - # Deserialize the hash from BSON, converting to a DBRef if appropriate. - # - # @param [ String ] buffer The bson representing a hash. - # - # @return [ Hash, DBRef ] The decoded hash or DBRef. - # - # @see http://bsonspec.org/#/specification - def from_bson(buffer, **options) - decoded = super - if decoded[COLLECTION] - decoded = DBRef.new(decoded) - end - decoded - end + as_json.to_bson(buffer, validating_keys) end end - - ::Hash.send(:extend, DBRef::ClassMethods) end diff --git a/lib/bson/document.rb b/lib/bson/document.rb index a4eae628a..8166b4a09 100644 --- a/lib/bson/document.rb +++ b/lib/bson/document.rb @@ -320,6 +320,19 @@ def symbolize_keys! raise ArgumentError, 'symbolize_keys! is not supported on BSON::Document instances. Please convert the document to hash first (using #to_h), then call #symbolize_keys! on the Hash instance' end + # Override the Hash implementation of to_bson_normalized_value. + # + # BSON::Document is already of the correct type and already provides + # indifferent access to keys, hence no further conversions are necessary. + # + # Attempting to perform Hash's conversion on Document instances converts + # DBRefs to Documents which is wrong. + # + # @return [ BSON::Document ] The normalized hash. + def to_bson_normalized_value + self + end + private def convert_key(key) diff --git a/lib/bson/ext_json.rb b/lib/bson/ext_json.rb index 68dd0abcb..a461a3602 100644 --- a/lib/bson/ext_json.rb +++ b/lib/bson/ext_json.rb @@ -139,7 +139,7 @@ module ExtJSON return {} end - if hash.key?('$ref') + if dbref?(hash) # Legacy dbref handling. # Note that according to extended json spec, only hash values (but # not the top-level BSON document itself) may be of type "dbref". @@ -148,10 +148,6 @@ module ExtJSON # logic to top level hashes doesn't cause harm. hash = hash.dup ref = hash.delete('$ref') - # $ref can be a string value or an ObjectId - unless ref.is_a?(String) - raise Error::ExtJSONParseError, "Invalid $ref value: #{ref}" - end # $id, if present, can be anything id = hash.delete('$id') if id.is_a?(Hash) @@ -164,13 +160,9 @@ module ExtJSON out = {'$ref' => ref, '$id' => id} if hash.key?('$db') # $db must always be a string, if provided - db = hash.delete('$db') - unless db.is_a?(String) - raise Error::ExtJSONParseError, "Invalid $db value: #{db}" - end - out['$db'] = db + out['$db'] = hash.delete('$db') end - return out.update(hash) + return out.update(parse_hash(hash)) end if hash.length == 1 @@ -383,5 +375,14 @@ module ExtJSON module_function def create_regexp(pattern, options) Regexp::Raw.new(pattern, options) end + + module_function def dbref?(hash) + if db = hash.key?('$db') + unless db.is_a?(String) + return false + end + end + return hash['$ref']&.is_a?(String) && hash.key?('$id') + end end end diff --git a/lib/bson/hash.rb b/lib/bson/hash.rb index be04e4372..d9b71791e 100644 --- a/lib/bson/hash.rb +++ b/lib/bson/hash.rb @@ -71,7 +71,7 @@ def to_bson(buffer = ByteBuffer.new, validating_keys = Config.validating_keys?) # @example Convert the hash to a normalized value. # hash.to_bson_normalized_value # - # @return [ BSON::Document ] The normazlied hash. + # @return [ BSON::Document ] The normalized hash. # # @since 3.0.0 def to_bson_normalized_value @@ -128,6 +128,16 @@ def from_bson(buffer, **options) if actual_byte_size != expected_byte_size raise Error::BSONDecodeError, "Expected hash to take #{expected_byte_size} bytes but it took #{actual_byte_size} bytes" end + + if hash['$ref'] && hash['$id'] + # We're doing implicit decoding here. If the document is an invalid + # dbref, we should decode it as a BSON::Document. + begin + hash = DBRef.new(hash) + rescue ArgumentError + end + end + hash end end diff --git a/spec/bson/dbref_spec.rb b/spec/bson/dbref_spec.rb index 792af8897..1701e6dd9 100644 --- a/spec/bson/dbref_spec.rb +++ b/spec/bson/dbref_spec.rb @@ -92,7 +92,7 @@ it 'raises an error' do expect do dbref - end.to raise_error(ArgumentError, /DBRefs must have a \$ref/) + end.to raise_error(ArgumentError, /DBRef must have \$ref/) end end @@ -104,7 +104,31 @@ it 'raises an error' do expect do dbref - end.to raise_error(ArgumentError, /DBRefs must have a \$id/) + end.to raise_error(ArgumentError, /DBRef must have \$id/) + end + end + + context 'when providing an invalid type for ref' do + let(:hash) do + { '$ref' => 1, '$id' => object_id } + end + + it 'raises an error' do + expect do + dbref + end.to raise_error(ArgumentError, /The value for key \$ref must be a string/) + end + end + + context 'when providing an invalid type for database' do + let(:hash) do + { '$ref' => 'users', '$id' => object_id, '$db' => 1 } + end + + it 'raises an error' do + expect do + dbref + end.to raise_error(ArgumentError, /The value for key \$db must be a string/) end end end @@ -159,7 +183,7 @@ describe '#from_bson' do let(:buffer) do - dbref.to_bson + hash.to_bson end let(:decoded) do @@ -168,8 +192,8 @@ context 'when a database exists' do - let(:dbref) do - described_class.new({ '$ref' => 'users', '$id' => object_id, '$db' => 'database' }) + let(:hash) do + { '$ref' => 'users', '$id' => object_id, '$db' => 'database' } end it 'decodes the ref' do @@ -183,12 +207,16 @@ it 'decodes the database' do expect(decoded.database).to eq('database') end + + it 'is of class DBRef' do + expect(decoded).to be_a described_class + end end context 'when no database exists' do - let(:dbref) do - described_class.new({ '$ref' => 'users', '$id' => object_id }) + let(:hash) do + { '$ref' => 'users', '$id' => object_id } end it 'decodes the ref' do @@ -202,17 +230,168 @@ it 'sets the database to nil' do expect(decoded.database).to be_nil end + + it 'is of class DBRef' do + expect(decoded).to be_a described_class + end end context 'when other keys exist' do - let(:dbref) do - described_class.new({ '$ref' => 'users', '$id' => object_id, 'x' => 'y' }) + let(:hash) do + { '$ref' => 'users', '$id' => object_id, 'x' => 'y' } end it 'decodes the key' do expect(decoded['x']).to eq('y') end + + it 'is of class DBRef' do + expect(decoded).to be_a described_class + end + end + + context 'when it is an invalid dbref' do + + shared_examples 'bson document' do + it 'should not raise' do + expect do + decoded + end.to_not raise_error + end + + it 'has the correct class' do + expect(decoded).to be_a BSON::Document + expect(decoded).to_not be_a described_class + end + end + + context 'when the hash has invalid collection type' do + let(:hash) do + { '$ref' => 1, '$id' => object_id } + end + include_examples 'bson document' + end + + context 'when the hash has an invalid database type' do + let(:hash) do + { '$ref' => 'users', '$id' => object_id, '$db' => 1 } + end + include_examples 'bson document' + end + + context 'when the hash is missing a collection' do + let(:hash) do + { '$id' => object_id } + end + include_examples 'bson document' + end + + context 'when the hash is missing an id' do + let(:hash) do + { '$ref' => 'users' } + end + include_examples 'bson document' + end + end + + context 'when nesting the dbref' do + + context 'when it is a valid dbref' do + let(:hash) do + { 'dbref' => { '$ref' => 'users', '$id' => object_id } } + end + + it 'should not raise' do + expect do + buffer + end.to_not raise_error + end + + it 'has the correct class' do + expect(decoded['dbref']).to be_a described_class + end + end + + context 'when it is an invalid dbref' do + + shared_examples 'nested bson document' do + it 'should not raise' do + expect do + decoded + end.to_not raise_error + end + + it 'has the correct class' do + expect(decoded['dbref']).to be_a BSON::Document + expect(decoded['dbref']).to_not be_a described_class + end + end + + context 'when the hash has invalid collection type' do + let(:hash) do + { 'dbref' => { '$ref' => 1, '$id' => object_id } } + end + include_examples 'nested bson document' + end + + context 'when the hash has an invalid database type' do + let(:hash) do + { 'dbref' => { '$ref' => 'users', '$id' => object_id, '$db' => 1 } } + end + include_examples 'nested bson document' + end + + context 'when the hash is missing a collection' do + let(:hash) do + { 'dbref' => { '$id' => object_id } } + end + include_examples 'nested bson document' + end + + context 'when the hash is missing an id' do + let(:hash) do + { 'dbref' => { '$ref' => 'users' } } + end + include_examples 'nested bson document' + end + end + end + + context 'when nesting a dbref inside a dbref' do + context 'when it is a valid dbref' do + let(:hash) do + { 'dbref1' => { '$ref' => 'users', '$id' => object_id, 'dbref2' => { '$ref' => 'users', '$id' => object_id } } } + end + + it 'should not raise' do + expect do + buffer + end.to_not raise_error + end + + it 'has the correct class' do + expect(decoded['dbref1']).to be_a described_class + expect(decoded['dbref1']['dbref2']).to be_a described_class + end + end + + context 'when it is an invalid dbref' do + let(:hash) do + { 'dbref' => { '$ref' => 'users', '$id' => object_id, 'dbref' => { '$ref' => 1, '$id' => object_id } } } + end + + it 'should not raise' do + expect do + decoded + end.to_not raise_error + end + + it 'has the correct class' do + expect(decoded['dbref']).to be_a described_class + expect(decoded['dbref']['dbref']).to be_a BSON::Document + end + end end end end diff --git a/spec/spec_tests/data/corpus/dbref.json b/spec/spec_tests/data/corpus/dbref.json index 1fe12c6f6..41c0b09d0 100644 --- a/spec/spec_tests/data/corpus/dbref.json +++ b/spec/spec_tests/data/corpus/dbref.json @@ -1,5 +1,5 @@ { - "description": "DBRef", + "description": "Document type (DBRef sub-documents)", "bson_type": "0x03", "valid": [ { @@ -26,6 +26,26 @@ "description": "Document with key names similar to those of a DBRef", "canonical_bson": "3e0000000224726566000c0000006e6f742d612d646272656600072469640058921b3e6e32ab156a22b59e022462616e616e6100050000007065656c0000", "canonical_extjson": "{\"$ref\": \"not-a-dbref\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"$banana\": \"peel\"}" + }, + { + "description": "DBRef with additional dollar-prefixed and dotted fields", + "canonical_bson": "48000000036462726566003c0000000224726566000b000000636f6c6c656374696f6e00072469640058921b3e6e32ab156a22b59e10612e62000100000010246300010000000000", + "canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"a.b\": {\"$numberInt\": \"1\"}, \"$c\": {\"$numberInt\": \"1\"}}}" + }, + { + "description": "Sub-document resembles DBRef but $id is missing", + "canonical_bson": "26000000036462726566001a0000000224726566000b000000636f6c6c656374696f6e000000", + "canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\"}}" + }, + { + "description": "Sub-document resembles DBRef but $ref is not a string", + "canonical_bson": "2c000000036462726566002000000010247265660001000000072469640058921b3e6e32ab156a22b59e0000", + "canonical_extjson": "{\"dbref\": {\"$ref\": {\"$numberInt\": \"1\"}, \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}}}" + }, + { + "description": "Sub-document resembles DBRef but $db is not a string", + "canonical_bson": "4000000003646272656600340000000224726566000b000000636f6c6c656374696f6e00072469640058921b3e6e32ab156a22b59e1024646200010000000000", + "canonical_extjson": "{\"dbref\": {\"$ref\": \"collection\", \"$id\": {\"$oid\": \"58921b3e6e32ab156a22b59e\"}, \"$db\": {\"$numberInt\": \"1\"}}}" } ] } diff --git a/spec/spec_tests/data/corpus/top.json b/spec/spec_tests/data/corpus/top.json index 56fb441c9..3566bbede 100644 --- a/spec/spec_tests/data/corpus/top.json +++ b/spec/spec_tests/data/corpus/top.json @@ -199,14 +199,6 @@ "description": "Bad $date (extra field)", "string": "{\"a\" : {\"$date\" : {\"$numberLong\" : \"1356351330501\"}, \"unrelated\": true}}" }, - { - "description": "Bad DBRef (ref is number, not string)", - "string": "{\"x\" : {\"$ref\" : 42, \"$id\" : \"abc\"}}" - }, - { - "description": "Bad DBRef (db is number, not string)", - "string": "{\"x\" : {\"$ref\" : \"a\", \"$id\" : \"abc\", \"$db\" : 42}}" - }, { "description": "Bad $minKey (boolean, not integer)", "string": "{\"a\" : {\"$minKey\" : true}}"