From bdde9c0bebcd0ba26a33fb06bd3d0cce85fd6a08 Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Wed, 19 Apr 2023 10:58:56 +0200 Subject: [PATCH 1/2] Use json_schemer to validate schema --- Gemfile | 2 +- Gemfile.lock | 13 ++++++++++--- app/runners/result_constructor.rb | 10 +++++----- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/Gemfile b/Gemfile index a01d8095ce..f80bd10ad5 100644 --- a/Gemfile +++ b/Gemfile @@ -37,7 +37,7 @@ gem 'image_processing', '~> 1.12.2' gem 'bootsnap', '~> 1.16.0', require: false # used to validate container responses -gem 'json-schema', '~> 3.0.0' +gem 'json_schemer', '~> 0.2.24' # delayed jobs gem 'delayed_job_active_record', '~> 4.1.7' diff --git a/Gemfile.lock b/Gemfile.lock index cb658a636b..385fbb97ed 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -160,6 +160,8 @@ GEM docker-api (2.2.0) excon (>= 0.47.0) multi_json + ecma-re-validator (0.4.0) + regexp_parser (~> 2.2) ed25519 (1.3.0) erubi (1.12.0) exception_notification (4.5.0) @@ -185,6 +187,7 @@ GEM glob (0.4.0) globalid (1.1.0) activesupport (>= 5.0) + hana (1.3.7) has_scope (0.8.1) actionpack (>= 5.2) activesupport (>= 5.2) @@ -220,8 +223,11 @@ GEM bindata faraday (~> 2.0) faraday-follow_redirects - json-schema (3.0.0) - addressable (>= 2.8) + json_schemer (0.2.24) + ecma-re-validator (~> 0.3) + hana (~> 1.3) + regexp_parser (~> 2.0) + uri_template (~> 0.7) jwt (2.7.0) kramdown (2.4.0) rexml @@ -466,6 +472,7 @@ GEM tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.4.2) + uri_template (0.7.0) validate_email (0.1.6) activemodel (>= 3.0) mail (>= 2.2.5) @@ -536,7 +543,7 @@ DEPENDENCIES image_processing (~> 1.12.2) jbuilder (~> 2.11.5) jsbundling-rails (~> 1.1.1) - json-schema (~> 3.0.0) + json_schemer (~> 0.2.24) jwt (~> 2.7.0) kramdown (~> 2.4.0) kramdown-parser-gfm (~> 1.1.0) diff --git a/app/runners/result_constructor.rb b/app/runners/result_constructor.rb index 2934489dd2..5dea98afc7 100644 --- a/app/runners/result_constructor.rb +++ b/app/runners/result_constructor.rb @@ -1,5 +1,5 @@ require 'json' # JSON support -require 'json-schema' # json schema validation, from json-schema gem +require 'json_schemer' # json schema validation class ResultConstructorError < StandardError attr_accessor :title, :description @@ -12,8 +12,8 @@ def initialize(title, description = nil) end class ResultConstructor - FULL_SCHEMA = JSON.parse(Rails.public_path.join('schemas/judge_output.json').read) - PART_SCHEMA = JSON.parse(Rails.public_path.join('schemas/partial_output.json').read) + FULL_SCHEMER = JSONSchemer.schema(Rails.public_path.join('schemas/judge_output.json')) + PART_SCHEMER = JSONSchemer.schema(Rails.public_path.join('schemas/partial_output.json')) LEVELSA = %i[judgement tab context testcase test].freeze LEVELSH = { judgement: 0, tab: 1, context: 2, testcase: 3, test: 4 }.freeze @@ -29,9 +29,9 @@ def feed(judge_output) raise ResultConstructorError, 'No judge output' if judge_output.empty? split_jsons(judge_output).each do |json| - if JSON::Validator.validate(PART_SCHEMA, json) + if PART_SCHEMER.valid?(json.deep_stringify_keys) update(json) - elsif JSON::Validator.validate(FULL_SCHEMA, json) + elsif FULL_SCHEMER.valid?(json.deep_stringify_keys) @result = json else raise ResultConstructorError.new( From 641b37a1c602020a02a2c8b32bfe0c74b133f73d Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Wed, 19 Apr 2023 11:59:49 +0200 Subject: [PATCH 2/2] Don't run validation on each command in production In production environments, we no longer run JSON schema validation on all the output of the judge. This saves about 0.5-1s of time on each submission. Instead, we run a validation after everything has been constructed, since we still don't want invalid data in our systems. --- app/runners/result_constructor.rb | 54 +++++++++++++++++----- config/application.rb | 3 ++ config/environments/production.rb | 3 ++ public/schemas/judge_output.json | 2 +- test/runners/result_constructor_test.rb | 59 ++++++++++++++++++++++--- 5 files changed, 102 insertions(+), 19 deletions(-) diff --git a/app/runners/result_constructor.rb b/app/runners/result_constructor.rb index 5dea98afc7..0f8bec4c1b 100644 --- a/app/runners/result_constructor.rb +++ b/app/runners/result_constructor.rb @@ -19,25 +19,49 @@ class ResultConstructor LEVELSH = { judgement: 0, tab: 1, context: 2, testcase: 3, test: 4 }.freeze GATHER = { tab: :groups, context: :groups, testcase: :groups, test: :tests }.freeze - def initialize(locale) + def initialize(locale, full_check: false) @locale = locale @level = nil @result = {} + @full_check = full_check || Rails.configuration.slow_judge_results_validation end def feed(judge_output) raise ResultConstructorError, 'No judge output' if judge_output.empty? - split_jsons(judge_output).each do |json| - if PART_SCHEMER.valid?(json.deep_stringify_keys) - update(json) - elsif FULL_SCHEMER.valid?(json.deep_stringify_keys) - @result = json - else - raise ResultConstructorError.new( - 'Judge output is not a valid json', - json.to_s - ) + if @full_check + # In development, we validate each update object. + split_jsons(judge_output).each do |json| + if PART_SCHEMER.valid?(json.deep_stringify_keys) + update(json) + elsif FULL_SCHEMER.valid?(json.deep_stringify_keys) + @result = json + else + raise ResultConstructorError.new( + 'Judge output is not a valid json', + json.to_s + ) + end + end + else + # In production, we do one validation at the end (in result) + split_jsons(judge_output).each do |json| + if json.key?(:command) + begin + # Clone the object to have better errors, since the update method + # may modify the json hash. + update(json.clone) + rescue StandardError + # We rescue all errors, since the json may have invalid data, resulting + # in stuff like TypeErrors, NoMethodErrors or KeyErrors + raise ResultConstructorError.new( + 'Judge output is not a valid json', + json.to_s + ) + end + else + @result = json + end end end end @@ -56,6 +80,14 @@ def result(timeout) close_tab(badgeCount: @tab[:badgeCount] || 1) if @level == :tab close_judgement(accepted: false, status: status) if @level == :judgement + # Before finalizing the result, check if it is valid. + unless FULL_SCHEMER.valid?(@result.deep_stringify_keys) + raise ResultConstructorError.new( + 'Judge output is not a valid json', + @result.to_s + ) + end + @result end diff --git a/config/application.rb b/config/application.rb index 7c9eda4730..6d0329d0f8 100644 --- a/config/application.rb +++ b/config/application.rb @@ -47,5 +47,8 @@ class Application < Rails::Application config.active_storage.queues.purge = :default config.action_view.default_form_builder = "StandardFormBuilder" + + # Should the slower but fuller validation of judge results happen + config.slow_judge_results_validation = true end end diff --git a/config/environments/production.rb b/config/environments/production.rb index 150d318c7a..c29553ea3d 100644 --- a/config/environments/production.rb +++ b/config/environments/production.rb @@ -145,4 +145,7 @@ config.action_mailer.deliver_later_queue_name = 'default' config.submissions_storage_path = Rails.root.join('data', 'storage', 'submissions') + + # Should the slower but fuller validation of judge results happen + config.slow_judge_results_validation = false end diff --git a/public/schemas/judge_output.json b/public/schemas/judge_output.json index 31923a9f38..b05eca33f1 100644 --- a/public/schemas/judge_output.json +++ b/public/schemas/judge_output.json @@ -88,7 +88,7 @@ "type": { "$ref": "#/definitions/severity" }, "rows": { "$ref": "#/definitions/index" }, "columns": { "$ref": "#/definitions/index" }, - "externalUrl": { "type": "string" } + "externalUrl": { "type": ["string", "null"] } } }, "test-format": { diff --git a/test/runners/result_constructor_test.rb b/test/runners/result_constructor_test.rb index b574f76252..51f263b5fb 100644 --- a/test/runners/result_constructor_test.rb +++ b/test/runners/result_constructor_test.rb @@ -3,13 +3,11 @@ require 'result_constructor' class ResultConstructorTest < ActiveSupport::TestCase - MINIMAL_FULL_S = - - test 'empty output should fail' do - assert_raises ResultConstructorError do - construct_result(['']) - end + test 'empty output should fail' do + assert_raises ResultConstructorError do + construct_result(['']) end + end test 'empty json should fail' do assert_raises ResultConstructorError do @@ -46,6 +44,41 @@ class ResultConstructorTest < ActiveSupport::TestCase end end + test 'intentionally annoyingly wrong json should fail' do + assert_raises ResultConstructorError do + construct_result([ + # This command is invalid + '{ "command": "start-nothing" }', + '{ "title": "Test", "command": "start-tab" }', + '{ "description": { "format": "code", "description": "..." }, "command": "start-context" }', + '{ "description": { "format": "plain", "description": "" }, "command": "start-testcase" }', + '{ "expected": "70", "command": "start-test" }' + ]) + end + + assert_raises ResultConstructorError do + construct_result([ + # There is a type in "command" + '{ "commund": "start-judgement" }', + '{ "title": "Test", "command": "start-tab" }', + '{ "description": { "format": "code", "description": "..." }, "command": "start-context" }', + '{ "description": { "format": "plain", "description": "" }, "command": "start-testcase" }', + '{ "expected": "70", "command": "start-test" }' + ]) + end + + assert_raises ResultConstructorError do + construct_result([ + '{ "command": "start-judgement" }', + '{ "title": "Test", "command": "start-tab" }', + # A nested object is invalid: a description is a list + '{ "description": ["yes"], "command": "start-context" }', + '{ "description": { "format": "plain", "description": "" }, "command": "start-testcase" }', + '{ "expected": "70", "command": "start-test" }' + ]) + end + end + test 'partial output should accumulated status' do assert_equal({ accepted: false, @@ -339,11 +372,23 @@ class ResultConstructorTest < ActiveSupport::TestCase end end + protected + + def create_rc(locale: 'en') + ResultConstructor.new(locale) + end + private def construct_result(food, locale: 'en', timeout: false) - rc = ResultConstructor.new(locale) + rc = create_rc(locale: locale) food.each { |f| rc.feed f } rc.result(timeout) end end + +class CheckingResultConstructorTest < ResultConstructorTest + def create_rc(locale: 'en') + ResultConstructor.new(locale, full_check: true) + end +end