From bdde9c0bebcd0ba26a33fb06bd3d0cce85fd6a08 Mon Sep 17 00:00:00 2001
From: Niko Strijbol <strijbol.niko@gmail.com>
Date: Wed, 19 Apr 2023 10:58:56 +0200
Subject: [PATCH 1/2] Use json_schemer to validate schema

---
 Gemfile                           |  2 +-
 Gemfile.lock                      | 13 ++++++++++---
 app/runners/result_constructor.rb | 10 +++++-----
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/Gemfile b/Gemfile
index a01d8095ce..f80bd10ad5 100644
--- a/Gemfile
+++ b/Gemfile
@@ -37,7 +37,7 @@ gem 'image_processing', '~> 1.12.2'
 gem 'bootsnap', '~> 1.16.0', require: false
 
 # used to validate container responses
-gem 'json-schema', '~> 3.0.0'
+gem 'json_schemer', '~> 0.2.24'
 
 # delayed jobs
 gem 'delayed_job_active_record', '~> 4.1.7'
diff --git a/Gemfile.lock b/Gemfile.lock
index cb658a636b..385fbb97ed 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -160,6 +160,8 @@ GEM
     docker-api (2.2.0)
       excon (>= 0.47.0)
       multi_json
+    ecma-re-validator (0.4.0)
+      regexp_parser (~> 2.2)
     ed25519 (1.3.0)
     erubi (1.12.0)
     exception_notification (4.5.0)
@@ -185,6 +187,7 @@ GEM
     glob (0.4.0)
     globalid (1.1.0)
       activesupport (>= 5.0)
+    hana (1.3.7)
     has_scope (0.8.1)
       actionpack (>= 5.2)
       activesupport (>= 5.2)
@@ -220,8 +223,11 @@ GEM
       bindata
       faraday (~> 2.0)
       faraday-follow_redirects
-    json-schema (3.0.0)
-      addressable (>= 2.8)
+    json_schemer (0.2.24)
+      ecma-re-validator (~> 0.3)
+      hana (~> 1.3)
+      regexp_parser (~> 2.0)
+      uri_template (~> 0.7)
     jwt (2.7.0)
     kramdown (2.4.0)
       rexml
@@ -466,6 +472,7 @@ GEM
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
     unicode-display_width (2.4.2)
+    uri_template (0.7.0)
     validate_email (0.1.6)
       activemodel (>= 3.0)
       mail (>= 2.2.5)
@@ -536,7 +543,7 @@ DEPENDENCIES
   image_processing (~> 1.12.2)
   jbuilder (~> 2.11.5)
   jsbundling-rails (~> 1.1.1)
-  json-schema (~> 3.0.0)
+  json_schemer (~> 0.2.24)
   jwt (~> 2.7.0)
   kramdown (~> 2.4.0)
   kramdown-parser-gfm (~> 1.1.0)
diff --git a/app/runners/result_constructor.rb b/app/runners/result_constructor.rb
index 2934489dd2..5dea98afc7 100644
--- a/app/runners/result_constructor.rb
+++ b/app/runners/result_constructor.rb
@@ -1,5 +1,5 @@
 require 'json' # JSON support
-require 'json-schema' # json schema validation, from json-schema gem
+require 'json_schemer' # json schema validation
 
 class ResultConstructorError < StandardError
   attr_accessor :title, :description
@@ -12,8 +12,8 @@ def initialize(title, description = nil)
 end
 
 class ResultConstructor
-  FULL_SCHEMA = JSON.parse(Rails.public_path.join('schemas/judge_output.json').read)
-  PART_SCHEMA = JSON.parse(Rails.public_path.join('schemas/partial_output.json').read)
+  FULL_SCHEMER = JSONSchemer.schema(Rails.public_path.join('schemas/judge_output.json'))
+  PART_SCHEMER = JSONSchemer.schema(Rails.public_path.join('schemas/partial_output.json'))
 
   LEVELSA = %i[judgement tab context testcase test].freeze
   LEVELSH = { judgement: 0, tab: 1, context: 2, testcase: 3, test: 4 }.freeze
@@ -29,9 +29,9 @@ def feed(judge_output)
     raise ResultConstructorError, 'No judge output' if judge_output.empty?
 
     split_jsons(judge_output).each do |json|
-      if JSON::Validator.validate(PART_SCHEMA, json)
+      if PART_SCHEMER.valid?(json.deep_stringify_keys)
         update(json)
-      elsif JSON::Validator.validate(FULL_SCHEMA, json)
+      elsif FULL_SCHEMER.valid?(json.deep_stringify_keys)
         @result = json
       else
         raise ResultConstructorError.new(

From 641b37a1c602020a02a2c8b32bfe0c74b133f73d Mon Sep 17 00:00:00 2001
From: Niko Strijbol <strijbol.niko@gmail.com>
Date: Wed, 19 Apr 2023 11:59:49 +0200
Subject: [PATCH 2/2] Don't run validation on each command in production

In production environments, we no longer run JSON schema validation on
all the output of the judge. This saves about 0.5-1s of time on each
submission.

Instead, we run a validation after everything has been constructed,
since we still don't want invalid data in our systems.
---
 app/runners/result_constructor.rb       | 54 +++++++++++++++++-----
 config/application.rb                   |  3 ++
 config/environments/production.rb       |  3 ++
 public/schemas/judge_output.json        |  2 +-
 test/runners/result_constructor_test.rb | 59 ++++++++++++++++++++++---
 5 files changed, 102 insertions(+), 19 deletions(-)

diff --git a/app/runners/result_constructor.rb b/app/runners/result_constructor.rb
index 5dea98afc7..0f8bec4c1b 100644
--- a/app/runners/result_constructor.rb
+++ b/app/runners/result_constructor.rb
@@ -19,25 +19,49 @@ class ResultConstructor
   LEVELSH = { judgement: 0, tab: 1, context: 2, testcase: 3, test: 4 }.freeze
   GATHER = { tab: :groups, context: :groups, testcase: :groups, test: :tests }.freeze
 
-  def initialize(locale)
+  def initialize(locale, full_check: false)
     @locale = locale
     @level = nil
     @result = {}
+    @full_check = full_check || Rails.configuration.slow_judge_results_validation
   end
 
   def feed(judge_output)
     raise ResultConstructorError, 'No judge output' if judge_output.empty?
 
-    split_jsons(judge_output).each do |json|
-      if PART_SCHEMER.valid?(json.deep_stringify_keys)
-        update(json)
-      elsif FULL_SCHEMER.valid?(json.deep_stringify_keys)
-        @result = json
-      else
-        raise ResultConstructorError.new(
-          'Judge output is not a valid json',
-          json.to_s
-        )
+    if @full_check
+      # In development, we validate each update object.
+      split_jsons(judge_output).each do |json|
+        if PART_SCHEMER.valid?(json.deep_stringify_keys)
+          update(json)
+        elsif FULL_SCHEMER.valid?(json.deep_stringify_keys)
+          @result = json
+        else
+          raise ResultConstructorError.new(
+            'Judge output is not a valid json',
+            json.to_s
+          )
+        end
+      end
+    else
+      # In production, we do one validation at the end (in result)
+      split_jsons(judge_output).each do |json|
+        if json.key?(:command)
+          begin
+            # Clone the object to have better errors, since the update method
+            # may modify the json hash.
+            update(json.clone)
+          rescue StandardError
+            # We rescue all errors, since the json may have invalid data, resulting
+            # in stuff like TypeErrors, NoMethodErrors or KeyErrors
+            raise ResultConstructorError.new(
+              'Judge output is not a valid json',
+              json.to_s
+            )
+          end
+        else
+          @result = json
+        end
       end
     end
   end
@@ -56,6 +80,14 @@ def result(timeout)
     close_tab(badgeCount: @tab[:badgeCount] || 1) if @level == :tab
     close_judgement(accepted: false, status: status) if @level == :judgement
 
+    # Before finalizing the result, check if it is valid.
+    unless FULL_SCHEMER.valid?(@result.deep_stringify_keys)
+      raise ResultConstructorError.new(
+        'Judge output is not a valid json',
+        @result.to_s
+      )
+    end
+
     @result
   end
 
diff --git a/config/application.rb b/config/application.rb
index 7c9eda4730..6d0329d0f8 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -47,5 +47,8 @@ class Application < Rails::Application
     config.active_storage.queues.purge    = :default
 
     config.action_view.default_form_builder = "StandardFormBuilder"
+
+    # Should the slower but fuller validation of judge results happen
+    config.slow_judge_results_validation = true
   end
 end
diff --git a/config/environments/production.rb b/config/environments/production.rb
index 150d318c7a..c29553ea3d 100644
--- a/config/environments/production.rb
+++ b/config/environments/production.rb
@@ -145,4 +145,7 @@
   config.action_mailer.deliver_later_queue_name = 'default'
 
   config.submissions_storage_path = Rails.root.join('data', 'storage', 'submissions')
+
+  # Should the slower but fuller validation of judge results happen
+  config.slow_judge_results_validation = false
 end
diff --git a/public/schemas/judge_output.json b/public/schemas/judge_output.json
index 31923a9f38..b05eca33f1 100644
--- a/public/schemas/judge_output.json
+++ b/public/schemas/judge_output.json
@@ -88,7 +88,7 @@
         "type":    { "$ref": "#/definitions/severity" },
         "rows":    { "$ref": "#/definitions/index" },
         "columns": { "$ref": "#/definitions/index" },
-        "externalUrl":    { "type": "string" }
+        "externalUrl":    { "type": ["string", "null"] }
       }
     },
     "test-format": {
diff --git a/test/runners/result_constructor_test.rb b/test/runners/result_constructor_test.rb
index b574f76252..51f263b5fb 100644
--- a/test/runners/result_constructor_test.rb
+++ b/test/runners/result_constructor_test.rb
@@ -3,13 +3,11 @@
 require 'result_constructor'
 
 class ResultConstructorTest < ActiveSupport::TestCase
-  MINIMAL_FULL_S =
-
-    test 'empty output should fail' do
-      assert_raises ResultConstructorError do
-        construct_result([''])
-      end
+  test 'empty output should fail' do
+    assert_raises ResultConstructorError do
+      construct_result([''])
     end
+  end
 
   test 'empty json should fail' do
     assert_raises ResultConstructorError do
@@ -46,6 +44,41 @@ class ResultConstructorTest < ActiveSupport::TestCase
     end
   end
 
+  test 'intentionally annoyingly wrong json should fail' do
+    assert_raises ResultConstructorError do
+      construct_result([
+        # This command is invalid
+        '{ "command": "start-nothing" }',
+        '{ "title": "Test", "command": "start-tab" }',
+        '{ "description": { "format": "code", "description": "..." }, "command": "start-context" }',
+        '{ "description": { "format": "plain", "description": "" }, "command": "start-testcase" }',
+        '{ "expected": "70", "command": "start-test" }'
+      ])
+    end
+
+    assert_raises ResultConstructorError do
+      construct_result([
+        # There is a type in "command"
+        '{ "commund": "start-judgement" }',
+        '{ "title": "Test", "command": "start-tab" }',
+        '{ "description": { "format": "code", "description": "..." }, "command": "start-context" }',
+        '{ "description": { "format": "plain", "description": "" }, "command": "start-testcase" }',
+        '{ "expected": "70", "command": "start-test" }'
+      ])
+    end
+
+    assert_raises ResultConstructorError do
+      construct_result([
+        '{ "command": "start-judgement" }',
+        '{ "title": "Test", "command": "start-tab" }',
+        # A nested object is invalid: a description is a list
+        '{ "description": ["yes"], "command": "start-context" }',
+        '{ "description": { "format": "plain", "description": "" }, "command": "start-testcase" }',
+        '{ "expected": "70", "command": "start-test" }'
+      ])
+    end
+  end
+
   test 'partial output should accumulated status' do
     assert_equal({
       accepted: false,
@@ -339,11 +372,23 @@ class ResultConstructorTest < ActiveSupport::TestCase
     end
   end
 
+  protected
+
+  def create_rc(locale: 'en')
+    ResultConstructor.new(locale)
+  end
+
   private
 
   def construct_result(food, locale: 'en', timeout: false)
-    rc = ResultConstructor.new(locale)
+    rc = create_rc(locale: locale)
     food.each { |f| rc.feed f }
     rc.result(timeout)
   end
 end
+
+class CheckingResultConstructorTest < ResultConstructorTest
+  def create_rc(locale: 'en')
+    ResultConstructor.new(locale, full_check: true)
+  end
+end