From 650249ed29ec03773d1c7e27a4dd715e9cffc04f Mon Sep 17 00:00:00 2001
From: Steve Chavez
Date: Fri, 8 Mar 2024 16:02:28 -0500
Subject: [PATCH] perf: remove json_typeof (#3316)

---
 CHANGELOG.md                        |  1 +
 src/PostgREST/Query/SqlFragment.hs  | 31 +++++++++++++++++--------------
 test/pgbench/2676/new.sql           | 16 ++++++++++++++++
 test/pgbench/2676/old.sql           | 17 +++++++++++++++++
 test/pgbench/README.md              |  4 ++--
 test/spec/Feature/Query/PlanSpec.hs |  4 ++--
 6 files changed, 55 insertions(+), 18 deletions(-)
 create mode 100644 test/pgbench/2676/new.sql
 create mode 100644 test/pgbench/2676/old.sql

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d99466b374..02c6b07c62 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ This project adheres to [Semantic Versioning](http://semver.org/).
  - #3061, Apply all function settings as transaction-scoped settings - @taimoorzaeem
  - #3171, #3046, Log schema cache stats to stderr - @steve-chavez
  - #3210, Dump schema cache through admin API - @taimoorzaeem
+ - #2676, Performance improvement on bulk json inserts, around 10% increase on requests per second by removing `json_typeof` from write queries - @steve-chavez
 
 ### Fixed
 
diff --git a/src/PostgREST/Query/SqlFragment.hs b/src/PostgREST/Query/SqlFragment.hs
index cdf28558aa..bc6e483661 100644
--- a/src/PostgREST/Query/SqlFragment.hs
+++ b/src/PostgREST/Query/SqlFragment.hs
@@ -300,19 +300,17 @@ fromJsonBodyF :: Maybe LBS.ByteString -> [CoercibleField] -> Bool -> Bool -> Boo
 fromJsonBodyF body fields includeSelect includeLimitOne includeDefaults =
   (if includeSelect then "SELECT " <> namedCols <> " " else mempty) <>
   "FROM (SELECT " <> jsonPlaceHolder <> " AS json_data) pgrst_payload, " <>
-  -- convert a json object into a json array, this way we can use json_to_recordset for all json payloads
-  -- Otherwise we'd have to use json_to_record for json objects and json_to_recordset for json arrays
-  -- We do this in SQL to avoid processing the JSON in application code
-  "LATERAL (SELECT CASE WHEN " <> jsonTypeofF <> "(pgrst_payload.json_data) = 'array' THEN pgrst_payload.json_data ELSE " <> jsonBuildArrayF <> "(pgrst_payload.json_data) END AS val) pgrst_uniform_json, " <>
   (if includeDefaults
-    then "LATERAL (SELECT jsonb_agg(jsonb_build_object(" <> defsJsonb <> ") || elem) AS val from jsonb_array_elements(pgrst_uniform_json.val) elem) pgrst_json_defs, "
+    then if isJsonObject
+      then "LATERAL (SELECT " <> defsJsonb <> " || pgrst_payload.json_data AS val) pgrst_json_defs, "
+      else "LATERAL (SELECT jsonb_agg(" <> defsJsonb <> " || elem) AS val from jsonb_array_elements(pgrst_payload.json_data) elem) pgrst_json_defs, "
     else mempty) <>
   "LATERAL (SELECT " <> parsedCols <> " FROM " <>
-    (if null fields
-      -- When we are inserting no columns (e.g. using default values), we can't use our ordinary `json_to_recordset`
-      -- because it can't extract records with no columns (there's no valid syntax for the `AS (colName colType,...)`
-      -- part). But we still need to ensure as many rows are created as there are array elements.
-      then SQL.sql $ jsonArrayElementsF <> "(" <> finalBodyF <> ") _ "
+    (if null fields -- when json keys are empty, e.g. when payload is `{}` or `[{}, {}]`
+      then SQL.sql $
+        if isJsonObject
+          then "(values(1)) _ " -- only 1 row for an empty json object '{}'
+          else jsonArrayElementsF <> "(" <> finalBodyF <> ") _ " -- extract rows of a json array of empty objects `[{}, {}]`
       else jsonToRecordsetF <> "(" <> SQL.sql finalBodyF <> ") AS _(" <> typedCols <> ") " <> if includeLimitOne then "LIMIT 1" else mempty
     ) <>
   ") pgrst_body "
@@ -320,16 +318,21 @@ fromJsonBodyF body fields includeSelect includeLimitOne includeDefaults =
     namedCols = intercalateSnippet ", " $ fromQi . QualifiedIdentifier "pgrst_body" . cfName <$> fields
     parsedCols = intercalateSnippet ", " $ pgFmtCoerceNamed <$> fields
     typedCols = intercalateSnippet ", " $ pgFmtIdent . cfName <> const " " <> SQL.sql . encodeUtf8 . cfIRType <$> fields
-    defsJsonb = SQL.sql $ BS.intercalate "," fieldsWDefaults
+    defsJsonb = SQL.sql $ "jsonb_build_object(" <> BS.intercalate "," fieldsWDefaults <> ")"
     fieldsWDefaults = mapMaybe (\case
         CoercibleField{cfName=nam, cfDefault=Just def} -> Just $ encodeUtf8 (pgFmtLit nam <> ", " <> def)
         CoercibleField{cfDefault=Nothing} -> Nothing
       ) fields
-    (finalBodyF, jsonTypeofF, jsonBuildArrayF, jsonArrayElementsF, jsonToRecordsetF) =
+    (finalBodyF, jsonArrayElementsF, jsonToRecordsetF) =
       if includeDefaults
-        then ("pgrst_json_defs.val", "jsonb_typeof", "jsonb_build_array", "jsonb_array_elements", "jsonb_to_recordset")
-        else ("pgrst_uniform_json.val", "json_typeof", "json_build_array", "json_array_elements", "json_to_recordset")
+        then ("pgrst_json_defs.val", "jsonb_array_elements", if isJsonObject then "jsonb_to_record" else "jsonb_to_recordset")
+        else ("pgrst_payload.json_data", "json_array_elements", if isJsonObject then "json_to_record" else "json_to_recordset")
     jsonPlaceHolder = SQL.encoderAndParam (HE.nullable $ if includeDefaults then HE.jsonbLazyBytes else HE.jsonLazyBytes) body
+    isJsonObject = -- light validation as pg's json_to_record(set) already validates that the body is valid JSON. We just need to know whether the body looks like an object or not.
+      let
+        insignificantWhitespace = [32,9,10,13] --" \t\n\r" https://datatracker.ietf.org/doc/html/rfc8259#section-2
+      in
+        LBS.take 1 (LBS.dropWhile (`elem` insignificantWhitespace) (fromMaybe mempty body)) == "{"
 
 pgFmtOrderTerm :: QualifiedIdentifier -> CoercibleOrderTerm -> SQL.Snippet
 pgFmtOrderTerm qi ot =
diff --git a/test/pgbench/2676/new.sql b/test/pgbench/2676/new.sql
new file mode 100644
index 0000000000..91350d6f75
--- /dev/null
+++ b/test/pgbench/2676/new.sql
@@ -0,0 +1,16 @@
+WITH pgrst_source AS (
+  INSERT INTO "test"."complex_items"("arr_data", "field-with_sep", "id", "name")
+  SELECT "pgrst_body"."arr_data", "pgrst_body"."field-with_sep", "pgrst_body"."id", "pgrst_body"."name"
+  FROM (SELECT '[{"id": 4, "name": "Vier"}, {"id": 5, "name": "Funf", "arr_data": null}, {"id": 6, "name": "Sechs", "arr_data": [1, 2, 3], "field-with_sep": 6}]'::json AS json_data) pgrst_payload,
+  LATERAL (SELECT "arr_data", "field-with_sep", "id", "name" FROM json_to_recordset(pgrst_payload.json_data) AS _("arr_data" integer[], "field-with_sep" integer, "id" bigint, "name" text) ) pgrst_body
+  RETURNING "test"."complex_items".*
+)
+SELECT
+  '' AS total_result_set,
+  pg_catalog.count(_postgrest_t) AS page_total,
+  array[]::text[] AS header,
+  coalesce(json_agg(_postgrest_t), '[]') AS body,
+  nullif(current_setting('response.headers', true), '') AS response_headers,
+  nullif(current_setting('response.status', true), '') AS response_status,
+  '' AS response_inserted
+FROM (SELECT "complex_items".* FROM "pgrst_source" AS "complex_items") _postgrest_t;
diff --git a/test/pgbench/2676/old.sql b/test/pgbench/2676/old.sql
new file mode 100644
index 0000000000..e215e9b7b2
--- /dev/null
+++ b/test/pgbench/2676/old.sql
@@ -0,0 +1,17 @@
+WITH pgrst_source AS (
+  INSERT INTO "test"."complex_items"("arr_data", "field-with_sep", "id", "name")
+  SELECT "pgrst_body"."arr_data", "pgrst_body"."field-with_sep", "pgrst_body"."id", "pgrst_body"."name"
+  FROM (SELECT '[{"id": 4, "name": "Vier"}, {"id": 5, "name": "Funf", "arr_data": null}, {"id": 6, "name": "Sechs", "arr_data": [1, 2, 3], "field-with_sep": 6}]'::json AS json_data) pgrst_payload,
+  LATERAL (SELECT CASE WHEN json_typeof(pgrst_payload.json_data) = 'array' THEN pgrst_payload.json_data ELSE json_build_array(pgrst_payload.json_data) END AS val) pgrst_uniform_json,
+  LATERAL (SELECT "arr_data", "field-with_sep", "id", "name" FROM json_to_recordset(pgrst_uniform_json.val) AS _("arr_data" integer[], "field-with_sep" integer, "id" bigint, "name" text) ) pgrst_body
+  RETURNING "test"."complex_items".*
+)
+SELECT
+  '' AS total_result_set,
+  pg_catalog.count(_postgrest_t) AS page_total,
+  array[]::text[] AS header,
+  coalesce(json_agg(_postgrest_t), '[]') AS body,
+  nullif(current_setting('response.headers', true), '') AS response_headers,
+  nullif(current_setting('response.status', true), '') AS response_status,
+  '' AS response_inserted
+FROM (SELECT "complex_items".* FROM "pgrst_source" AS "complex_items") _postgrest_t;
diff --git a/test/pgbench/README.md b/test/pgbench/README.md
index 7b143f4e19..d0d6a71bae 100644
--- a/test/pgbench/README.md
+++ b/test/pgbench/README.md
@@ -3,9 +3,9 @@
 Can be used as:
 
 ```
-postgrest-with-postgresql-15 -f test/pgbench/fixtures.sql pgbench -n -T 10 -f test/pgbench/1567/old.sql
+postgrest-with-postgresql-15 -f test/pgbench/fixtures.sql pgbench -U postgres -n -T 10 -f test/pgbench/1567/old.sql
 
-postgrest-with-postgresql-15 -f test/pgbench/fixtures.sql pgbench -n -T 10 -f test/pgbench/1567/new.sql
+postgrest-with-postgresql-15 -f test/pgbench/fixtures.sql pgbench -U postgres -n -T 10 -f test/pgbench/1567/new.sql
 ```
 
 ## Directory structure
diff --git a/test/spec/Feature/Query/PlanSpec.hs b/test/spec/Feature/Query/PlanSpec.hs
index de63dccebf..d9e78d7709 100644
--- a/test/spec/Feature/Query/PlanSpec.hs
+++ b/test/spec/Feature/Query/PlanSpec.hs
@@ -152,7 +152,7 @@ spec actualPgVersion = do
       liftIO $ do
         resHeaders `shouldSatisfy` elem ("Content-Type", "application/vnd.pgrst.plan+json; for=\"application/json\"; charset=utf-8")
         resStatus `shouldBe` Status { statusCode = 200, statusMessage="OK" }
-        totalCost `shouldBe` 3.27
+        totalCost `shouldBe` 0.06
 
     it "outputs the total cost for an update" $ do
       r <- request methodPatch "/projects?id=eq.3"
@@ -165,7 +165,7 @@ spec actualPgVersion = do
      liftIO $ do
        resHeaders `shouldSatisfy` elem ("Content-Type", "application/vnd.pgrst.plan+json; for=\"application/json\"; charset=utf-8")
        resStatus `shouldBe` Status { statusCode = 200, statusMessage="OK" }
-       totalCost `shouldBe` 12.45
+       totalCost `shouldBe` 8.23
 
     it "outputs the total cost for a delete" $ do
       r <- request methodDelete "/projects?id=in.(1,2,3)"
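
Reviewer note: the `isJsonObject` helper above replaces a per-request `json_typeof` call inside the generated SQL with a single byte inspection of the raw body in Haskell. A minimal standalone sketch of the same idea, assuming only the `bytestring` package (`looksLikeJsonObject` and the `main` driver are illustrative names, not part of the patch):

```haskell
{-# LANGUAGE OverloadedStrings #-}

-- Sketch of the "light validation": decide object vs array by peeking at
-- the first byte after RFC 8259 insignificant whitespace, leaving full
-- JSON validation to Postgres' json_to_record(set).
import qualified Data.ByteString.Lazy as LBS

looksLikeJsonObject :: LBS.ByteString -> Bool
looksLikeJsonObject bodyBytes =
  LBS.take 1 (LBS.dropWhile (`elem` insignificantWhitespace) bodyBytes) == "{"
  where
    insignificantWhitespace = [32, 9, 10, 13] -- " \t\n\r"

main :: IO ()
main = mapM_ (print . looksLikeJsonObject)
  [ "{\"id\": 4}"   -- True: a single json object
  , " \t\r\n{}"     -- True: leading insignificant whitespace is skipped
  , "[{\"id\": 4}]" -- False: a json array
  , ""              -- False: empty body
  ]
```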
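
With the object/array split, a single-object payload is now extracted directly with `json_to_record`, so the old `CASE WHEN json_typeof(...) ... END` normalization step disappears from the query. A psql-runnable illustration of the two fragments the patch emits for object payloads (values borrowed from the pgbench fixture):

```sql
-- Object payload with known columns: json_to_record extracts the row directly.
SELECT * FROM json_to_record('{"id": 4, "name": "Vier"}'::json) AS _("id" bigint, "name" text);

-- Empty object payload ({}, no extractable columns): the patch emits
-- "(values(1)) _" so that exactly one row is still produced.
SELECT 1 FROM (values(1)) _;
```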
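
Likewise for `includeDefaults`: `defsJsonb` now renders a `jsonb_build_object(...)` of the column defaults that is merged into the payload with `||`, once for an object payload and per element for an array. Since the right-hand side of `jsonb || jsonb` wins on duplicate keys, payload values override the defaults. A sketch of both branches, with an invented `'id', 0` default:

```sql
-- Object payload: one merge, no aggregation needed.
SELECT jsonb_build_object('id', 0) || '{"name": "Vier"}'::jsonb AS val;
--=> {"id": 0, "name": "Vier"}

-- Array payload: merge each element, then re-aggregate into an array.
SELECT jsonb_agg(jsonb_build_object('id', 0) || elem) AS val
FROM jsonb_array_elements('[{"name": "Vier"}, {"id": 6, "name": "Sechs"}]'::jsonb) elem;
--=> [{"id": 0, "name": "Vier"}, {"id": 6, "name": "Sechs"}]
```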