chore: Refactored sql parser #2714

jsumners-nr · 2024-11-08T17:38:14Z

This PR is meant to resolve #2703. Basically, it removes the complicated regular expressions in favor of a more procedural parsing of SQL statements. All cross-application tests pass.

main branch benchmark

{
  "lib/db/query-parsers/sql.bench.js": {
    "leading-multi-line-comment-single-line": {
      "numSamples": 199800,
      "mean": 0.0013976976977020214,
      "stdDev": 0.0014156655019840908,
      "max": 0.085,
      "min": 0,
      "5thPercentile": 0.001,
      "95thPercentile": 0.002,
      "median": 0.001
    },
    "leading-multi-line-comment-multiple-lines": {
      "numSamples": 199800,
      "mean": 0.0013618468468510846,
      "stdDev": 0.0006465917362843219,
      "max": 0.015,
      "min": 0,
      "5thPercentile": 0.001,
      "95thPercentile": 0.002,
      "median": 0.001
    },
    "single-embedded-multi-line-comment": {
      "numSamples": 199800,
      "mean": 0.0013247097097138071,
      "stdDev": 0.0006047642355391195,
      "max": 0.016,
      "min": 0,
      "5thPercentile": 0.001,
      "95thPercentile": 0.002,
      "median": 0.001
    },
    "multiple-embedded-multi-line-comments": {
      "numSamples": 199800,
      "mean": 0.0015002302302342982,
      "stdDev": 0.0007180428487002329,
      "max": 0.016,
      "min": 0.001,
      "5thPercentile": 0.001,
      "95thPercentile": 0.002,
      "median": 0.001
    },
    "select-statement": {
      "numSamples": 199800,
      "mean": 0.0011379929929963577,
      "stdDev": 0.0004575930149290276,
      "max": 0.008,
      "min": 0,
      "5thPercentile": 0.001,
      "95thPercentile": 0.002,
      "median": 0.001
    },
    "update-statement": {
      "numSamples": 199800,
      "mean": 0.0012941541541581323,
      "stdDev": 0.0006351718693039712,
      "max": 0.029,
      "min": 0,
      "5thPercentile": 0.001,
      "95thPercentile": 0.002,
      "median": 0.001
    },
    "delete-statement": {
      "numSamples": 199800,
      "mean": 0.0012659209209248082,
      "stdDev": 0.0005621189869591081,
      "max": 0.018,
      "min": 0,
      "5thPercentile": 0.001,
      "95thPercentile": 0.002,
      "median": 0.001
    }
  }
}

pr branch benchmark

{
  "lib/db/query-parsers/sql.bench.js": {
    "leading-multi-line-comment-single-line": {
      "numSamples": 199800,
      "mean": 0.003716836836836951,
      "stdDev": 0.0035813356124759044,
      "max": 0.213,
      "min": 0.002,
      "5thPercentile": 0.003,
      "95thPercentile": 0.005,
      "median": 0.003
    },
    "leading-multi-line-comment-multiple-lines": {
      "numSamples": 199800,
      "mean": 0.0036585835835863433,
      "stdDev": 0.002235811989192826,
      "max": 0.113,
      "min": 0.002,
      "5thPercentile": 0.003,
      "95thPercentile": 0.005,
      "median": 0.003
    },
    "single-embedded-multi-line-comment": {
      "numSamples": 199800,
      "mean": 0.003405565565561924,
      "stdDev": 0.0020176129007566826,
      "max": 0.108,
      "min": 0.002,
      "5thPercentile": 0.003,
      "95thPercentile": 0.004,
      "median": 0.003
    },
    "multiple-embedded-multi-line-comments": {
      "numSamples": 199800,
      "mean": 0.003568348348350056,
      "stdDev": 0.001097445926842406,
      "max": 0.057,
      "min": 0.002,
      "5thPercentile": 0.003,
      "95thPercentile": 0.005,
      "median": 0.003
    },
    "select-statement": {
      "numSamples": 199800,
      "mean": 0.0017923973974009314,
      "stdDev": 0.0007276116250456387,
      "max": 0.025,
      "min": 0.001,
      "5thPercentile": 0.001,
      "95thPercentile": 0.002,
      "median": 0.002
    },
    "update-statement": {
      "numSamples": 199800,
      "mean": 0.0033950950950900212,
      "stdDev": 0.002676351775238991,
      "max": 0.14,
      "min": 0.002,
      "5thPercentile": 0.003,
      "95thPercentile": 0.004,
      "median": 0.003
    },
    "delete-statement": {
      "numSamples": 199800,
      "mean": 0.003193493493486407,
      "stdDev": 0.0012386411666348246,
      "max": 0.059,
      "min": 0.002,
      "5thPercentile": 0.002,
      "95thPercentile": 0.004,
      "median": 0.003
    }
  }
}

codecov · 2024-11-08T20:56:36Z

Codecov Report

All modified and coverable lines are covered by tests ✅

Project coverage is 97.23%. Comparing base (9b6de68) to head (c4b6794).
Report is 8 commits behind head on main.

Additional details and impacted files

@@            Coverage Diff             @@
##             main    #2714      +/-   ##
==========================================
- Coverage   97.24%   97.23%   -0.02%     
==========================================
  Files         294      293       -1     
  Lines       46125    46412     +287     
==========================================
+ Hits        44855    45128     +273     
- Misses       1270     1284      +14

Flag	Coverage Δ
integration-tests-cjs-18.x	`74.03% <32.13%> (-0.25%)`	⬇️
integration-tests-cjs-20.x	`74.05% <32.13%> (-0.25%)`	⬇️
integration-tests-cjs-22.x	`74.08% <32.13%> (-0.26%)`	⬇️
integration-tests-esm-18.x	`49.77% <32.13%> (-0.13%)`	⬇️
integration-tests-esm-20.x	`49.78% <32.13%> (-0.13%)`	⬇️
integration-tests-esm-22.x	`49.80% <32.13%> (-0.13%)`	⬇️
unit-tests-18.x	`88.99% <100.00%> (+0.01%)`	⬆️
unit-tests-20.x	`88.99% <100.00%> (+0.01%)`	⬆️
unit-tests-22.x	`89.00% <100.00%> (?)`
versioned-tests-18.x	`78.98% <86.55%> (-0.16%)`	⬇️
versioned-tests-20.x	`78.98% <86.55%> (-0.16%)`	⬇️
versioned-tests-22.x	`78.99% <86.55%> (-0.16%)`	⬇️

Flags with carried forward coverage won't be shown. Click here to find out more.

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

bizob2828 · 2024-11-14T15:46:31Z

Looking at the benchmarks this is slightly slower

jsumners-nr · 2024-11-14T17:29:32Z

> Looking at the benchmarks this is slightly slower

Which is why I haven't really pushed it. We need a replication of the original issue to learn if the cost is worth it.

bizob2828 · 2024-11-14T17:41:43Z

> > Looking at the benchmarks this is slightly slower
>
> Which is why I haven't really pushed it. We need a replication of the original issue to learn if the cost is worth it.

Agreed. I think if there's an actual issue and this fixes it, it may be worth investigating. Until we get a repro, I don't see the value add in this PR.

jsumners-nr added 4 commits November 8, 2024 12:36

chore: Refactored sql parser

b90d315

support non-conforming select

117b462

improve parsing

619d1bf

improve parsing

620a4fb

jsumners-nr added 2 commits November 11, 2024 08:38

properly parse select subqueries

56c5953

oops, possible to have a space

1850f48

jsumners-nr force-pushed the issue-2703 branch from 5279da9 to 1850f48 Compare November 11, 2024 13:39

support single line comments and fix cte parsing

c4b6794

jsumners-nr added the risk: medium This change impacts sensitive areas or has other traits that warrant more careful consideration. label Nov 12, 2024

jsumners-nr mentioned this pull request Nov 13, 2024

Regex is blocking thread #2703

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

chore: Refactored sql parser #2714

chore: Refactored sql parser #2714

jsumners-nr commented Nov 8, 2024 •

edited

Loading

codecov bot commented Nov 8, 2024 •

edited

Loading

bizob2828 commented Nov 14, 2024

jsumners-nr commented Nov 14, 2024

bizob2828 commented Nov 14, 2024 •

edited

Loading

chore: Refactored sql parser #2714

Are you sure you want to change the base?

chore: Refactored sql parser #2714

Conversation

jsumners-nr commented Nov 8, 2024 • edited Loading

codecov bot commented Nov 8, 2024 • edited Loading

Codecov Report

bizob2828 commented Nov 14, 2024

jsumners-nr commented Nov 14, 2024

bizob2828 commented Nov 14, 2024 • edited Loading

jsumners-nr commented Nov 8, 2024 •

edited

Loading

codecov bot commented Nov 8, 2024 •

edited

Loading

bizob2828 commented Nov 14, 2024 •

edited

Loading