diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..23a10178
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,19 @@
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+version: 2
+updates:
+  - package-ecosystem: 'github-actions'
+    directory: '.github/workflows'
+    schedule:
+      interval: 'weekly'
+      day: 'monday'
+    groups:
+      workflows:
+        dependency-type: 'development'
+  - package-ecosystem: 'npm'
+    directory: '/'
+    schedule:
+      interval: 'weekly'
+      day: 'monday'
+    groups:
+      dev-dependencies:
+        dependency-type: 'development'
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
deleted file mode 100644
index 827969fe..00000000
--- a/.github/workflows/release.yml
+++ /dev/null
@@ -1,29 +0,0 @@
-# See https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages
-name: release
-on:
-  workflow_dispatch:
-    inputs:
-      version:
-        type: string
-        required: true
-        description: 'Version to release. Released package is based on the version suffix: -web, -common, -node'
-# TODO: trigger on release, currently it's just manual dispatch
-# release:
-#   types: [created]
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      # Setup .npmrc file to publish to npm
-      - uses: actions/setup-node@v3
-        with:
-          node-version: '16.x'
-          registry-url: 'https://registry.npmjs.org'
-      - run: npm config set access public
-      - run: npm i --ignore-scripts
-      - name: Build package and prepare package.json
-        run: NODE_OPTIONS="-r ts-node/register" node .build/build_and_prepare.ts ${{ github.event.inputs.version }}
-      - run: npm publish --access public
-        env:
-          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml
index 294bee65..5b8b4920 100644
--- a/.github/workflows/scorecard.yml
+++ b/.github/workflows/scorecard.yml
@@ -12,7 +12,19 @@ on:
   schedule:
     - cron: '43 12 * * 6'
   push:
-    branches: [ "main" ]
+    branches:
+      - main
+    paths-ignore:
+      - '**/*.md'
+      - 'LICENSE'
+      - 'benchmarks/**'
+      - 'examples/**'
+  pull_request:
+    paths-ignore:
+      - '**/*.md'
+      - 'LICENSE'
+      - 'benchmarks/**'
+      - 'examples/**'
   workflow_dispatch:
 
 # Declare default permissions as read only.
@@ -32,13 +44,13 @@ jobs:
       # actions: read
 
     steps:
-      - name: "Checkout code"
-        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+      - name: 'Checkout code'
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
         with:
           persist-credentials: false
 
-      - name: "Run analysis"
-        uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
+      - name: 'Run analysis'
+        uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0
         with:
           results_file: results.sarif
          results_format: sarif
@@ -59,8 +71,8 @@ jobs:
 
       # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
       # format to the repository Actions tab.
-      - name: "Upload artifact"
-        uses: actions/upload-artifact@97a0fba1372883ab732affbe8f94b823f91727db # v3.pre.node20
+      - name: 'Upload artifact'
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v3.pre.node20
         with:
           name: SARIF file
           path: results.sarif
@@ -68,7 +80,7 @@ jobs:
 
       # Upload the results to GitHub's code scanning dashboard (optional).
       # Commenting out will disable upload of results to your repo's Code Scanning dashboard
-      - name: "Upload to code-scanning"
+      - name: 'Upload to code-scanning'
         uses: github/codeql-action/upload-sarif@v3
         with:
           sarif_file: results.sarif
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 74c25a21..58af1093 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,5 +1,6 @@
 name: 'tests'
 
+permissions: {}
 on:
   workflow_dispatch:
   push:
@@ -67,7 +68,7 @@ jobs:
       - uses: actions/checkout@main
 
       - name: Start ClickHouse (version - ${{ matrix.clickhouse }}) in Docker
-        uses: isbang/compose-action@v1.5.1
+        uses: isbang/compose-action@v2.0.2
         env:
           CLICKHOUSE_VERSION: ${{ matrix.clickhouse }}
         with:
@@ -100,7 +101,7 @@ jobs:
       - uses: actions/checkout@main
 
       - name: Start ClickHouse (version - ${{ matrix.clickhouse }}) in Docker
-        uses: isbang/compose-action@v1.5.1
+        uses: isbang/compose-action@v2.0.2
         env:
           CLICKHOUSE_VERSION: ${{ matrix.clickhouse }}
         with:
@@ -141,7 +142,7 @@ jobs:
       - uses: actions/checkout@main
 
      - name: Start ClickHouse cluster (version - ${{ matrix.clickhouse }}) in Docker
-        uses: isbang/compose-action@v1.5.1
+        uses: isbang/compose-action@v2.0.2
         env:
           CLICKHOUSE_VERSION: ${{ matrix.clickhouse }}
         with:
@@ -172,7 +173,7 @@ jobs:
       - uses: actions/checkout@main
 
       - name: Start ClickHouse cluster (version - ${{ matrix.clickhouse }}) in Docker
-        uses: isbang/compose-action@v1.5.1
+        uses: isbang/compose-action@v2.0.2
         env:
           CLICKHOUSE_VERSION: ${{ matrix.clickhouse }}
         with:
@@ -222,7 +223,6 @@ jobs:
 
   web-integration-tests-cloud-smt:
     needs: node-unit-tests
     runs-on: ubuntu-latest
-    permissions: write-all
     steps:
       - uses: actions/checkout@main
@@ -263,7 +263,7 @@ jobs:
           fetch-depth: 0
 
       - name: Start ClickHouse (version - ${{ matrix.clickhouse }}) in Docker
-        uses: isbang/compose-action@v1.5.1
+        uses: isbang/compose-action@v2.0.2
         with:
           compose-file: 'docker-compose.yml'
           down-flags: '--volumes'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0f2ee2b6..1ef56960 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,66 @@
+# 1.7.0 (Common, Node.js, Web)
+
+## Bug fixes
+
+- (Web only) Fixed an issue where streaming large datasets could provide corrupted results. See [#333](https://github.com/ClickHouse/clickhouse-js/pull/333) (PR) for more details.
+
+## New features
+
+- Added `JSONEachRowWithProgress` format support, `ProgressRow` interface, and `isProgressRow` type guard. See [this Node.js example](./examples/node/select_json_each_row_with_progress.ts) for more details. It should work similarly with the Web version.
+- (Experimental) Exposed the `parseColumnType` function that takes a string representation of a ClickHouse type (e.g., `FixedString(16)`, `Nullable(Int32)`, etc.) and returns an AST-like object that represents the type. For example:
+
+  ```ts
+  for (const type of [
+    'Int32',
+    'Array(Nullable(String))',
+    `Map(Int32, DateTime64(9, 'UTC'))`,
+  ]) {
+    console.log(`##### Source ClickHouse type: ${type}`)
+    console.log(parseColumnType(type))
+  }
+  ```
+
+  The above code will output:
+
+  ```
+  ##### Source ClickHouse type: Int32
+  { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' }
+  ##### Source ClickHouse type: Array(Nullable(String))
+  {
+    type: 'Array',
+    value: {
+      type: 'Nullable',
+      sourceType: 'Nullable(String)',
+      value: { type: 'Simple', columnType: 'String', sourceType: 'String' }
+    },
+    dimensions: 1,
+    sourceType: 'Array(Nullable(String))'
+  }
+  ##### Source ClickHouse type: Map(Int32, DateTime64(9, 'UTC'))
+  {
+    type: 'Map',
+    key: { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' },
+    value: {
+      type: 'DateTime64',
+      timezone: 'UTC',
+      precision: 9,
+      sourceType: "DateTime64(9, 'UTC')"
+    },
+    sourceType: "Map(Int32, DateTime64(9, 'UTC'))"
+  }
+  ```
+
+  While the original intention was to use this function internally for header parsing with the `Native`/`RowBinaryWithNamesAndTypes` data formats, it can be useful for other purposes as well (e.g., interface generation, or custom JSON serializers).
+
+  NB: source types that currently cannot be parsed:
+
+  - Geo
+  - (Simple)AggregateFunction
+  - Nested
+  - Old/new experimental JSON
+  - Dynamic
+  - Variant
+
 # 1.6.0 (Common, Node.js, Web)
 
 ## New features
diff --git a/examples/README.md b/examples/README.md
index 530ba4bd..b98afa6a 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -57,6 +57,7 @@ If something is missing, or you found a mistake in one of these examples, please
 - [select_streaming_json_each_row.ts](node/select_streaming_json_each_row.ts) - (Node.js only) streaming JSON\* formats from ClickHouse and processing it with `on('data')` event.
 - [select_streaming_json_each_row_for_await.ts](node/select_streaming_json_each_row_for_await.ts) - (Node.js only) similar to [select_streaming_json_each_row.ts](node/select_streaming_json_each_row.ts), but using the `for await` loop syntax.
 - [select_streaming_text_line_by_line.ts](node/select_streaming_text_line_by_line.ts) - (Node.js only) streaming text formats from ClickHouse and processing it line by line. In this example, CSV format is used.
+- [select_json_each_row_with_progress.ts](node/select_json_each_row_with_progress.ts) - streaming with the `JSONEachRowWithProgress` format, checking for progress rows in the stream.
 
 #### Data types
 
diff --git a/examples/node/select_json_each_row_with_progress.ts b/examples/node/select_json_each_row_with_progress.ts
new file mode 100644
index 00000000..90664368
--- /dev/null
+++ b/examples/node/select_json_each_row_with_progress.ts
@@ -0,0 +1,39 @@
+import { createClient } from '@clickhouse/client'
+import { isProgressRow } from '@clickhouse/client-common'
+
+/** See the format spec - https://clickhouse.com/docs/en/interfaces/formats#jsoneachrowwithprogress
+ * When JSONEachRowWithProgress format is used in TypeScript,
+ * the ResultSet should infer the final row type as `{ row: Data } | ProgressRow`. */
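+// Progress rows are interleaved with the data rows in the stream,
+// so every decoded row is checked with the `isProgressRow` type guard below.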
+type Data = { number: string }
+
+void (async () => {
+  const client = createClient()
+  const rs = await client.query({
+    query: 'SELECT number FROM system.numbers LIMIT 100',
+    format: 'JSONEachRowWithProgress',
+  })
+
+  let totalRows = 0
+  let totalProgressRows = 0
+
+  const stream = rs.stream<Data>()
+  for await (const rows of stream) {
+    for (const row of rows) {
+      const decodedRow = row.json()
+      if (isProgressRow(decodedRow)) {
+        console.log('Got a progress row:', decodedRow)
+        totalProgressRows++
+      } else {
+        totalRows++
+        if (totalRows % 100 === 0) {
+          console.log('Sample row:', decodedRow)
+        }
+      }
+    }
+  }
+
+  console.log('Total rows:', totalRows)
+  console.log('Total progress rows:', totalProgressRows)
+
+  await client.close()
+})()
diff --git a/package.json b/package.json
index fac46808..4ad2d8c3 100644
--- a/package.json
+++ b/package.json
@@ -43,25 +43,25 @@
     "prepare": "husky"
   },
   "devDependencies": {
-    "@faker-js/faker": "^8.4.1",
+    "@faker-js/faker": "^9.0.2",
     "@istanbuljs/nyc-config-typescript": "^1.0.2",
     "@types/jasmine": "^5.1.4",
-    "@types/node": "^20.11.30",
+    "@types/node": "^22.7.0",
     "@types/sinon": "^17.0.3",
     "@types/split2": "^4.2.3",
-    "@types/uuid": "^9.0.8",
-    "@typescript-eslint/eslint-plugin": "^7.3.1",
-    "@typescript-eslint/parser": "^7.3.1",
-    "apache-arrow": "^15.0.2",
-    "eslint": "^8.57.0",
+    "@types/uuid": "^10.0.0",
+    "@typescript-eslint/eslint-plugin": "^8.7.0",
+    "@typescript-eslint/parser": "^8.7.0",
+    "apache-arrow": "^18.0.0",
+    "eslint": "^8.57.1",
     "eslint-config-prettier": "^9.1.0",
-    "eslint-plugin-expect-type": "^0.3.0",
-    "eslint-plugin-prettier": "^5.1.3",
-    "husky": "^9.0.11",
-    "jasmine": "^5.1.0",
-    "jasmine-core": "^5.1.2",
+    "eslint-plugin-expect-type": "^0.4.3",
+    "eslint-plugin-prettier": "^5.2.1",
+    "husky": "^9.1.6",
+    "jasmine": "^5.3.0",
+    "jasmine-core": "^5.3.0",
     "jasmine-expect": "^5.0.0",
-    "karma": "^6.4.3",
+    "karma": "^6.4.4",
     "karma-chrome-launcher": "^3.2.0",
     "karma-firefox-launcher": "^2.1.3",
     "karma-jasmine": "^5.1.0",
@@ -69,24 +69,24 @@
     "karma-sourcemap-loader": "^0.4.0",
     "karma-typescript": "^5.5.4",
     "karma-webpack": "^5.0.1",
-    "lint-staged": "^15.2.2",
-    "nyc": "^15.1.0",
-    "parquet-wasm": "0.6.0-beta.2",
-    "prettier": "3.2.5",
-    "sinon": "^17.0.1",
+    "lint-staged": "^15.2.10",
+    "nyc": "^17.1.0",
+    "parquet-wasm": "0.6.1",
+    "prettier": "3.3.3",
+    "sinon": "^19.0.2",
     "source-map-support": "^0.5.21",
     "split2": "^4.2.0",
     "terser-webpack-plugin": "^5.3.10",
-    "ts-jest": "^29.1.2",
+    "ts-jest": "^29.2.5",
     "ts-loader": "^9.5.1",
     "ts-node": "^10.9.2",
     "tsconfig-paths": "^4.2.0",
     "tsconfig-paths-webpack-plugin": "^4.1.0",
-    "typescript": "^5.4.3",
-    "uuid": "^9.0.1",
-    "webpack": "^5.91.0",
+    "typescript": "^5.6.2",
+    "uuid": "^11.0.1",
+    "webpack": "^5.95.0",
     "webpack-cli": "^5.1.4",
-    "webpack-merge": "^5.10.0"
+    "webpack-merge": "^6.0.1"
   },
   "workspaces": [
     "./packages/*"
diff --git a/packages/client-common/__tests__/integration/abort_request.test.ts b/packages/client-common/__tests__/integration/abort_request.test.ts
index 92b1543a..efefaf5f 100644
--- a/packages/client-common/__tests__/integration/abort_request.test.ts
+++ b/packages/client-common/__tests__/integration/abort_request.test.ts
@@ -7,7 +7,6 @@ describe('abort request', () => {
   beforeEach(() => {
     client = createTestClient()
   })
-
   afterEach(async () => {
     await client.close()
   })
diff --git a/packages/client-common/__tests__/integration/error_parsing.test.ts b/packages/client-common/__tests__/integration/error_parsing.test.ts
index 7cb3dc13..542e77ab 100644
--- a/packages/client-common/__tests__/integration/error_parsing.test.ts
+++ b/packages/client-common/__tests__/integration/error_parsing.test.ts
@@ -14,9 +14,10 @@ describe('ClickHouse server errors parsing', () => {
     // Possible error messages here:
     // (since 24.3+, Cloud SMT): Unknown expression identifier 'number' in scope SELECT number AS FR
     // (since 23.8+, Cloud RMT): Missing columns: 'number' while processing query: 'SELECT number AS FR', required columns: 'number'
+    // (since 24.9+): Unknown expression identifier `number` in scope SELECT number AS FR
     const errorMessagePattern =
       `((?:Missing columns: 'number' while processing query: 'SELECT number AS FR', required columns: 'number')|` +
-      `(?:Unknown expression identifier 'number' in scope SELECT number AS FR))`
+      `(?:Unknown expression identifier ('|\`)number('|\`) in scope SELECT number AS FR))`
     await expectAsync(
       client.query({
         query: 'SELECT number FR',
@@ -37,7 +38,7 @@ describe('ClickHouse server errors parsing', () => {
     const dbName = getTestDatabaseName()
     const errorMessagePattern =
       `((?:^Table ${dbName}.unknown_table does not exist.*)|` +
-      `(?:Unknown table expression identifier 'unknown_table' in scope))`
+      `(?:Unknown table expression identifier ('|\`)unknown_table('|\`) in scope))`
     await expectAsync(
       client.query({
         query: 'SELECT * FROM unknown_table',
diff --git a/packages/client-common/__tests__/integration/read_only_user.test.ts b/packages/client-common/__tests__/integration/read_only_user.test.ts
index 260fed26..73fd9b06 100644
--- a/packages/client-common/__tests__/integration/read_only_user.test.ts
+++ b/packages/client-common/__tests__/integration/read_only_user.test.ts
@@ -1,17 +1,21 @@
 import type { ClickHouseClient } from '@clickhouse/client-common'
+import { isCloudTestEnv } from '@test/utils/test_env'
 import { createReadOnlyUser } from '../fixtures/read_only_user'
 import { createSimpleTable } from '../fixtures/simple_table'
 import { createTestClient, getTestDatabaseName, guid } from '../utils'
 
 describe('read only user', () => {
+  let defaultClient: ClickHouseClient
   let client: ClickHouseClient
   let tableName: string
+  let userName: string
 
   beforeAll(async () => {
     const database = getTestDatabaseName()
-    const defaultClient = createTestClient()
+    defaultClient = createTestClient()
 
-    const { username, password } = await createReadOnlyUser(defaultClient)
+    const credentials = await createReadOnlyUser(defaultClient)
+    userName = credentials.username
 
     // Populate some test table to select from
     tableName = `read_only_user_data_${guid()}`
@@ -20,13 +24,12 @@ describe('read only user', () => {
       table: tableName,
       values: [[42, 'hello', [0, 1]]],
     })
-    await defaultClient.close()
 
     // Create a client that connects read only user to the test database
     client = createTestClient({
-      username,
-      password,
       database,
+      username: credentials.username,
+      password: credentials.password,
       clickhouse_settings: {
         // readonly user cannot adjust settings. reset the default ones set by fixtures.
         // might be fixed by https://github.com/ClickHouse/ClickHouse/issues/40244
@@ -37,7 +40,13 @@ describe('read only user', () => {
   })
 
   afterAll(async () => {
+    if (isCloudTestEnv()) {
+      await defaultClient.command({
+        query: `DROP USER IF EXISTS ${userName}`,
+      })
+    }
     await client.close()
+    await defaultClient.close()
   })
 
   it('should select some data without issues', async () => {
diff --git a/packages/client-common/__tests__/unit/clickhouse_types.test.ts b/packages/client-common/__tests__/unit/clickhouse_types.test.ts
new file mode 100644
index 00000000..96b43fff
--- /dev/null
+++ b/packages/client-common/__tests__/unit/clickhouse_types.test.ts
@@ -0,0 +1,29 @@
+import { isProgressRow } from '@clickhouse/client-common'
+
+describe('ClickHouse types', () => {
+  it('should check if a row is progress row', async () => {
+    const row = {
+      progress: {
+        read_rows: '1',
+        read_bytes: '1',
+        written_rows: '1',
+        written_bytes: '1',
+        total_rows_to_read: '1',
+        result_rows: '1',
+        result_bytes: '1',
+        elapsed_ns: '1',
+      },
+    }
+    expect(isProgressRow(row)).toBeTruthy()
+    expect(isProgressRow({})).toBeFalsy()
+    expect(
+      isProgressRow({
+        ...row,
+        extra: 'extra',
+      }),
+    ).toBeFalsy()
+    expect(isProgressRow(null)).toBeFalsy()
+    expect(isProgressRow(42)).toBeFalsy()
+    expect(isProgressRow({ foo: 'bar' })).toBeFalsy()
+  })
+})
diff --git a/packages/client-common/__tests__/unit/parse_column_types.test.ts b/packages/client-common/__tests__/unit/parse_column_types.test.ts
new file mode 100644
index 00000000..2a68eb66
--- /dev/null
+++ b/packages/client-common/__tests__/unit/parse_column_types.test.ts
@@ -0,0 +1,56 @@
+import { parseFixedStringType } from '../../src/parse'
+
+describe('Columns types parser', () => {
+  describe('FixedString', () => {
+    it('should parse FixedString', async () => {
+      const args: [string, number][] = [
+        ['FixedString(1)', 1],
+        ['FixedString(42)', 42],
+        ['FixedString(100)', 100],
+        ['FixedString(32768)', 32768],
+      ]
+      args.forEach(([columnType, sizeBytes]) => {
+        const result = parseFixedStringType({
+          columnType,
+          sourceType: columnType,
+        })
+        expect(result)
+          .withContext(
+            `Expected ${columnType} to be parsed as a FixedString with size ${sizeBytes}`,
+          )
+          .toEqual({ type: 'FixedString', sizeBytes, sourceType: columnType })
+      })
+    })
+
+    it('should throw on invalid FixedString type', async () => {
+      const args: [string][] = [
+        ['FixedString'],
+        ['FixedString('],
+        ['FixedString()'],
+        ['String'],
+      ]
+      args.forEach(([columnType]) => {
+        expect(() =>
+          parseFixedStringType({ columnType, sourceType: columnType }),
+        )
+          .withContext(`Expected ${columnType} to throw`)
+          .toThrowError('Invalid FixedString type')
+      })
+    })
+
+    it('should throw on invalid FixedString size', async () => {
+      const args: [string][] = [
+        ['FixedString(0)'],
+        ['FixedString(x)'],
+        [`FixedString(')`],
+      ]
+      args.forEach(([columnType]) => {
+        expect(() =>
+          parseFixedStringType({ columnType, sourceType: columnType }),
+        )
+          .withContext(`Expected ${columnType} to throw`)
+          .toThrowError('Invalid FixedString size in bytes')
+      })
+    })
+  })
+})
diff --git a/packages/client-common/__tests__/unit/parse_column_types_array.test.ts b/packages/client-common/__tests__/unit/parse_column_types_array.test.ts
new file mode 100644
index 00000000..acb7d766
--- /dev/null
+++ b/packages/client-common/__tests__/unit/parse_column_types_array.test.ts
@@ -0,0 +1,308 @@
+import type {
+  ParsedColumnDateTime,
+  ParsedColumnDateTime64,
+  ParsedColumnEnum,
+  SimpleColumnType,
+} from '../../src/parse'
+import { parseArrayType } from '../../src/parse'
+
+describe('Columns types parser - Array', () => {
+  it('should parse Array with a simple value type', async () => {
+    type TestArgs = {
+      columnType: string
+      valueType: SimpleColumnType
+      dimensions: number
+    }
+    const args: TestArgs[] = [
+      {
+        columnType: 'Array(String)',
+        valueType: 'String',
+        dimensions: 1,
+      },
+      {
+        columnType: 'Array(UInt8)',
+        valueType: 'UInt8',
+        dimensions: 1,
+      },
+      {
+        columnType: 'Array(Array(Int32))',
+        valueType: 'Int32',
+        dimensions: 2,
+      },
+      {
+        columnType: 'Array(Array(Array(Date32)))',
+        valueType: 'Date32',
+        dimensions: 3,
+      },
+      {
+        columnType: 'Array(Array(Array(Array(Float32))))',
+        valueType: 'Float32',
+        dimensions: 4,
+      },
+    ]
+    args.forEach((args: TestArgs) => {
+      const { columnType, valueType, dimensions } = args
+      const result = parseArrayType({ columnType, sourceType: columnType })
+      expect(result)
+        .withContext(
+          `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions`,
+        )
+        .toEqual({
+          type: 'Array',
+          value: {
+            type: 'Simple',
+            columnType: valueType,
+            sourceType: valueType, // T
+          },
+          sourceType: columnType, // Array(T)
+          dimensions,
+        })
+    })
+  })
+
+  it('should parse Array with Nullable', async () => {
+    type TestArgs = {
+      columnType: string
+      valueType: SimpleColumnType
+      dimensions: number
+    }
+    const args: TestArgs[] = [
+      {
+        columnType: 'Array(Nullable(String))',
+        valueType: 'String',
+        dimensions: 1,
+      },
+      {
+        columnType: 'Array(Array(Nullable(Int32)))',
+        valueType: 'Int32',
+        dimensions: 2,
+      },
+    ]
+    args.forEach(({ columnType, valueType, dimensions }: TestArgs) => {
+      const result = parseArrayType({ columnType, sourceType: columnType })
+      expect(result)
+        .withContext(
+          `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions`,
+        )
+        .toEqual({
+          type: 'Array',
+          value: {
+            type: 'Nullable',
+            value: {
+              type: 'Simple',
+              columnType: valueType,
+              sourceType: valueType, // T
+            },
+            sourceType: `Nullable(${valueType})`, // Nullable(T)
+          },
+          sourceType: columnType, // Array(Nullable(T))
+          dimensions,
+        })
+    })
+  })
+
+  it('should parse Array with Enum value type', async () => {
+    type TestArgs = {
+      value: ParsedColumnEnum
+      dimensions: number
+      columnType: string
+    }
+    const sourceEnum8 = `Enum8('foo' = 42)`
+    const valuesEnum8 = { 42: 'foo' }
+    const sourceEnum16 = `Enum16('bar' = 144, 'qaz' = 500)`
+    const valuesEnum16 = {
+      144: 'bar',
+      500: 'qaz',
+    }
+    const args: TestArgs[] = [
+      {
+        value: {
+          type: 'Enum',
+          intSize: 8,
+          values: valuesEnum8,
+          sourceType: sourceEnum8,
+        },
+        dimensions: 1,
+        columnType: `Array(${sourceEnum8})`,
+      },
+      {
+        value: {
+          type: 'Enum',
+          intSize: 16,
+          values: valuesEnum16,
+          sourceType: sourceEnum16,
+        },
+        dimensions: 1,
+        columnType: `Array(${sourceEnum16})`,
+      },
+      {
+        value: {
+          type: 'Enum',
+          intSize: 8,
+          values: valuesEnum8,
+          sourceType: sourceEnum8,
+        },
+        dimensions: 2,
+        columnType: `Array(Array(${sourceEnum8}))`,
+      },
+      {
+        value: {
+          type: 'Enum',
+          intSize: 16,
+          values: valuesEnum16,
+          sourceType: sourceEnum16,
+        },
+        dimensions: 3,
+        columnType: `Array(Array(Array(${sourceEnum16})))`,
+      },
+    ]
+    args.forEach(({ columnType, dimensions, value }) => {
+      const result = parseArrayType({ columnType, sourceType: columnType })
+      expect(result)
+        .withContext(
+          `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`,
+        )
+        .toEqual({
+          type: 'Array',
+          sourceType: columnType,
+          dimensions,
+          value,
+        })
+    })
+  })
+
+  it('should parse Array of DateTime', async () => {
+    type TestArgs = {
+      value: ParsedColumnDateTime
+      dimensions: number
+      columnType: string
+    }
+    const args: TestArgs[] = [
+      {
+        value: {
+          type: 'DateTime',
+          timezone: null,
+          sourceType: 'DateTime',
+        },
+        dimensions: 1,
+        columnType: 'Array(DateTime)',
+      },
+      {
+        value: {
+          type: 'DateTime',
+          timezone: 'UTC',
+          sourceType: `DateTime('UTC')`,
+        },
+        dimensions: 1,
+        columnType: `Array(DateTime('UTC'))`,
+      },
+      {
+        value: {
+          type: 'DateTime',
+          timezone: 'Etc/GMT-5',
+          sourceType: `DateTime('Etc/GMT-5')`,
+        },
+        dimensions: 2,
+        columnType: `Array(Array(DateTime('Etc/GMT-5')))`,
+      },
+    ]
+    args.forEach(({ columnType, dimensions, value }) => {
+      const result = parseArrayType({ columnType, sourceType: columnType })
+      expect(result)
+        .withContext(
+          `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`,
+        )
+        .toEqual({
+          type: 'Array',
+          sourceType: columnType,
+          dimensions,
+          value,
+        })
+    })
+  })
+
+  it('should parse Array of DateTime64', async () => {
+    type TestArgs = {
+      value: ParsedColumnDateTime64
+      dimensions: number
+      columnType: string
+    }
+    const args: TestArgs[] = [
+      {
+        value: {
+          type: 'DateTime64',
+          timezone: null,
+          sourceType: 'DateTime64(0)',
+          precision: 0,
+        },
+        dimensions: 1,
+        columnType: 'Array(DateTime64(0))',
+      },
+      {
+        value: {
+          type: 'DateTime64',
+          timezone: 'UTC',
+          sourceType: `DateTime64(3, 'UTC')`,
+          precision: 3,
+        },
+        dimensions: 1,
+        columnType: `Array(DateTime64(3, 'UTC'))`,
+      },
+      {
+        value: {
+          type: 'DateTime64',
+          timezone: 'Etc/GMT-5',
+          sourceType: `DateTime64(6, 'Etc/GMT-5')`,
+          precision: 6,
+        },
+        dimensions: 2,
+        columnType: `Array(Array(DateTime64(6, 'Etc/GMT-5')))`,
+      },
+      {
+        value: {
+          type: 'DateTime64',
+          timezone: 'Europe/Sofia',
+          sourceType: `DateTime64(9, 'Europe/Sofia')`,
+          precision: 9,
+        },
+        dimensions: 3,
+        columnType: `Array(Array(Array(DateTime64(9, 'Europe/Sofia'))))`,
+      },
+    ]
+
+    args.forEach(({ columnType, dimensions, value }) => {
+      const result = parseArrayType({ columnType, sourceType: columnType })
+      expect(result)
+        .withContext(
+          `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`,
+        )
+        .toEqual({
+          type: 'Array',
+          sourceType: columnType,
+          dimensions,
+          value,
+        })
+    })
+  })
+
+  // TODO: Map type test.
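+
+  // A hedged sketch of what that TODO might cover, assuming parseArrayType
+  // delegates to the Map parser for its value type (ParsedColumnArray allows a
+  // Map value); the expected shape mirrors the Map parser tests:
+  it('should parse Array with a Map value type', async () => {
+    const mapType = 'Map(String, Int32)'
+    const sourceType = `Array(${mapType})`
+    const result = parseArrayType({ columnType: sourceType, sourceType })
+    expect(result)
+      .withContext(`Expected ${sourceType} to be parsed as an Array of Map`)
+      .toEqual({
+        type: 'Array',
+        value: {
+          type: 'Map',
+          key: { type: 'Simple', columnType: 'String', sourceType: 'String' },
+          value: { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' },
+          sourceType: mapType,
+        },
+        dimensions: 1,
+        sourceType,
+      })
+  })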
+
+  it('should throw on invalid Array type', async () => {
+    // Array(Int8) is the shortest valid definition
+    const args = [
+      ['Array'],
+      ['Array('],
+      ['Array()'],
+      ['Array(a'],
+      ['Array(ab'],
+      ['Array(ab)'],
+      ['Array(abc)'],
+      ['String'],
+    ]
+    args.forEach(([columnType]) => {
+      expect(() => parseArrayType({ columnType, sourceType: columnType }))
+        .withContext(`Expected ${columnType} to throw`)
+        .toThrowError('Invalid Array type')
+    })
+  })
+})
diff --git a/packages/client-common/__tests__/unit/parse_column_types_datetime.test.ts b/packages/client-common/__tests__/unit/parse_column_types_datetime.test.ts
new file mode 100644
index 00000000..6fc0c00e
--- /dev/null
+++ b/packages/client-common/__tests__/unit/parse_column_types_datetime.test.ts
@@ -0,0 +1,113 @@
+import { parseDateTime64Type, parseDateTimeType } from '../../src/parse'
+
+describe('Columns types parser - DateTime and DateTime64', () => {
+  describe('DateTime', () => {
+    it('should parse DateTime', async () => {
+      const args: [string, string | null][] = [
+        ['DateTime', null],
+        [`DateTime('GB')`, 'GB'],
+        [`DateTime('UTC')`, 'UTC'],
+        [`DateTime('Europe/Amsterdam')`, 'Europe/Amsterdam'],
+      ]
+      args.forEach(([columnType, timezone]) => {
+        const result = parseDateTimeType({ columnType, sourceType: columnType })
+        expect(result)
+          .withContext(`Expected ${columnType} to be parsed as a DateTime`)
+          .toEqual({ type: 'DateTime', sourceType: columnType, timezone })
+      })
+    })
+
+    it('should throw on invalid DateTime', async () => {
+      // DateTime('GB') has the least amount of chars allowed for a valid DateTime type.
+      const args: [string][] = [
+        ['DateTime()'],
+        [`DateTime(')`],
+        [`DateTime('')`],
+        [`DateTime('A')`],
+        ['String'],
+      ]
+      args.forEach(([columnType]) => {
+        expect(() => parseDateTimeType({ columnType, sourceType: columnType }))
+          .withContext(`Expected ${columnType} to throw`)
+          .toThrowError('Invalid DateTime type')
+      })
+    })
+  })
+
+  describe('DateTime64', () => {
+    const precisionRange = [...Array(10).keys()] // 0..9
+
+    it('should parse DateTime64 without timezone', async () => {
+      const args: [string, number][] = precisionRange.map((precision) => [
+        `DateTime64(${precision})`,
+        precision,
+      ])
+      args.forEach(([columnType, precision]) => {
+        const result = parseDateTime64Type({
+          columnType,
+          sourceType: columnType,
+        })
+        expect(result)
+          .withContext(
+            `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision}`,
+          )
+          .toEqual({
+            type: 'DateTime64',
+            timezone: null,
+            sourceType: columnType,
+            precision,
+          })
+      })
+    })
+
+    it('should parse DateTime64 with timezone', async () => {
+      const allPrecisionArgs: [string, number, string][][] = precisionRange.map(
+        (precision) => [
+          [`DateTime64(${precision}, 'GB')`, precision, 'GB'],
+          [`DateTime64(${precision}, 'UTC')`, precision, 'UTC'],
+          [`DateTime64(${precision}, 'Etc/GMT-5')`, precision, 'Etc/GMT-5'],
+        ],
+      )
+      allPrecisionArgs.forEach((args) =>
+        args.forEach(([columnType, precision, timezone]) => {
+          const result = parseDateTime64Type({
+            columnType,
+            sourceType: columnType,
+          })
+          expect(result)
+            .withContext(
+              `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision} and timezone ${timezone}`,
+            )
+            .toEqual({
+              type: 'DateTime64',
+              sourceType: columnType,
+              timezone,
+              precision,
+            })
+        }),
+      )
+    })
+
+    it('should throw on invalid DateTime64 type', async () => {
+      const args = [['DateTime64('], ['DateTime64()'], ['String']]
+      args.forEach(([columnType]) => {
+        expect(() =>
+          parseDateTime64Type({ columnType, sourceType: columnType }),
+        )
+          .withContext(`Expected ${columnType} to throw`)
+          .toThrowError('Invalid DateTime64 type')
+      })
+    })
+
+    it('should throw on invalid DateTime64 precision', async () => {
+      const args = [[`DateTime64(')`], [`DateTime64(foo)`]]
+      args.forEach(([columnType]) => {
+        expect(() =>
+          parseDateTime64Type({ columnType, sourceType: columnType }),
+        )
+          .withContext(`Expected ${columnType} to throw`)
+          .toThrowError('Invalid DateTime64 precision')
+      })
+    })
+  })
+})
diff --git a/packages/client-common/__tests__/unit/parse_column_types_decimal.test.ts b/packages/client-common/__tests__/unit/parse_column_types_decimal.test.ts
new file mode 100644
index 00000000..f0010db8
--- /dev/null
+++ b/packages/client-common/__tests__/unit/parse_column_types_decimal.test.ts
@@ -0,0 +1,103 @@
+import { parseDecimalType } from '../../src/parse'
+
+describe('Columns types parser - Decimal', () => {
+  type TestArgs = {
+    sourceType: string
+    precision: number
+    scale: number
+    intSize: 32 | 64 | 128 | 256
+  }
+
+  it('should parse Decimal', async () => {
+    const args: TestArgs[] = [
+      {
+        sourceType: 'Decimal(7, 2)',
+        precision: 7,
+        scale: 2,
+        intSize: 32,
+      },
+      {
+        sourceType: 'Decimal(12, 4)',
+        precision: 12,
+        scale: 4,
+        intSize: 64,
+      },
+      {
+        sourceType: 'Decimal(27, 6)',
+        precision: 27,
+        scale: 6,
+        intSize: 128,
+      },
+      {
+        sourceType: 'Decimal(42, 8)',
+        precision: 42,
+        scale: 8,
+        intSize: 256,
+      },
+    ]
+    args.forEach(({ sourceType, precision, scale, intSize }) => {
+      const result = parseDecimalType({ columnType: sourceType, sourceType })
+      expect(result)
+        .withContext(
+          `Expected ${sourceType} to be parsed as a Decimal with precision ${precision}, scale ${scale} and intSize ${intSize}`,
+        )
+        .toEqual({
+          type: 'Decimal',
+          params: { precision, scale, intSize },
+          sourceType,
+        })
+    })
+  })
+
+  it('should throw on invalid Decimal type', async () => {
+    const args: [string][] = [
+      ['Decimal'],
+      ['Decimal('],
+      ['Decimal()'],
+      ['Decimal(1)'],
+      ['Decimal(1,)'],
+      ['Decimal(1, )'],
+      ['String'],
+    ]
+    args.forEach(([columnType]) => {
+      expect(() => parseDecimalType({ columnType, sourceType: columnType }))
+        .withContext(`Expected ${columnType} to throw`)
+        .toThrowError('Invalid Decimal type')
+    })
+  })
+
+  it('should throw on invalid Decimal precision', async () => {
+    const args: [string][] = [
+      ['Decimal(0, 0)'],
+      ['Decimal(x, 0)'],
+      [`Decimal(', ')`],
+      [`Decimal(77, 1)`], // max is 76
+    ]
+    args.forEach(([columnType]) => {
+      expect(() => parseDecimalType({ columnType, sourceType: columnType }))
+        .withContext(`Expected ${columnType} to throw`)
+        .toThrowError('Invalid Decimal precision')
+    })
+  })
+
+  it('should throw on invalid Decimal scale', async () => {
+    const args: [string][] = [
+      ['Decimal(1, 2)'], // scale should be less than precision
+      ['Decimal(1, x)'],
+      [`Decimal(42, ,)`],
+      [`Decimal(42, ')`],
+    ]
+    args.forEach(([columnType]) => {
+      expect(() => parseDecimalType({ columnType, sourceType: columnType }))
+        .withContext(`Expected ${columnType} to throw`)
+        .toThrowError('Invalid Decimal scale')
+    })
+  })
+
+  it('should throw when precision or scale cannot be parsed', async () => {
+    const columnType = 'Decimal(foobar)'
+    expect(() =>
+      parseDecimalType({ columnType, sourceType: columnType }),
+    ).toThrowError('Expected Decimal type to have both precision and scale')
+  })
+})
diff --git a/packages/client-common/__tests__/unit/parse_column_types_enum.test.ts b/packages/client-common/__tests__/unit/parse_column_types_enum.test.ts
new file mode 100644
index 00000000..a3ac56c3
--- /dev/null
+++ b/packages/client-common/__tests__/unit/parse_column_types_enum.test.ts
@@ -0,0 +1,89 @@
+import { enumTypes, parsedEnumTestArgs } from '@test/utils/native_columns'
+import { parseEnumType } from '../../src/parse'
+
+describe('Columns types parser - Enum', () => {
+  it('should parse correct values', async () => {
+    parsedEnumTestArgs.forEach((expected) => {
+      const result = parseEnumType({
+        sourceType: expected.sourceType,
+        columnType: expected.sourceType,
+      })
+      expect(result)
+        .withContext(
+          `Expected ${
+            expected.sourceType
+          } to be parsed as an Enum with intSize ${
+            expected.intSize
+          } and values ${JSON.stringify(expected.values)}`,
+        )
+        .toEqual(expected)
+    })
+  })
+
+  it('should throw when the type is not a valid enum', async () => {
+    const args: [string][] = [
+      ['Enum'], // should be either 8 or 16
+      ['Enum32'],
+      ['Enum64'],
+      ['String'],
+      ['Enum(String)'],
+    ]
+    args.forEach(([columnType]) => {
+      expect(() => parseEnumType({ columnType, sourceType: columnType }))
+        .withContext(`Expected ${columnType} to throw`)
+        .toThrowError('Expected Enum to be either Enum8 or Enum16')
+    })
+  })
+
+  it('should throw when the values are not valid', async () => {
+    const args: [string][] = [["Enum8('a' = x)"], ["Enum16('foo' = 'bar')"]]
+    args.forEach(([columnType]) => {
+      expect(() => parseEnumType({ columnType, sourceType: columnType }))
+        .withContext(`Expected ${columnType} to throw`)
+        .toThrowError('Expected Enum index to be a valid number')
+    })
+  })
+
+  it('should throw on duplicate indices', async () => {
+    const args: [string][] = [
+      ["Enum8('a' = 0, 'b' = 0)"],
+      ["Enum8('a' = 0, 'b' = 1, 'c' = 1)"],
+    ]
+    args.forEach(([columnType]) => {
+      expect(() => parseEnumType({ columnType, sourceType: columnType }))
+        .withContext(`Expected ${columnType} to throw`)
+        .toThrowError('Duplicate Enum index')
+    })
+  })
+
+  it('should throw on duplicate names', async () => {
+    const args: [string][] = [
+      ["Enum8('a' = 0, 'a' = 1)"],
+      ["Enum8('a' = 0, 'b' = 1, 'b' = 2)"],
+    ]
+    args.forEach(([columnType]) => {
+      expect(() => parseEnumType({ columnType, sourceType: columnType }))
+        .withContext(`Expected ${columnType} to throw`)
+        .toThrowError('Duplicate Enum name')
+    })
+  })
+
+  it('should throw when Enum has no values to parse', async () => {
+    // The minimal allowed Enum definition is Enum8('' = 0), i.e. 6 chars inside.
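+    // (each shorter parenthesized payload generated below should therefore be rejected)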
+    const allEnumTypeArgs: string[][] = enumTypes.map(([enumType]) => [
+      `${enumType}()`,
+      `${enumType}(')`,
+      `${enumType}('')`,
+      `${enumType}('' )`,
+      `${enumType}('' =)`,
+      `${enumType}('' = )`,
+    ])
+    allEnumTypeArgs.forEach((args) =>
+      args.forEach((columnType) => {
+        expect(() => parseEnumType({ columnType, sourceType: columnType }))
+          .withContext(`Expected ${columnType} to throw`)
+          .toThrowError('Invalid Enum type values')
+      }),
+    )
+  })
+})
diff --git a/packages/client-common/__tests__/unit/parse_column_types_map.test.ts b/packages/client-common/__tests__/unit/parse_column_types_map.test.ts
new file mode 100644
index 00000000..57b4b303
--- /dev/null
+++ b/packages/client-common/__tests__/unit/parse_column_types_map.test.ts
@@ -0,0 +1,41 @@
+import type { ParsedColumnMap } from '../../src/parse'
+import { parseMapType } from '../../src/parse'
+
+describe('Columns types parser - Map', () => {
+  it('should parse Map with simple types', async () => {
+    const args: [ParsedColumnMap, string][] = [
+      [
+        {
+          type: 'Map',
+          key: { type: 'Simple', columnType: 'String', sourceType: 'String' },
+          value: { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' },
+          sourceType: 'Map(String, UInt8)',
+        },
+        'Map(String, UInt8)',
+      ],
+      [
+        {
+          type: 'Map',
+          key: { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' },
+          value: {
+            type: 'Simple',
+            columnType: 'Float32',
+            sourceType: 'Float32',
+          },
+          sourceType: 'Map(Int32, Float32)',
+        },
+        'Map(Int32, Float32)',
+      ],
+    ]
+    args.forEach(([expected, sourceType]) => {
+      const result = parseMapType({ columnType: sourceType, sourceType })
+      expect(result)
+        .withContext(
+          `Expected ${sourceType} to be parsed as a Map with key type ${expected.key.sourceType} and value type ${expected.value.sourceType}`,
+        )
+        .toEqual(expected)
+    })
+  })
+
+  // TODO: rest of the allowed types.
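+
+  // A hedged sketch of one more case that TODO might cover, assuming parseMapType
+  // parses arbitrary value types recursively (ParsedColumnMap allows Array values):
+  it('should parse Map with an Array value type', async () => {
+    const sourceType = 'Map(String, Array(Int32))'
+    const result = parseMapType({ columnType: sourceType, sourceType })
+    expect(result).toEqual({
+      type: 'Map',
+      key: { type: 'Simple', columnType: 'String', sourceType: 'String' },
+      value: {
+        type: 'Array',
+        value: { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' },
+        dimensions: 1,
+        sourceType: 'Array(Int32)',
+      },
+      sourceType,
+    })
+  })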
+})
diff --git a/packages/client-common/__tests__/unit/parse_column_types_nullable.test.ts b/packages/client-common/__tests__/unit/parse_column_types_nullable.test.ts
new file mode 100644
index 00000000..daa9ca1d
--- /dev/null
+++ b/packages/client-common/__tests__/unit/parse_column_types_nullable.test.ts
@@ -0,0 +1,266 @@
+import type {
+  ParsedColumnDateTime,
+  ParsedColumnDateTime64,
+  ParsedColumnDecimal,
+  ParsedColumnEnum,
+  ParsedColumnSimple,
+} from '../../src/parse'
+import { asNullableType } from '../../src/parse'
+
+describe('Columns types parser - Nullable', () => {
+  it('should wrap a simple type', async () => {
+    const args: [ParsedColumnSimple, string][] = [
+      [
+        { type: 'Simple', columnType: 'String', sourceType: 'String' },
+        'Nullable(String)',
+      ],
+      [
+        { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' },
+        'Nullable(UInt8)',
+      ],
+      [
+        { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' },
+        'Nullable(Int32)',
+      ],
+      [
+        { type: 'Simple', columnType: 'Float32', sourceType: 'Float32' },
+        'Nullable(Float32)',
+      ],
+    ]
+    args.forEach(([value, sourceType]) => {
+      const result = asNullableType(value, sourceType)
+      expect(result)
+        .withContext(
+          `Expected ${value.columnType} to be wrapped as ${sourceType}`,
+        )
+        .toEqual({
+          type: 'Nullable',
+          sourceType,
+          value,
+        })
+    })
+  })
+
+  it('should wrap an Enum', async () => {
+    const sourceEnum8 = `Enum8('foo' = 42)`
+    const valuesEnum8 = { 42: 'foo' }
+    const sourceEnum16 = `Enum16('bar' = 144, 'qaz' = 500)`
+    const valuesEnum16 = {
+      144: 'bar',
+      500: 'qaz',
+    }
+    const args: [ParsedColumnEnum, string][] = [
+      [
+        {
+          type: 'Enum',
+          intSize: 8,
+          values: valuesEnum8,
+          sourceType: sourceEnum8,
+        },
+        'Nullable(Enum8)',
+      ],
+      [
+        {
+          type: 'Enum',
+          intSize: 16,
+          values: valuesEnum16,
+          sourceType: sourceEnum16,
+        },
+        'Nullable(Enum16)',
+      ],
+    ]
+    args.forEach(([value, sourceType]) => {
+      const result = asNullableType(value, sourceType)
+      expect(result)
+        .withContext(`Expected ${value.type} to be wrapped as ${sourceType}`)
+        .toEqual({
+          type: 'Nullable',
+          sourceType,
+          value,
+        })
+    })
+  })
+
+  it('should wrap a Decimal', async () => {
+    const args: [ParsedColumnDecimal, string][] = [
+      [
+        {
+          type: 'Decimal',
+          params: { intSize: 32, precision: 4, scale: 3 },
+          sourceType: 'Decimal(4, 3)',
+        },
+        'Nullable(Decimal(4, 3))',
+      ],
+      [
+        {
+          type: 'Decimal',
+          params: { intSize: 64, precision: 12, scale: 6 },
+          sourceType: 'Decimal(12, 6)',
+        },
+        'Nullable(Decimal(12, 6))',
+      ],
+      [
+        {
+          type: 'Decimal',
+          params: { intSize: 128, precision: 24, scale: 12 },
+          sourceType: 'Decimal(24, 12)',
+        },
+        'Nullable(Decimal(24, 12))',
+      ],
+      [
+        {
+          type: 'Decimal',
+          params: { intSize: 256, precision: 42, scale: 20 },
+          sourceType: 'Decimal(42, 20)',
+        },
+        'Nullable(Decimal(42, 20))',
+      ],
+    ]
+    args.forEach(([value, sourceType]) => {
+      const result = asNullableType(value, sourceType)
+      expect(result)
+        .withContext(
+          `Expected ${value.sourceType} to be wrapped as ${sourceType}`,
+        )
+        .toEqual({
+          type: 'Nullable',
+          sourceType,
+          value,
+        })
+    })
+  })
+
+  it('should wrap a DateTime', async () => {
+    const args: [ParsedColumnDateTime, string][] = [
+      [
+        { type: 'DateTime', timezone: null, sourceType: 'DateTime' },
+        'Nullable(DateTime)',
+      ],
+      [
+        { type: 'DateTime', timezone: 'UTC', sourceType: "DateTime('UTC')" },
+        `Nullable(DateTime('UTC'))`,
+      ],
+      [
+        { type: 'DateTime', timezone: 'GB', sourceType: "DateTime('GB')" },
+        `Nullable(DateTime('GB'))`,
+      ],
+      [
+        {
+          type: 'DateTime',
+          timezone: 'Etc/GMT-5',
+          sourceType: `DateTime('Etc/GMT-5')`,
+        },
+        `Nullable(DateTime('Etc/GMT-5'))`,
+      ],
+    ]
+    args.forEach(([value, sourceType]) => {
+      const result = asNullableType(value, sourceType)
+      expect(result)
+        .withContext(
+          `Expected ${value.sourceType} to be wrapped as ${sourceType}`,
+        )
+        .toEqual({
+          type: 'Nullable',
+          sourceType,
+          value,
+        })
+    })
+  })
+
+  it('should wrap a DateTime64', async () => {
+    const args: [ParsedColumnDateTime64, string][] = [
+      [
+        {
+          type: 'DateTime64',
+          timezone: null,
+          sourceType: 'DateTime64(0)',
+          precision: 0,
+        },
+        'Nullable(DateTime64(0))',
+      ],
+      [
+        {
+          type: 'DateTime64',
+          timezone: null,
+          sourceType: 'DateTime64(3)',
+          precision: 3,
+        },
+        'Nullable(DateTime64(3))',
+      ],
+      [
+        {
+          type: 'DateTime64',
+          timezone: 'UTC',
+          sourceType: `DateTime64(3, 'UTC')`,
+          precision: 3,
+        },
+        `Nullable(DateTime64(3, 'UTC'))`,
+      ],
+      [
+        {
+          type: 'DateTime64',
+          timezone: 'GB',
+          sourceType: `DateTime64(6, 'GB')`,
+          precision: 6,
+        },
+        `Nullable(DateTime64(6, 'GB'))`,
+      ],
+      [
+        {
+          type: 'DateTime64',
+          timezone: 'Etc/GMT-5',
+          sourceType: `DateTime64(9, 'Etc/GMT-5')`,
+          precision: 9,
+        },
+        `Nullable(DateTime64(9, 'Etc/GMT-5'))`,
+      ],
+    ]
+    args.forEach(([value, sourceType]) => {
+      const result = asNullableType(value, sourceType)
+      expect(result)
+        .withContext(
+          `Expected ${value.sourceType} to be wrapped as ${sourceType}`,
+        )
+        .toEqual({
+          type: 'Nullable',
+          sourceType,
+          value,
+        })
+    })
+  })
+
+  it('should throw in case of Array or Map', async () => {
+    const columnUInt8: ParsedColumnSimple = {
+      type: 'Simple',
+      columnType: 'UInt8',
+      sourceType: 'UInt8',
+    }
+    const columnString: ParsedColumnSimple = {
+      type: 'Simple',
+      columnType: 'String',
+      sourceType: 'String',
+    }
+    expect(() =>
+      asNullableType(
+        {
+          type: 'Map',
+          key: columnUInt8,
+          value: columnString,
+          sourceType: 'Map(UInt8, String)',
+        },
+        '...',
+      ),
+    ).toThrowError('Map cannot be Nullable')
+    expect(() =>
+      asNullableType(
+        {
+          type: 'Array',
+          value: columnUInt8,
+          dimensions: 1,
+          sourceType: 'Array(UInt8)',
+        },
+        '...',
+      ),
+    ).toThrowError('Array cannot be Nullable')
+  })
+})
diff --git a/packages/client-common/__tests__/unit/parse_column_types_tuple.test.ts b/packages/client-common/__tests__/unit/parse_column_types_tuple.test.ts
new file mode 100644
index 00000000..2f100f8a
--- /dev/null
+++ b/packages/client-common/__tests__/unit/parse_column_types_tuple.test.ts
@@ -0,0 +1,164 @@
+import { parsedEnumTestArgs } from '@test/utils/native_columns'
+import type {
+  ParsedColumnDateTime,
+  ParsedColumnDateTime64,
+  ParsedColumnFixedString,
+  ParsedColumnSimple,
+  ParsedColumnTuple,
+} from '../../src/parse'
+import { parseTupleType } from '../../src/parse'
+
+describe('Columns types parser - Tuple', () => {
+  it('should parse Tuple with simple types', async () => {
+    const args: TestArgs[] = [
+      {
+        sourceType: 'Tuple(String, UInt8)',
+        expected: {
+          type: 'Tuple',
+          elements: [
+            { type: 'Simple', columnType: 'String', sourceType: 'String' },
+            { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' },
+          ],
+          sourceType: 'Tuple(String, UInt8)',
+        },
+      },
+      {
+        sourceType: 'Tuple(Int32, Float32)',
+        expected: {
+          type: 'Tuple',
+          elements: [
+            { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' },
+            { type: 'Simple', columnType: 'Float32', sourceType: 'Float32' },
+          ],
+          sourceType: 'Tuple(Int32, Float32)',
+        },
+      },
+    ]
+    args.forEach(({ expected, sourceType }) => {
+      const result = parseTupleType({ columnType: sourceType, sourceType })
+      expect(result)
+        .withContext(
+          `Expected ${sourceType} to have ${joinElements(expected)} elements`,
+        )
+        .toEqual(expected)
+    })
+  })
+
+  it('should parse Tuple with Decimals', async () => {
+    const args: TestArgs[] = [
+      {
+        sourceType: 'Tuple(Decimal(7, 2), Decimal(18, 4))',
+        expected: {
+          type: 'Tuple',
+          elements: [
+            {
+              type: 'Decimal',
+              sourceType: 'Decimal(7, 2)',
+              params: { precision: 7, scale: 2, intSize: 32 },
+            },
+            {
+              type: 'Decimal',
+              sourceType: 'Decimal(18, 4)',
+              params: { precision: 18, scale: 4, intSize: 64 },
+            },
+          ],
+          sourceType: 'Tuple(Decimal(7, 2), Decimal(18, 4))',
+        },
+      },
+    ]
+    args.forEach(({ expected, sourceType }) => {
+      const result = parseTupleType({ columnType: sourceType, sourceType })
+      expect(result)
+        .withContext(
+          `Expected ${sourceType} to have ${joinElements(expected)} elements`,
+        )
+        .toEqual(expected)
+    })
+  })
+
+  it('should parse Tuple with Enums', async () => {
+    const args: TestArgs[] = parsedEnumTestArgs.map((enumElement) => {
+      // e.g. Tuple(String, Enum8('a' = 1))
+      const sourceType = `Tuple(${stringElement.sourceType}, ${enumElement.sourceType})`
+      return {
+        sourceType,
+        expected: {
+          type: 'Tuple',
+          elements: [stringElement, enumElement],
+          sourceType,
+        },
+      }
+    })
+    args.forEach(({ expected, sourceType }) => {
+      const result = parseTupleType({ columnType: sourceType, sourceType })
+      expect(result)
+        .withContext(
+          `Expected ${sourceType} to have ${joinElements(expected)} elements`,
+        )
+        .toEqual(expected)
+    })
+  })
+
+  it('should parse Tuple with FixedString/DateTime', async () => {
+    const fixedStringElement: ParsedColumnFixedString = {
+      type: 'FixedString',
+      sourceType: 'FixedString(16)',
+      sizeBytes: 16,
+    }
+    const dateTimeElement: ParsedColumnDateTime = {
+      type: 'DateTime',
+      timezone: null,
+      sourceType: 'DateTime',
+    }
+    const dateTimeWithTimezoneElement: ParsedColumnDateTime = {
+      type: 'DateTime',
+      timezone: 'Europe/Amsterdam',
+      sourceType: `DateTime('Europe/Amsterdam')`,
+    }
+    const dateTime64Element: ParsedColumnDateTime64 = {
+      type: 'DateTime64',
+      timezone: null,
+      precision: 3,
+      sourceType: 'DateTime64(3)',
+    }
+    const dateTime64WithTimezoneElement: ParsedColumnDateTime64 = {
+      type: 'DateTime64',
+      timezone: 'Europe/Amsterdam',
+      precision: 9,
+      sourceType: `DateTime64(9, 'Europe/Amsterdam')`,
+    }
+    const elements = [
+      fixedStringElement,
+      dateTimeElement,
+      dateTimeWithTimezoneElement,
+      dateTime64Element,
+      dateTime64WithTimezoneElement,
+    ]
+    const elementsSourceTypes = elements.map((el) => el.sourceType).join(', ')
+    const sourceType = `Tuple(${elementsSourceTypes})`
+    const expected: ParsedColumnTuple = {
+      type: 'Tuple',
+      elements,
+      sourceType,
+    }
+    const result = parseTupleType({ columnType: sourceType, sourceType })
+    expect(result).toEqual(expected)
+  })
+
+  // TODO: Simple types permutations, Nullable, Arrays, Maps, Nested Tuples
+
+  const stringElement: ParsedColumnSimple = {
+    type: 'Simple',
+    sourceType: 'String',
+    columnType: 'String',
+  }
+})
+
+function joinElements(expected: ParsedColumnTuple) {
+  return expected.elements.map((el) => el.sourceType).join(', ')
+}
+
+type TestArgs = {
+  sourceType: string
+  expected: ParsedColumnTuple
+}
diff --git a/packages/client-common/__tests__/utils/client.ts b/packages/client-common/__tests__/utils/client.ts
index 758b579e..59f87957 100644
--- a/packages/client-common/__tests__/utils/client.ts
+++ b/packages/client-common/__tests__/utils/client.ts
@@ -64,6 +64,7 @@ export function createTestClient(
     clickhouse_settings: clickHouseSettings,
   }
   if (process.env.browser) {
+    // eslint-disable-next-line @typescript-eslint/no-require-imports
     return require('../../../client-web/src/client').createClient(cloudConfig)
   } else {
     // props to https://stackoverflow.com/a/41063795/4575540
@@ -80,6 +81,7 @@ export function createTestClient(
     clickhouse_settings: clickHouseSettings,
   }
   if (process.env.browser) {
+    // eslint-disable-next-line @typescript-eslint/no-require-imports
     return require('../../../client-web/src/client').createClient(localConfig) // eslint-disable-line @typescript-eslint/no-var-requires
   } else {
     // @ts-expect-error
diff --git a/packages/client-common/__tests__/utils/datasets.ts b/packages/client-common/__tests__/utils/datasets.ts
new file mode 100644
index 00000000..1966ccb4
--- /dev/null
+++ b/packages/client-common/__tests__/utils/datasets.ts
@@ -0,0 +1,37 @@
+import type { ClickHouseClient } from '@clickhouse/client-common'
+import { fakerRU } from '@faker-js/faker'
+import { createTableWithFields } from '@test/fixtures/table_with_fields'
+
+export async function genLargeStringsDataset(
+  client: ClickHouseClient,
+  {
+    rows,
+    words,
+  }: {
+    rows: number
+    words: number
+  },
+): Promise<{
+  table: string
+  values: { id: number; sentence: string; timestamp: string }[]
+}> {
+  const table = await createTableWithFields(
+    client as ClickHouseClient,
+    `sentence String, timestamp String`,
+  )
+  const values = [...new Array(rows)].map((_, id) => ({
+    id,
+    // it seems that it is easier to trigger an incorrect behavior with non-ASCII symbols
+    sentence: fakerRU.lorem.sentence(words),
+    timestamp: new Date().toISOString(),
+  }))
+  await client.insert({
+    table,
+    values,
+    format: 'JSONEachRow',
+  })
+  return {
+    table,
+    values,
+  }
+}
diff --git a/packages/client-common/__tests__/utils/native_columns.ts b/packages/client-common/__tests__/utils/native_columns.ts
new file mode 100644
index 00000000..61761b2a
--- /dev/null
+++ b/packages/client-common/__tests__/utils/native_columns.ts
@@ -0,0 +1,124 @@
+import type { ParsedColumnEnum } from '../../src/parse'
+
+export const enumTypes: ['Enum8' | 'Enum16', 8 | 16][] = [
+  ['Enum8', 8],
+  ['Enum16', 16],
+]
+
+export const parsedEnumTestArgs: ParsedColumnEnum[] = enumTypes.flatMap(
+  ([enumType, intSize]) => [
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('a' = 1)`,
+      values: {
+        1: 'a',
+      } as Record<number, string>,
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('a' = 0, 'b' = 2)`,
+      values: {
+        0: 'a',
+        2: 'b',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('a' = 1, 'b' = 2, 'c' = 42)`,
+      values: {
+        1: 'a',
+        2: 'b',
+        42: 'c',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('f\\'' = 1, 'x =' = 2, 'b\\'\\'\\'' = 3, '\\'c=4=' = 42, '4' = 100)`,
+      values: {
+        1: "f\\'",
+        2: 'x =',
+        3: "b\\'\\'\\'",
+        42: "\\'c=4=",
+        100: '4',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('f\\'()' = 1)`,
+      values: {
+        1: "f\\'()",
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('\\'' = 0)`,
+      values: {
+        0: `\\'`,
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('' = 0)`,
+      values: {
+        0: '',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('' = 42)`,
+      values: {
+        42: '',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('foo' = 1, '' = 42)`,
+      values: {
+        1: 'foo',
+        42: '',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('' = 0, 'foo' = 42)`,
+      values: {
+        0: '',
+        42: 'foo',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('(' = 1)`,
+      values: {
+        1: '(',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}(')' = 1)`,
+      values: {
+        1: ')',
+      },
+      intSize,
+    },
+    {
+      type: 'Enum',
+      sourceType: `${enumType}('()' = 1)`,
+      values: {
+        1: '()',
+      },
+      intSize,
+    },
+  ],
+)
diff --git a/packages/client-common/src/clickhouse_types.ts b/packages/client-common/src/clickhouse_types.ts
index c436270f..846888b9 100644
--- a/packages/client-common/src/clickhouse_types.ts
+++ b/packages/client-common/src/clickhouse_types.ts
@@ -40,3 +40,19 @@ export interface WithClickHouseSummary {
 export interface WithResponseHeaders {
   response_headers: ResponseHeaders
 }
+
+/** X-ClickHouse-Summary response header and progress rows from JSONEachRowWithProgress share the same structure */
+export interface ProgressRow {
+  progress: ClickHouseSummary
+}
+
+/** Type guard to use with JSONEachRowWithProgress, checking if the emitted row is a progress row.
+ * @see https://clickhouse.com/docs/en/interfaces/formats#jsoneachrowwithprogress */
+export function isProgressRow(row: unknown): row is ProgressRow {
+  return (
+    row !== null &&
+    typeof row === 'object' &&
+    'progress' in row &&
+    Object.keys(row).length === 1
+  )
+}
diff --git a/packages/client-common/src/config.ts b/packages/client-common/src/config.ts
index 9813cf33..b2bd013a 100644
--- a/packages/client-common/src/config.ts
+++ b/packages/client-common/src/config.ts
@@ -279,6 +279,7 @@ export function createUrl(configURL: string | URL | undefined): URL {
   } catch (err) {
     throw new Error(
       'ClickHouse URL is malformed. Expected format: http[s]://[username:password@]hostname:port[/database][?param1=value1&param2=value2]',
+      { cause: err },
     )
   }
   if (url.protocol !== 'http:' && url.protocol !== 'https:') {
diff --git a/packages/client-common/src/data_formatter/formatter.ts b/packages/client-common/src/data_formatter/formatter.ts
index f4b92830..c6aa7389 100644
--- a/packages/client-common/src/data_formatter/formatter.ts
+++ b/packages/client-common/src/data_formatter/formatter.ts
@@ -7,6 +7,7 @@ export const StreamableJSONFormats = [
   'JSONCompactEachRowWithNamesAndTypes',
   'JSONCompactStringsEachRowWithNames',
   'JSONCompactStringsEachRowWithNamesAndTypes',
+  'JSONEachRowWithProgress',
 ] as const
 export const RecordsJSONFormats = ['JSONObjectEachRow'] as const
 export const SingleDocumentJSONFormats = [
diff --git a/packages/client-common/src/index.ts b/packages/client-common/src/index.ts
index 881b11e7..3bbc2621 100644
--- a/packages/client-common/src/index.ts
+++ b/packages/client-common/src/index.ts
@@ -16,6 +16,7 @@ export {
 export { type BaseClickHouseClientConfigOptions } from './config'
 export type {
   Row,
+  RowOrProgress,
   BaseResultSet,
   ResultJSONType,
   RowJSONType,
@@ -51,12 +52,29 @@ export type {
   ResponseHeaders,
   WithClickHouseSummary,
   WithResponseHeaders,
+  ProgressRow,
 } from './clickhouse_types'
+export { isProgressRow } from './clickhouse_types'
 export {
   type ClickHouseSettings,
   type MergeTreeSettings,
   SettingsMap,
 } from './settings'
 
+export type {
+  SimpleColumnType,
+  ParsedColumnSimple,
+  ParsedColumnEnum,
+  ParsedColumnFixedString,
+  ParsedColumnNullable,
+  ParsedColumnDecimal,
+  ParsedColumnDateTime,
+  ParsedColumnDateTime64,
+  ParsedColumnArray,
+  ParsedColumnTuple,
+  ParsedColumnMap,
+  ParsedColumnType,
+} from './parse'
+export { SimpleColumnTypes, parseColumnType } from './parse'
 /** For implementations usage only - should not be re-exported */
 export {
diff --git a/packages/client-common/src/parse/column_types.ts b/packages/client-common/src/parse/column_types.ts
new file mode 100644
index 00000000..ee29475d
--- /dev/null
+++ b/packages/client-common/src/parse/column_types.ts
@@ -0,0 +1,723 @@
+export class ColumnTypeParseError extends Error {
+  readonly args: Record<string, unknown>
+  constructor(message: string, args?: Record<string, unknown>) {
+    super(message)
+    this.args = args ?? {}
+
+    // Set the prototype explicitly, see:
+    // https://github.com/Microsoft/TypeScript/wiki/Breaking-Changes#extending-built-ins-like-error-array-and-map-may-no-longer-work
+    Object.setPrototypeOf(this, ColumnTypeParseError.prototype)
+  }
+}
+
+export const SimpleColumnTypes = [
+  'Bool',
+  'UInt8',
+  'Int8',
+  'UInt16',
+  'Int16',
+  'UInt32',
+  'Int32',
+  'UInt64',
+  'Int64',
+  'UInt128',
+  'Int128',
+  'UInt256',
+  'Int256',
+  'Float32',
+  'Float64',
+  'String',
+  'UUID',
+  'Date',
+  'Date32',
+  'IPv4',
+  'IPv6',
+] as const
+export type SimpleColumnType = (typeof SimpleColumnTypes)[number]
+
+export interface ParsedColumnSimple {
+  type: 'Simple'
+  /** Without LowCardinality and Nullable. For example:
+   *  * UInt8 -> UInt8
+   *  * LowCardinality(Nullable(String)) -> String */
+  columnType: SimpleColumnType
+  /** The original type before parsing. */
+  sourceType: string
+}
+
+export interface ParsedColumnFixedString {
+  type: 'FixedString'
+  sizeBytes: number
+  sourceType: string
+}
+
+export interface ParsedColumnDateTime {
+  type: 'DateTime'
+  timezone: string | null
+  sourceType: string
+}
+
+export interface ParsedColumnDateTime64 {
+  type: 'DateTime64'
+  timezone: string | null
+  /** Valid range: [0 : 9] */
+  precision: number
+  sourceType: string
+}
+
+export interface ParsedColumnEnum {
+  type: 'Enum'
+  /** Index to name */
+  values: Record<number, string>
+  /** UInt8 or UInt16 */
+  intSize: 8 | 16
+  sourceType: string
+}
+
+/** Int size for Decimal depends on the Precision
+ *  * 32 bits for precision < 10
+ *  * 64 bits for precision < 19
+ *  * 128 bits for precision < 39
+ *  * 256 bits for precision >= 39
+ */
+export interface DecimalParams {
+  precision: number
+  scale: number
+  intSize: 32 | 64 | 128 | 256
+}
+export interface ParsedColumnDecimal {
+  type: 'Decimal'
+  params: DecimalParams
+  sourceType: string
+}
+
+/** Tuple, Array or Map itself cannot be Nullable */
+export interface ParsedColumnNullable {
+  type: 'Nullable'
+  value:
+    | ParsedColumnSimple
+    | ParsedColumnEnum
+    | ParsedColumnDecimal
+    | ParsedColumnFixedString
+    | ParsedColumnDateTime
+    | ParsedColumnDateTime64
+  sourceType: string
+}
+
+/** Array cannot be Nullable or LowCardinality, but its value type can be.
+ *  Arrays can be multidimensional, e.g. Array(Array(Array(T))).
+ *  Arrays are allowed to have a Map as the value type.
+ */
+export interface ParsedColumnArray {
+  type: 'Array'
+  value:
+    | ParsedColumnNullable
+    | ParsedColumnSimple
+    | ParsedColumnFixedString
+    | ParsedColumnDecimal
+    | ParsedColumnEnum
+    | ParsedColumnMap
+    | ParsedColumnDateTime
+    | ParsedColumnDateTime64
+    | ParsedColumnTuple
+  /** Array(T) = 1 dimension, Array(Array(T)) = 2, etc. */
+  dimensions: number
+  sourceType: string
+}
+
+/** @see https://clickhouse.com/docs/en/sql-reference/data-types/map */
+export interface ParsedColumnMap {
+  type: 'Map'
+  /** Possible key types:
+   *  - String, Integer, UUID, Date, Date32, etc ({@link ParsedColumnSimple})
+   *  - FixedString
+   *  - DateTime
+   *  - Enum
+   */
+  key:
+    | ParsedColumnSimple
+    | ParsedColumnFixedString
+    | ParsedColumnEnum
+    | ParsedColumnDateTime
+  /** Value types are arbitrary, including Map, Array, and Tuple. */
*/ + value: ParsedColumnType + sourceType: string +} + +export interface ParsedColumnTuple { + type: 'Tuple' + /** Element types are arbitrary, including Map, Array, and Tuple. */ + elements: ParsedColumnType[] + sourceType: string +} + +export type ParsedColumnType = + | ParsedColumnSimple + | ParsedColumnEnum + | ParsedColumnFixedString + | ParsedColumnNullable + | ParsedColumnDecimal + | ParsedColumnDateTime + | ParsedColumnDateTime64 + | ParsedColumnArray + | ParsedColumnTuple + | ParsedColumnMap + +/** + * @experimental - incomplete, unstable API; + * originally intended to be used for RowBinary/Native header parsing internally. + * Currently unsupported source types: + * * Geo + * * (Simple)AggregateFunction + * * Nested + * * Old/new JSON + * * Dynamic + * * Variant + */ +export function parseColumnType(sourceType: string): ParsedColumnType { + let columnType = sourceType + let isNullable = false + if (columnType.startsWith(LowCardinalityPrefix)) { + columnType = columnType.slice(LowCardinalityPrefix.length, -1) + } + if (columnType.startsWith(NullablePrefix)) { + columnType = columnType.slice(NullablePrefix.length, -1) + isNullable = true + } + let result: ParsedColumnType + if ((SimpleColumnTypes as unknown as string[]).includes(columnType)) { + result = { + type: 'Simple', + columnType: columnType as SimpleColumnType, + sourceType, + } + } else if (columnType.startsWith(DecimalPrefix)) { + result = parseDecimalType({ + sourceType, + columnType, + }) + } else if (columnType.startsWith(DateTime64Prefix)) { + result = parseDateTime64Type({ sourceType, columnType }) + } else if (columnType.startsWith(DateTimePrefix)) { + result = parseDateTimeType({ sourceType, columnType }) + } else if (columnType.startsWith(FixedStringPrefix)) { + result = parseFixedStringType({ sourceType, columnType }) + } else if ( + columnType.startsWith(Enum8Prefix) || + columnType.startsWith(Enum16Prefix) + ) { + result = parseEnumType({ sourceType, columnType }) + } else if (columnType.startsWith(ArrayPrefix)) { + result = parseArrayType({ sourceType, columnType }) + } else if (columnType.startsWith(MapPrefix)) { + result = parseMapType({ sourceType, columnType }) + } else if (columnType.startsWith(TuplePrefix)) { + result = parseTupleType({ sourceType, columnType }) + } else { + throw new ColumnTypeParseError('Unsupported column type', { columnType }) + } + if (isNullable) { + return asNullableType(result, sourceType) + } else { + return result + } +} + +export function parseDecimalType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnDecimal { + if ( + !columnType.startsWith(DecimalPrefix) || + columnType.length < DecimalPrefix.length + 5 // Decimal(1, 0) is the shortest valid definition + ) { + throw new ColumnTypeParseError('Invalid Decimal type', { + sourceType, + columnType, + }) + } + const split = columnType.slice(DecimalPrefix.length, -1).split(', ') + if (split.length !== 2) { + throw new ColumnTypeParseError( + 'Expected Decimal type to have both precision and scale', + { + sourceType, + columnType, + split, + }, + ) + } + let intSize: DecimalParams['intSize'] = 32 + const precision = parseInt(split[0], 10) + if (Number.isNaN(precision) || precision < 1 || precision > 76) { + throw new ColumnTypeParseError('Invalid Decimal precision', { + columnType, + sourceType, + precision, + }) + } + const scale = parseInt(split[1], 10) + if (Number.isNaN(scale) || scale < 0 || scale > precision) { + throw new ColumnTypeParseError('Invalid Decimal scale', { + columnType, + sourceType, + 
precision, + scale, + }) + } + if (precision > 38) { + intSize = 256 + } else if (precision > 18) { + intSize = 128 + } else if (precision > 9) { + intSize = 64 + } + return { + type: 'Decimal', + params: { + precision, + scale, + intSize, + }, + sourceType, + } +} + +export function parseEnumType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnEnum { + let intSize: 8 | 16 + if (columnType.startsWith(Enum8Prefix)) { + columnType = columnType.slice(Enum8Prefix.length, -1) + intSize = 8 + } else if (columnType.startsWith(Enum16Prefix)) { + columnType = columnType.slice(Enum16Prefix.length, -1) + intSize = 16 + } else { + throw new ColumnTypeParseError( + 'Expected Enum to be either Enum8 or Enum16', + { + columnType, + sourceType, + }, + ) + } + // The minimal allowed Enum definition is Enum8('' = 0), i.e. 6 chars inside. + if (columnType.length < 6) { + throw new ColumnTypeParseError('Invalid Enum type values', { + columnType, + sourceType, + }) + } + + const names: string[] = [] + const indices: number[] = [] + let parsingName = true // false when parsing the index + let charEscaped = false // we should ignore escaped ticks + let startIndex = 1 // Skip the first ' + + // Should support the most complicated enums, such as Enum8('f\'' = 1, 'x =' = 2, 'b\'\'\'' = 3, '\'c=4=' = 42, '4' = 100) + for (let i = 1; i < columnType.length; i++) { + if (parsingName) { + if (charEscaped) { + charEscaped = false + } else { + if (columnType.charCodeAt(i) === BackslashASCII) { + charEscaped = true + } else if (columnType.charCodeAt(i) === SingleQuoteASCII) { + // non-escaped closing tick - push the name + const name = columnType.slice(startIndex, i) + if (names.includes(name)) { + throw new ColumnTypeParseError('Duplicate Enum name', { + columnType, + sourceType, + name, + names, + indices, + }) + } + names.push(name) + i += 4 // skip ` = ` and the first digit, as it will always have at least one. + startIndex = i + parsingName = false + } + } + } + // Parsing the index, skipping next iterations until the first non-digit one + else if ( + columnType.charCodeAt(i) < ZeroASCII || + columnType.charCodeAt(i) > NineASCII + ) { + pushEnumIndex(startIndex, i) + // the char at this index should be comma. 
+ i += 2 // skip ` '`, but not the first char - ClickHouse allows something like Enum8('foo' = 0, '' = 42) + startIndex = i + 1 + parsingName = true + charEscaped = false + } + } + + // Push the last index + pushEnumIndex(startIndex, columnType.length) + if (names.length !== indices.length) { + throw new ColumnTypeParseError( + 'Expected Enum to have the same number of names and indices', + { columnType, sourceType, names, indices }, + ) + } + + const values: ParsedColumnEnum['values'] = {} + for (let i = 0; i < names.length; i++) { + values[indices[i]] = names[i] + } + return { + type: 'Enum', + values, + intSize, + sourceType, + } + + function pushEnumIndex(start: number, end: number) { + const index = parseInt(columnType.slice(start, end), 10) + if (Number.isNaN(index) || index < 0) { + throw new ColumnTypeParseError( + 'Expected Enum index to be a valid number', + { + columnType, + sourceType, + names, + indices, + index, + start, + end, + }, + ) + } + if (indices.includes(index)) { + throw new ColumnTypeParseError('Duplicate Enum index', { + columnType, + sourceType, + index, + names, + indices, + }) + } + indices.push(index) + } +} + +export function parseMapType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnMap { + if ( + !columnType.startsWith(MapPrefix) || + columnType.length < MapPrefix.length + 11 // the shortest definition seems to be Map(Int8, Int8) + ) { + throw new ColumnTypeParseError('Invalid Map type', { + columnType, + sourceType, + }) + } + columnType = columnType.slice(MapPrefix.length, -1) + const [keyType, valueType] = getElementsTypes({ columnType, sourceType }, 2) + const key = parseColumnType(keyType) + if ( + key.type === 'DateTime64' || + key.type === 'Nullable' || + key.type === 'Array' || + key.type === 'Map' || + key.type === 'Decimal' || + key.type === 'Tuple' + ) { + throw new ColumnTypeParseError('Invalid Map key type', { + key, + sourceType, + }) + } + const value = parseColumnType(valueType) + return { + type: 'Map', + key, + value, + sourceType, + } +} + +export function parseTupleType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnTuple { + if ( + !columnType.startsWith(TuplePrefix) || + columnType.length < TuplePrefix.length + 5 // Tuple(Int8) is the shortest valid definition + ) { + throw new ColumnTypeParseError('Invalid Tuple type', { + columnType, + sourceType, + }) + } + columnType = columnType.slice(TuplePrefix.length, -1) + const elements = getElementsTypes({ columnType, sourceType }, 1).map((type) => + parseColumnType(type), + ) + return { + type: 'Tuple', + elements, + sourceType, + } +} + +export function parseArrayType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnArray { + if ( + !columnType.startsWith(ArrayPrefix) || + columnType.length < ArrayPrefix.length + 5 // Array(Int8) is the shortest valid definition + ) { + throw new ColumnTypeParseError('Invalid Array type', { + columnType, + sourceType, + }) + } + + let dimensions = 0 + while (columnType.length > 0) { + if (columnType.startsWith(ArrayPrefix)) { + columnType = columnType.slice(ArrayPrefix.length, -1) // Array(T) -> T + dimensions++ + } else { + break + } + } + if (dimensions === 0 || dimensions > 10) { + // TODO: check how many we can handle; max 10 seems more than enough. 
+ throw new ColumnTypeParseError( + 'Expected Array to have between 1 and 10 dimensions', + { columnType }, + ) + } + const value = parseColumnType(columnType) + if (value.type === 'Array') { + throw new ColumnTypeParseError('Unexpected Array as value type', { + columnType, + sourceType, + }) + } + return { + type: 'Array', + value, + dimensions, + sourceType, + } +} + +export function parseDateTimeType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnDateTime { + if ( + columnType.startsWith(DateTimeWithTimezonePrefix) && + columnType.length > DateTimeWithTimezonePrefix.length + 4 // DateTime('GB') has the least amount of chars + ) { + const timezone = columnType.slice(DateTimeWithTimezonePrefix.length + 1, -2) + return { + type: 'DateTime', + timezone, + sourceType, + } + } else if ( + columnType.startsWith(DateTimePrefix) && + columnType.length === DateTimePrefix.length + ) { + return { + type: 'DateTime', + timezone: null, + sourceType, + } + } else { + throw new ColumnTypeParseError('Invalid DateTime type', { + columnType, + sourceType, + }) + } +} + +export function parseDateTime64Type({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnDateTime64 { + if ( + !columnType.startsWith(DateTime64Prefix) || + columnType.length < DateTime64Prefix.length + 2 // should at least have a precision + ) { + throw new ColumnTypeParseError('Invalid DateTime64 type', { + columnType, + sourceType, + }) + } + const precision = parseInt(columnType[DateTime64Prefix.length], 10) + if (Number.isNaN(precision) || precision < 0 || precision > 9) { + throw new ColumnTypeParseError('Invalid DateTime64 precision', { + columnType, + sourceType, + precision, + }) + } + let timezone = null + if (columnType.length > DateTime64Prefix.length + 2) { + // e.g. DateTime64(3, 'UTC') -> UTC + timezone = columnType.slice(DateTime64Prefix.length + 4, -2) + } + return { + type: 'DateTime64', + timezone, + precision, + sourceType, + } +} + +export function parseFixedStringType({ + columnType, + sourceType, +}: ParseColumnTypeParams): ParsedColumnFixedString { + if ( + !columnType.startsWith(FixedStringPrefix) || + columnType.length < FixedStringPrefix.length + 2 // i.e. at least FixedString(1) + ) { + throw new ColumnTypeParseError('Invalid FixedString type', { + columnType, + sourceType, + }) + } + const sizeBytes = parseInt(columnType.slice(FixedStringPrefix.length, -1), 10) + if (Number.isNaN(sizeBytes) || sizeBytes < 1) { + throw new ColumnTypeParseError('Invalid FixedString size in bytes', { + columnType, + sourceType, + sizeBytes, + }) + } + return { + type: 'FixedString', + sizeBytes, + sourceType, + } +} + +export function asNullableType( + value: ParsedColumnType, + sourceType: string, +): ParsedColumnNullable { + if ( + value.type === 'Array' || + value.type === 'Map' || + value.type === 'Tuple' || + value.type === 'Nullable' + ) { + throw new ColumnTypeParseError(`${value.type} cannot be Nullable`, { + sourceType, + }) + } + if (value.sourceType.startsWith(NullablePrefix)) { + value.sourceType = value.sourceType.slice(NullablePrefix.length, -1) + } + return { + type: 'Nullable', + sourceType, + value, + } +} + +/** Used for Map key/value types and Tuple elements. + * * `String, UInt8` results in [`String`, `UInt8`]. + * * `String, UInt8, Array(String)` results in [`String`, `UInt8`, `Array(String)`]. + * * Throws if parsed values are below the required minimum. 
*/ +export function getElementsTypes( + { columnType, sourceType }: ParseColumnTypeParams, + minElements: number, +): string[] { + const elements: string[] = [] + /** Consider the element type parsed once we reach a comma outside of parens AND after an unescaped tick. + * The most complicated cases are values names in the self-defined Enum types: + * * `Tuple(Enum8('f\'()' = 1))` -> `f\'()` + * * `Tuple(Enum8('(' = 1))` -> `(` + * See also: {@link parseEnumType }, which works similarly (but has to deal with the indices following the names). */ + let openParens = 0 + let quoteOpen = false + let charEscaped = false + let lastElementIndex = 0 + for (let i = 0; i < columnType.length; i++) { + // prettier-ignore + // console.log(i, 'Current char:', columnType[i], 'openParens:', openParens, 'quoteOpen:', quoteOpen, 'charEscaped:', charEscaped) + if (charEscaped) { + charEscaped = false + } else if (columnType.charCodeAt(i) === BackslashASCII) { + charEscaped = true + } else if (columnType.charCodeAt(i) === SingleQuoteASCII) { + quoteOpen = !quoteOpen // unescaped quote + } else { + if (!quoteOpen) { + if (columnType.charCodeAt(i) === LeftParenASCII) { + openParens++ + } else if (columnType.charCodeAt(i) === RightParenASCII) { + openParens-- + } else if (columnType.charCodeAt(i) === CommaASCII) { + if (openParens === 0) { + elements.push(columnType.slice(lastElementIndex, i)) + // console.log('Pushed element:', elements[elements.length - 1]) + i += 2 // skip ', ' + lastElementIndex = i + } + } + } + } + } + + // prettier-ignore + // console.log('Final elements:', elements, 'nextElementIndex:', lastElementIndex, 'minElements:', minElements, 'openParens:', openParens) + + // Push the remaining part of the type if it seems to be valid (at least all parentheses are closed) + if (!openParens && lastElementIndex < columnType.length - 1) { + elements.push(columnType.slice(lastElementIndex)) + } + if (elements.length < minElements) { + throw new ColumnTypeParseError('Expected more elements in the type', { + sourceType, + columnType, + elements, + minElements, + }) + } + return elements +} + +interface ParseColumnTypeParams { + /** A particular type to parse, such as DateTime. */ + columnType: string + /** Full type definition, such as Map(String, DateTime). 
*/ + sourceType: string +} + +const NullablePrefix = 'Nullable(' as const +const LowCardinalityPrefix = 'LowCardinality(' as const +const DecimalPrefix = 'Decimal(' as const +const ArrayPrefix = 'Array(' as const +const MapPrefix = 'Map(' as const +const Enum8Prefix = 'Enum8(' as const +const Enum16Prefix = 'Enum16(' as const +const TuplePrefix = 'Tuple(' as const +const DateTimePrefix = 'DateTime' as const +const DateTimeWithTimezonePrefix = 'DateTime(' as const +const DateTime64Prefix = 'DateTime64(' as const +const FixedStringPrefix = 'FixedString(' as const + +const SingleQuoteASCII = 39 as const +const LeftParenASCII = 40 as const +const RightParenASCII = 41 as const +const CommaASCII = 44 as const +const ZeroASCII = 48 as const +const NineASCII = 57 as const +const BackslashASCII = 92 as const diff --git a/packages/client-common/src/parse/index.ts b/packages/client-common/src/parse/index.ts new file mode 100644 index 00000000..12260e55 --- /dev/null +++ b/packages/client-common/src/parse/index.ts @@ -0,0 +1 @@ +export * from './column_types'
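Editor's note: a quick sketch of what the parser above returns, derived from the interfaces and branches in `column_types.ts`; the concrete type string is an arbitrary illustration.

```ts
import { parseColumnType } from '@clickhouse/client-common'

// Nullable is unwrapped into a ParsedColumnNullable node whose `value`
// describes the inner type; `sourceType` keeps the original string at each level.
const parsed = parseColumnType('Nullable(FixedString(16))')
// {
//   type: 'Nullable',
//   sourceType: 'Nullable(FixedString(16))',
//   value: { type: 'FixedString', sizeBytes: 16, sourceType: 'FixedString(16)' },
// }

// Unsupported source types (Geo, Dynamic, Variant, etc.) throw a ColumnTypeParseError.
```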
diff --git a/packages/client-common/src/result.ts b/packages/client-common/src/result.ts index efdfa077..e1eb14ea 100644 --- a/packages/client-common/src/result.ts +++ b/packages/client-common/src/result.ts @@ -1,4 +1,8 @@ -import type { ResponseHeaders, ResponseJSON } from './clickhouse_types' +import type { + ProgressRow, + ResponseHeaders, + ResponseJSON, +} from './clickhouse_types' import type { DataFormat, RawDataFormat, @@ -8,6 +12,8 @@ import type { StreamableJSONDataFormat, } from './data_formatter' +export type RowOrProgress<T> = { row: T } | ProgressRow + export type ResultStream<Format extends DataFormat, Stream> = // JSON*EachRow (except JSONObjectEachRow), CSV, TSV etc. Format extends StreamableDataFormat @@ -22,29 +28,35 @@ export type ResultStream<Format extends DataFormat, Stream> = Stream export type ResultJSONType<T, F extends DataFormat> = - // JSON*EachRow formats except JSONObjectEachRow - F extends StreamableJSONDataFormat - ? T[] - : // JSON formats with known layout { data, meta, statistics, ... } - F extends SingleDocumentJSONFormat - ? ResponseJSON<T> - : // JSON formats represented as a Record<string, T> - F extends RecordsJSONFormat - ? Record<string, T> - : // CSV, TSV etc. - cannot be represented as JSON - F extends RawDataFormat - ? never - : // happens only when Format could not be inferred from a literal - T[] | Record<string, T> | ResponseJSON<T> + // Emits either a { row: T } or an object with progress + F extends 'JSONEachRowWithProgress' + ? RowOrProgress<T>[] + : // JSON*EachRow formats except JSONObjectEachRow + F extends StreamableJSONDataFormat + ? T[] + : // JSON formats with known layout { data, meta, statistics, ... } + F extends SingleDocumentJSONFormat + ? ResponseJSON<T> + : // JSON formats represented as a Record<string, T> + F extends RecordsJSONFormat + ? Record<string, T> + : // CSV, TSV etc. - cannot be represented as JSON + F extends RawDataFormat + ? never + : // happens only when Format could not be inferred from a literal + T[] | Record<string, T> | ResponseJSON<T> export type RowJSONType<T, F extends DataFormat> = - // JSON*EachRow formats - F extends StreamableJSONDataFormat - ? T - : // CSV, TSV, non-streamable JSON formats - cannot be streamed as JSON - F extends RawDataFormat | SingleDocumentJSONFormat | RecordsJSONFormat - ? never - : T // happens only when Format could not be inferred from a literal + // Emits either a { row: T } or an object with progress + F extends 'JSONEachRowWithProgress' + ? RowOrProgress<T> + : // JSON*EachRow formats + F extends StreamableJSONDataFormat + ? T + : // CSV, TSV, non-streamable JSON formats - cannot be streamed as JSON + F extends RawDataFormat | SingleDocumentJSONFormat | RecordsJSONFormat + ? never + : T // happens only when Format could not be inferred from a literal export interface Row< JSONType = unknown, diff --git a/packages/client-common/src/version.ts b/packages/client-common/src/version.ts index d6574191..c801573b 100644 --- a/packages/client-common/src/version.ts +++ b/packages/client-common/src/version.ts @@ -1 +1 @@ -export default '1.6.0' +export default '1.7.0' diff --git a/packages/client-node/__tests__/integration/node_query_format_types.test.ts b/packages/client-node/__tests__/integration/node_query_format_types.test.ts index 649005af..d9fc63ed 100644 --- a/packages/client-node/__tests__/integration/node_query_format_types.test.ts +++ b/packages/client-node/__tests__/integration/node_query_format_types.test.ts @@ -1,13 +1,14 @@ -import type { ResultSet } from '../../src' import type { ClickHouseClient as BaseClickHouseClient, DataFormat, } from '@clickhouse/client-common' import { createTableWithFields } from '@test/fixtures/table_with_fields' import { guid } from '@test/utils' -import type { ClickHouseClient } from '../../src' +import type { ClickHouseClient, ResultSet } from '../../src' import { createNodeTestClient } from '../utils/node_client' +/* eslint-disable @typescript-eslint/no-unused-expressions */ + // Ignored and used only as a source for ESLint checks with $ExpectType // See also: https://www.npmjs.com/package/eslint-plugin-expect-type xdescribe('[Node.js] Query and ResultSet types', () => { diff --git a/packages/client-node/__tests__/integration/node_stream_json_formats.test.ts b/packages/client-node/__tests__/integration/node_stream_json_formats.test.ts index c196e2e2..802c878d 100644 --- a/packages/client-node/__tests__/integration/node_stream_json_formats.test.ts +++ b/packages/client-node/__tests__/integration/node_stream_json_formats.test.ts @@ -1,4 +1,4 @@ -import { type ClickHouseClient } from '@clickhouse/client-common' +import { type ClickHouseClient, isProgressRow } from '@clickhouse/client-common' import { createSimpleTable } from '@test/fixtures/simple_table' import { assertJsonValues, jsonValues } from '@test/fixtures/test_data' import { createTestClient, guid } from '@test/utils' @@ -231,6 +231,26 @@ describe('[Node.js] stream JSON formats', () => { }) }) + describe('JSONEachRowWithProgress', () => { + it('should work', async () => { + const limit = 2 + const expectedProgressRowsCount = 4 + const rs = await client.query({ + query: `SELECT number FROM system.numbers LIMIT ${limit}`, + format: 'JSONEachRowWithProgress', + clickhouse_settings: { + max_block_size: '1', // reduce the block size, so the progress is reported more frequently + }, + }) + const rows = await rs.json<{ number: string }>() + expect(rows.length).toEqual(limit + expectedProgressRowsCount) + expect(rows.filter((r) => !isProgressRow(r)) as unknown[]).toEqual([ + { row: { number: '0' } }, + { row: { number: '1' } }, + ]) + }) + }) + it('does not throw if stream closes prematurely', async () => { const stream = new Stream.Readable({ objectMode: true, diff --git a/packages/client-node/__tests__/integration/node_streaming_e2e.test.ts b/packages/client-node/__tests__/integration/node_streaming_e2e.test.ts index 38539f6f..a9f7184f 100644 --- a/packages/client-node/__tests__/integration/node_streaming_e2e.test.ts +++ b/packages/client-node/__tests__/integration/node_streaming_e2e.test.ts @@ -3,10 +3,9 @@ import { type 
ClickHouseClient, type ClickHouseSettings, } from '@clickhouse/client-common' -import { fakerRU } from '@faker-js/faker' import { createSimpleTable } from '@test/fixtures/simple_table' -import { createTableWithFields } from '@test/fixtures/table_with_fields' import { createTestClient, guid } from '@test/utils' +import { genLargeStringsDataset } from '@test/utils/datasets' import { tableFromIPC } from 'apache-arrow' import { Buffer } from 'buffer' import Fs from 'fs' @@ -152,40 +151,9 @@ describe('[Node.js] streaming e2e', () => { // Here we generate a large enough dataset to break into multiple chunks while streaming, // effectively testing the implementation of incomplete rows handling describe('should correctly process multiple chunks', () => { - async function generateData({ - rows, - words, - }: { - rows: number - words: number - }): Promise<{ - table: string - values: { id: number; sentence: string; timestamp: string }[] - }> { - const table = await createTableWithFields( - client as ClickHouseClient, - `sentence String, timestamp String`, - ) - const values = [...new Array(rows)].map((_, id) => ({ - id, - // it seems that it is easier to trigger an incorrect behavior with non-ASCII symbols - sentence: fakerRU.lorem.sentence(words), - timestamp: new Date().toISOString(), - })) - await client.insert({ - table, - values, - format: 'JSONEachRow', - }) - return { - table, - values, - } - } - describe('large amount of rows', () => { it('should work with .json()', async () => { - const { table, values } = await generateData({ + const { table, values } = await genLargeStringsDataset(client, { rows: 10000, words: 10, }) @@ -199,7 +167,7 @@ describe('[Node.js] streaming e2e', () => { }) it('should work with .stream()', async () => { - const { table, values } = await generateData({ + const { table, values } = await genLargeStringsDataset(client, { rows: 10000, words: 10, }) @@ -222,7 +190,7 @@ describe('[Node.js] streaming e2e', () => { describe("rows that don't fit into a single chunk", () => { it('should work with .json()', async () => { - const { table, values } = await generateData({ + const { table, values } = await genLargeStringsDataset(client, { rows: 5, words: 10000, }) @@ -236,7 +204,7 @@ describe('[Node.js] streaming e2e', () => { }) it('should work with .stream()', async () => { - const { table, values } = await generateData({ + const { table, values } = await genLargeStringsDataset(client, { rows: 5, words: 10000, }) diff --git a/packages/client-node/src/connection/node_base_connection.ts b/packages/client-node/src/connection/node_base_connection.ts index 48e611d2..d17dfc84 100644 --- a/packages/client-node/src/connection/node_base_connection.ts +++ b/packages/client-node/src/connection/node_base_connection.ts @@ -393,8 +393,8 @@ export abstract class NodeBaseConnection const tryDecompressResponseStream = params.op === 'Exec' ? // allows to disable stream decompression for the `Exec` operation only - params.decompress_response_stream ?? - this.params.compression.decompress_response + (params.decompress_response_stream ?? 
+ this.params.compression.decompress_response) : // there is nothing useful in the response stream for the `Command` operation, // and it is immediately destroyed; never decompress it false diff --git a/packages/client-node/src/index.ts b/packages/client-node/src/index.ts index 787a0397..68d4a300 100644 --- a/packages/client-node/src/index.ts +++ b/packages/client-node/src/index.ts @@ -46,4 +46,21 @@ export { StreamableJSONFormats, SingleDocumentJSONFormats, RecordsJSONFormats, + type SimpleColumnType, + type ParsedColumnSimple, + type ParsedColumnEnum, + type ParsedColumnFixedString, + type ParsedColumnNullable, + type ParsedColumnDecimal, + type ParsedColumnDateTime, + type ParsedColumnDateTime64, + type ParsedColumnArray, + type ParsedColumnTuple, + type ParsedColumnMap, + type ParsedColumnType, + parseColumnType, + SimpleColumnTypes, + type ProgressRow, + isProgressRow, + type RowOrProgress, } from '@clickhouse/client-common' diff --git a/packages/client-node/src/result_set.ts b/packages/client-node/src/result_set.ts index 43aceb28..d05b275d 100644 --- a/packages/client-node/src/result_set.ts +++ b/packages/client-node/src/result_set.ts @@ -85,7 +85,7 @@ export class ResultSet const stream = this.stream() for await (const rows of stream) { for (const row of rows) { - result.push(row.json()) + result.push(row.json() as T) } } return result as any diff --git a/packages/client-node/src/version.ts b/packages/client-node/src/version.ts index d6574191..c801573b 100644 --- a/packages/client-node/src/version.ts +++ b/packages/client-node/src/version.ts @@ -1 +1 @@ -export default '1.6.0' +export default '1.7.0' diff --git a/packages/client-web/__tests__/integration/web_abort_request.test.ts b/packages/client-web/__tests__/integration/web_abort_request.test.ts index 426fee70..ca6836e6 100644 --- a/packages/client-web/__tests__/integration/web_abort_request.test.ts +++ b/packages/client-web/__tests__/integration/web_abort_request.test.ts @@ -1,11 +1,12 @@ -import type { ClickHouseClient, Row } from '@clickhouse/client-common' +import type { Row } from '@clickhouse/client-common' import { createTestClient } from '@test/utils' +import type { WebClickHouseClient } from '../../src/client' describe('[Web] abort request', () => { - let client: ClickHouseClient + let client: WebClickHouseClient beforeEach(() => { - client = createTestClient() + client = createTestClient() as unknown as WebClickHouseClient }) afterEach(async () => { @@ -31,23 +32,27 @@ describe('[Web] abort request', () => { it('cancels a select query while reading response', async () => { const controller = new AbortController() + let rowCount = 0 const selectPromise = client .query({ - query: 'SELECT * from system.numbers LIMIT 100000', + query: 'SELECT number FROM system.numbers LIMIT 10000', format: 'JSONCompactEachRow', abort_signal: controller.signal, + clickhouse_settings: { + // low block size to force streaming 1 row at a time + max_block_size: '1', + }, }) .then(async (rs) => { const reader = rs.stream().getReader() while (true) { const { done, value: rows } = await reader.read() if (done) break - ;(rows as Row[]).forEach((row: Row) => { - const [number] = row.json<[string]>() - // abort when reach number 3 - if (number === '3') { + ;(rows as Row[]).forEach(() => { + if (rowCount >= 1) { controller.abort() } + rowCount++ }) } }) @@ -61,10 +66,15 @@ describe('[Web] abort request', () => { }) it('cancels a select query while reading response by closing response stream', async () => { + let rowCount = 0 const selectPromise = 
client .query({ - query: 'SELECT * from system.numbers LIMIT 100000', + query: 'SELECT number FROM system.numbers LIMIT 3', format: 'JSONCompactEachRow', + clickhouse_settings: { + // low block size to force streaming 1 row at a time + max_block_size: '1', + }, }) .then(async function (rs) { const reader = rs.stream().getReader() while (true) { const { done, value: rows } = await reader.read() if (done) break for (const row of rows as Row[]) { - const [number] = row.json<[string]>() - // abort when reach number 3 - if (number === '3') { - await reader.releaseLock() - await rs.close() + row.json() + if (rowCount >= 1) { + await rs.stream().cancel() } + rowCount++ } } }) diff --git a/packages/client-web/__tests__/integration/web_select_streaming.test.ts b/packages/client-web/__tests__/integration/web_select_streaming.test.ts index b8184096..8cc00e5e 100644 --- a/packages/client-web/__tests__/integration/web_select_streaming.test.ts +++ b/packages/client-web/__tests__/integration/web_select_streaming.test.ts @@ -1,5 +1,7 @@ import type { ClickHouseClient, Row } from '@clickhouse/client-common' +import { isProgressRow } from '@clickhouse/client-common' import { createTestClient } from '@test/utils' +import { genLargeStringsDataset } from '@test/utils/datasets' describe('[Web] SELECT streaming', () => { let client: ClickHouseClient<ReadableStream<Row[]>> afterEach(async () => { await client.close() }) beforeEach(async () => { - client = createTestClient() + client = createTestClient({ + // It is required to disable keep-alive to allow for larger inserts + // https://fetch.spec.whatwg.org/#http-network-or-cache-fetch + // If contentLength is non-null and httpRequest’s keepalive is true, then: + // <...> + // If the sum of contentLength and inflightKeepaliveBytes is greater than 64 kibibytes, then return a network error. + keep_alive: { enabled: false }, + }) })
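Editor's note: the keep-alive workaround in the test setup above applies equally to application code. A sketch with the web client package (the URL is a placeholder; whether you need this depends on your request body sizes):

```ts
import { createClient } from '@clickhouse/client-web'

// Disabling keep-alive avoids the 64 KiB request body cap that the fetch spec
// imposes on keepalive requests, which matters for larger inserts in browsers.
const client = createClient({
  url: 'http://localhost:8123', // placeholder
  keep_alive: { enabled: false },
})
```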
describe('consume the response only once', () => { @@ -117,6 +126,24 @@ ]) }) + it('should return objects in JSONEachRowWithProgress format', async () => { + const limit = 2 + const expectedProgressRowsCount = 4 + const rs = await client.query({ + query: `SELECT * FROM system.numbers LIMIT ${limit}`, + format: 'JSONEachRowWithProgress', + clickhouse_settings: { + max_block_size: '1', // reduce the block size, so the progress is reported more frequently + }, + }) + const rows = await rs.json<{ number: string }>() + expect(rows.length).toEqual(limit + expectedProgressRowsCount) + expect(rows.filter((r) => !isProgressRow(r)) as unknown[]).toEqual([ + { row: { number: '0' } }, + { row: { number: '1' } }, + ]) + }) + it('returns stream of objects in JSONStringsEachRow format', async () => { const result = await client.query({ query: 'SELECT number FROM system.numbers LIMIT 5', @@ -199,6 +226,75 @@ ]) }) }) + + // See https://github.com/ClickHouse/clickhouse-js/issues/171 for more details + // Here we generate a large enough dataset to break into multiple chunks while streaming, + // effectively testing the implementation of incomplete rows handling + describe('should correctly process multiple chunks', () => { + describe('large amount of rows', () => { + it('should work with .json()', async () => { + const { table, values } = await genLargeStringsDataset(client, { + rows: 10000, + words: 10, + }) + const result = await client + .query({ + query: `SELECT * FROM ${table} ORDER BY id ASC`, + format: 'JSONEachRow', + }) + .then((r) => r.json()) + expect(result).toEqual(values) + }) + + it('should work with .stream()', async () => { + const { table, values } = await genLargeStringsDataset(client, { + rows: 10000, + words: 10, + }) + const stream = await client + .query({ + query: `SELECT * FROM ${table} ORDER BY id ASC`, + format: 'JSONEachRow', + }) + .then((r) => r.stream()) + + const result = await rowsJsonValues(stream) + expect(result).toEqual(values) + }) + }) + + describe("rows that don't fit into a single chunk", () => { + it('should work with .json()', async () => { + const { table, values } = await genLargeStringsDataset(client, { + rows: 5, + words: 10000, + }) + const result = await client + .query({ + query: `SELECT * FROM ${table} ORDER BY id ASC`, + format: 'JSONEachRow', + }) + .then((r) => r.json()) + expect(result).toEqual(values) + }) + + it('should work with .stream()', async () => { + const { table, values } = await genLargeStringsDataset(client, { + rows: 5, + words: 10000, + }) + const stream = await client + .query({ + query: `SELECT * FROM ${table} ORDER BY id ASC`, + format: 'JSONEachRow', + }) + .then((r) => r.stream()) + + const result = await rowsJsonValues(stream) + expect(result).toEqual(values) + }) + }) + }) }) async function rowsJsonValues( diff --git a/packages/client-web/src/index.ts b/packages/client-web/src/index.ts index 090df9ee..27503be7 100644 --- a/packages/client-web/src/index.ts +++ b/packages/client-web/src/index.ts @@ -45,4 +45,21 @@ export { StreamableJSONFormats, SingleDocumentJSONFormats, RecordsJSONFormats, + type SimpleColumnType, + type ParsedColumnSimple, + type ParsedColumnEnum, + type ParsedColumnFixedString, + type ParsedColumnNullable, + type ParsedColumnDecimal, + type ParsedColumnDateTime, + type ParsedColumnDateTime64, + type ParsedColumnArray, + type ParsedColumnTuple, + type ParsedColumnMap, + type 
ParsedColumnType, + parseColumnType, + SimpleColumnTypes, + type ProgressRow, + isProgressRow, + type RowOrProgress, } from '@clickhouse/client-common' diff --git a/packages/client-web/src/result_set.ts b/packages/client-web/src/result_set.ts index e1ccf894..ecf6cfbb 100644 --- a/packages/client-web/src/result_set.ts +++ b/packages/client-web/src/result_set.ts @@ -9,10 +9,12 @@ import type { import { isNotStreamableJSONFamily, isStreamableJSONFamily, + validateStreamFormat, } from '@clickhouse/client-common' -import { validateStreamFormat } from '@clickhouse/client-common' import { getAsText } from './utils' +const NEWLINE = 0x0a as const + export class ResultSet<Format extends DataFormat> implements BaseResultSet<ReadableStream<Row[]>, Format> { @@ -48,7 +50,7 @@ export class ResultSet break } for (const row of value) { - result.push(row.json()) + result.push(row.json() as T) } } return result as any @@ -67,40 +69,67 @@ export class ResultSet this.markAsConsumed() validateStreamFormat(this.format) - let decodedChunk = '' + let incompleteChunks: Uint8Array[] = [] + let totalIncompleteLength = 0 const decoder = new TextDecoder('utf-8') const transform = new TransformStream({ start() { // }, - transform: (chunk, controller) => { + transform: (chunk: Uint8Array, controller) => { if (chunk === null) { controller.terminate() } - decodedChunk += decoder.decode(chunk) const rows: Row[] = [] - // eslint-disable-next-line no-constant-condition - while (true) { - const idx = decodedChunk.indexOf('\n') - if (idx !== -1) { - const text = decodedChunk.slice(0, idx) - decodedChunk = decodedChunk.slice(idx + 1) + let idx: number + let lastIdx = 0 + do { + // an unescaped newline character denotes the end of a row + idx = chunk.indexOf(NEWLINE, lastIdx) + // there is no complete row in the rest of the current chunk + if (idx === -1) { + // to be processed during the next transform iteration + const incompleteChunk = chunk.slice(lastIdx) + incompleteChunks.push(incompleteChunk) + totalIncompleteLength += incompleteChunk.length + // send the extracted rows to the consumer, if any + if (rows.length > 0) { + controller.enqueue(rows) + } + } else { + let text: string + if (incompleteChunks.length > 0) { + const completeRowBytes = new Uint8Array( + totalIncompleteLength + idx, + ) + + // using the incomplete chunks from the previous iterations + let offset = 0 + incompleteChunks.forEach((incompleteChunk) => { + completeRowBytes.set(incompleteChunk, offset) + offset += incompleteChunk.length + }) + // finalize the row with the current chunk slice that ends with a newline + const finalChunk = chunk.slice(0, idx) + completeRowBytes.set(finalChunk, offset) + + // reset the incomplete chunks + incompleteChunks = [] + totalIncompleteLength = 0 + + text = decoder.decode(completeRowBytes) + } else { + text = decoder.decode(chunk.slice(lastIdx, idx)) + } rows.push({ text, json(): T { return JSON.parse(text) }, }) - } else { - if (rows.length) { - controller.enqueue(rows) - } - break + lastIdx = idx + 1 // skipping newline character } - } - }, - flush() { - decodedChunk = '' + } while (idx !== -1) }, }) diff --git a/packages/client-web/src/version.ts b/packages/client-web/src/version.ts index d6574191..c801573b 100644 --- a/packages/client-web/src/version.ts +++ b/packages/client-web/src/version.ts @@ -1 +1 @@ -export default '1.6.0' +export default '1.7.0'
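Editor's closing note on the web `ResultSet` rewrite above: incomplete rows are now buffered as raw bytes and decoded only once a newline completes them. A standalone sketch, not the client's code, of why decoding each network chunk separately corrupts streamed text:

```ts
// A multi-byte UTF-8 character can straddle a chunk boundary; decoding the
// halves independently yields U+FFFD replacement characters.
const encoder = new TextEncoder()
const decoder = new TextDecoder('utf-8')

const row = encoder.encode('привет\n') // 2 bytes per Cyrillic letter + newline
const chunk1 = row.slice(0, 3) // cuts the second letter in half
const chunk2 = row.slice(3)

// Naive per-chunk decoding corrupts the row:
console.log(decoder.decode(chunk1) + decoder.decode(chunk2)) // 'п??ивет\n', not 'привет\n'

// Buffering the bytes and decoding the complete, newline-terminated row works:
const buffered = new Uint8Array(chunk1.length + chunk2.length)
buffered.set(chunk1, 0)
buffered.set(chunk2, chunk1.length)
console.log(decoder.decode(buffered)) // 'привет\n'
```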