From 431752cb9fdb6b847ade8d2a2af5dfdf1e5de9a3 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 11:02:25 -0500 Subject: [PATCH 01/20] use tableName val Signed-off-by: DanRoscigno --- ci/SHELL/quickstart/hudi/spark_shell.scala | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 ci/SHELL/quickstart/hudi/spark_shell.scala diff --git a/ci/SHELL/quickstart/hudi/spark_shell.scala b/ci/SHELL/quickstart/hudi/spark_shell.scala new file mode 100644 index 0000000..4541a90 --- /dev/null +++ b/ci/SHELL/quickstart/hudi/spark_shell.scala @@ -0,0 +1,42 @@ +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types._ +import org.apache.spark.sql.Row +import org.apache.spark.sql.SaveMode._ +import org.apache.hudi.DataSourceReadOptions._ +import org.apache.hudi.DataSourceWriteOptions._ +import org.apache.hudi.config.HoodieWriteConfig._ +import scala.collection.JavaConversions._ + +val schema = StructType( Array( + StructField("language", StringType, true), + StructField("users", StringType, true), + StructField("id", StringType, true) + )) + +val rowData= Seq(Row("Java", "20000", "a"), + Row("Python", "100000", "b"), + Row("Scala", "3000", "c")) + + +val df = spark.createDataFrame(rowData,schema) + +val tableName = "hudi_coders_hive" +val basePath = "s3a://huditest/hudi_coders" + +df.write.format("hudi"). + option(org.apache.hudi.config.HoodieWriteConfig.TABLE_NAME, tableName). + option(RECORDKEY_FIELD_OPT_KEY, "id"). + option(PARTITIONPATH_FIELD_OPT_KEY, "language"). + option(PRECOMBINE_FIELD_OPT_KEY, "users"). + option("hoodie.datasource.write.hive_style_partitioning", "true"). + option("hoodie.datasource.hive_sync.enable", "true"). + option("hoodie.datasource.hive_sync.mode", "hms"). + option("hoodie.datasource.hive_sync.database", "default"). + option("hoodie.datasource.hive_sync.table", "hudi_coders_hive"). + option("hoodie.datasource.hive_sync.partition_fields", "language"). + option("hoodie.datasource.hive_sync.partition_extractor_class", "org.apache.hudi.hive.MultiPartKeysValueExtractor"). + option("hoodie.datasource.hive_sync.metastore.uris", "thrift://hive-metastore:9083"). + mode(Overwrite). + save(basePath) +System.exit(0) + From c31873e96491185b091ff82105a1d08bad405663 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 11:29:09 -0500 Subject: [PATCH 02/20] add workflow Signed-off-by: DanRoscigno --- .github/workflows/test_with_hudi.yml | 74 ++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 .github/workflows/test_with_hudi.yml diff --git a/.github/workflows/test_with_hudi.yml b/.github/workflows/test_with_hudi.yml new file mode 100644 index 0000000..12ce433 --- /dev/null +++ b/.github/workflows/test_with_hudi.yml @@ -0,0 +1,74 @@ +name: Test with allin1 + +on: + schedule: + - cron: "5 9 * * 1" + push: + branches: [ main ] + paths: + - 'ci/**/quickstart/hudi/*' + - '.github/workflows/test_with_hudi.yml' + - 'quickstart_hudi_test.go' + - 'helper.go' + pull_request: + branches: [ main ] + paths: + - 'ci/**/quickstart/hudi/*' + - '.github/workflows/test_with_hudi.yml' + - 'quickstart_hudi_test.go' + - 'helper.go' + +jobs: + build: + + name: Build and test + runs-on: ubuntu-latest + + steps: + # Checkout the repo as this CI needs: + # - the compose file for StarRocks and Ginkgo/Gomega + - name: Checkout Test repo + uses: actions/checkout@v4 + with: + path: testing + + - name: Checkout Demo repo + uses: actions/checkout@v4 + with: + repository: StarRocks/demo + path: demo + + - name: Set up Golang + uses: actions/setup-go@v5 + with: + go-version-file: 'testing/ci/go.mod' + + - name: Install ginkgo + run: | + version=$(cat go.mod| grep "ginkgo/v2" | awk '{print $2}') + go install -v github.com/onsi/ginkgo/v2/ginkgo@$version + working-directory: ./testing/ci + + - name: Start Hudi, StarRocks, and MinIO + run: docker compose up --detach --wait --wait-timeout 60 + working-directory: demo/documentation-samples/hudi + + - name: Create bucket + run: docker compose exec mc mc mb huditest + working-directory: demo/documentation-samples/hudi + + - name: Copy Spark script + run: docker compose cp ../../testing/ci/SHELL/quickstart/hudi/spark_shell.scala spark-hudi:/spark/ + working-directory: demo/documentation-samples/hudi + + - name: Run Spark job + run: docker compose exec spark-hudi spark-shell -i /spark/ + working-directory: demo/documentation-samples/hudi + + # Any tests that will run against the StarRocks env would be + # launched in steps like this one. Make sure to reset the + # StarRocks environment after each run (remove any tables + # and databases created, and reset any settings to the default) + # + # The ginkgo command uses `--focus-file` to run only the one test + # file. From 220a0812b5819ca1390e7a0d475e644778d459dc Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 12:07:14 -0500 Subject: [PATCH 03/20] check dirs Signed-off-by: DanRoscigno --- .github/workflows/test_with_hudi.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_with_hudi.yml b/.github/workflows/test_with_hudi.yml index 12ce433..6801d91 100644 --- a/.github/workflows/test_with_hudi.yml +++ b/.github/workflows/test_with_hudi.yml @@ -1,4 +1,4 @@ -name: Test with allin1 +name: Test with Hudi on: schedule: @@ -45,6 +45,7 @@ jobs: - name: Install ginkgo run: | + pwd version=$(cat go.mod| grep "ginkgo/v2" | awk '{print $2}') go install -v github.com/onsi/ginkgo/v2/ginkgo@$version working-directory: ./testing/ci @@ -58,7 +59,9 @@ jobs: working-directory: demo/documentation-samples/hudi - name: Copy Spark script - run: docker compose cp ../../testing/ci/SHELL/quickstart/hudi/spark_shell.scala spark-hudi:/spark/ + run: | + pwd + docker compose cp ../../testing/ci/SHELL/quickstart/hudi/spark_shell.scala spark-hudi:/spark/ working-directory: demo/documentation-samples/hudi - name: Run Spark job From a5bd4eae06aa48bc313d2ab7068ef65b87fa4d09 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 12:14:16 -0500 Subject: [PATCH 04/20] set dir Signed-off-by: DanRoscigno --- .github/workflows/test_with_hudi.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/test_with_hudi.yml b/.github/workflows/test_with_hudi.yml index 6801d91..4c79768 100644 --- a/.github/workflows/test_with_hudi.yml +++ b/.github/workflows/test_with_hudi.yml @@ -45,7 +45,6 @@ jobs: - name: Install ginkgo run: | - pwd version=$(cat go.mod| grep "ginkgo/v2" | awk '{print $2}') go install -v github.com/onsi/ginkgo/v2/ginkgo@$version working-directory: ./testing/ci @@ -60,8 +59,7 @@ jobs: - name: Copy Spark script run: | - pwd - docker compose cp ../../testing/ci/SHELL/quickstart/hudi/spark_shell.scala spark-hudi:/spark/ + docker compose cp ../../../testing/ci/SHELL/quickstart/hudi/spark_shell.scala spark-hudi:/spark/ working-directory: demo/documentation-samples/hudi - name: Run Spark job From ca63c56775bafa99ee1f0430773e2546636e5af0 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 12:19:14 -0500 Subject: [PATCH 05/20] correct filename Signed-off-by: DanRoscigno --- .github/workflows/test_with_hudi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_with_hudi.yml b/.github/workflows/test_with_hudi.yml index 4c79768..5eca48f 100644 --- a/.github/workflows/test_with_hudi.yml +++ b/.github/workflows/test_with_hudi.yml @@ -63,7 +63,7 @@ jobs: working-directory: demo/documentation-samples/hudi - name: Run Spark job - run: docker compose exec spark-hudi spark-shell -i /spark/ + run: docker compose exec spark-hudi spark-shell -i /spark/spark_shell.scala working-directory: demo/documentation-samples/hudi # Any tests that will run against the StarRocks env would be From 156d409e02f3fe619dc66d6d4dfc49907f930944 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 12:22:39 -0500 Subject: [PATCH 06/20] limit testing Signed-off-by: DanRoscigno --- .github/workflows/test_with_oneFE_oneBE.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_with_oneFE_oneBE.yml b/.github/workflows/test_with_oneFE_oneBE.yml index bc1a2a0..462767e 100644 --- a/.github/workflows/test_with_oneFE_oneBE.yml +++ b/.github/workflows/test_with_oneFE_oneBE.yml @@ -1,4 +1,4 @@ -name: Test the docs +name: Test with one FE and one BE on: schedule: @@ -10,9 +10,9 @@ on: pull_request: branches: [ main ] paths: - - 'ci/**/*' - - '.github/workflows/test_the_docs.yml' - - 'docker-compose.yml' + - 'ci/docs_test.go' + - 'ci/helper.go' + - '.github/workflows/test_with_oneFE_oneBE.yml' jobs: build: From 73dbec5f7d1fcefa2bef652d05670ac06216deaa Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 12:33:56 -0500 Subject: [PATCH 07/20] correct path for bucket Signed-off-by: DanRoscigno --- .github/workflows/test_with_hudi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_with_hudi.yml b/.github/workflows/test_with_hudi.yml index 5eca48f..de80334 100644 --- a/.github/workflows/test_with_hudi.yml +++ b/.github/workflows/test_with_hudi.yml @@ -54,7 +54,7 @@ jobs: working-directory: demo/documentation-samples/hudi - name: Create bucket - run: docker compose exec mc mc mb huditest + run: docker compose exec mc mc mb minio/huditest working-directory: demo/documentation-samples/hudi - name: Copy Spark script From e9a0b9c193ddd6b73c380911814a6fcfb5d68b9a Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 12:49:37 -0500 Subject: [PATCH 08/20] SQL commands Signed-off-by: DanRoscigno --- .github/workflows/test_with_hudi.yml | 5 ++++ ci/SQL/quickstart/hudi/quickstart_DB.sql | 24 +++++++++++++++++++ ci/quickstart_hudi_test.go | 30 ++++++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 ci/SQL/quickstart/hudi/quickstart_DB.sql create mode 100644 ci/quickstart_hudi_test.go diff --git a/.github/workflows/test_with_hudi.yml b/.github/workflows/test_with_hudi.yml index de80334..ed9fe22 100644 --- a/.github/workflows/test_with_hudi.yml +++ b/.github/workflows/test_with_hudi.yml @@ -73,3 +73,8 @@ jobs: # # The ginkgo command uses `--focus-file` to run only the one test # file. + - name: Test; Hudi SQL test + if: always() + env: + run: ginkgo -v --focus-file=./quickstart_hudi_test.go + working-directory: testing/ci diff --git a/ci/SQL/quickstart/hudi/quickstart_DB.sql b/ci/SQL/quickstart/hudi/quickstart_DB.sql new file mode 100644 index 0000000..9cb98d5 --- /dev/null +++ b/ci/SQL/quickstart/hudi/quickstart_DB.sql @@ -0,0 +1,24 @@ +CREATE EXTERNAL CATALOG hudi_catalog_hms +PROPERTIES +( + "type" = "hudi", + "hive.metastore.type" = "hive", + "hive.metastore.uris" = "thrift://hive-metastore:9083", + "aws.s3.use_instance_profile" = "false", + "aws.s3.access_key" = "admin", + "aws.s3.secret_key" = "password", + "aws.s3.region" = "us-east-1", + "aws.s3.enable_ssl" = "false", + "aws.s3.enable_path_style_access" = "true", + "aws.s3.endpoint" = "http://minio:9000" +); + +SET CATALOG hudi_catalog_hms; + +SHOW DATABASES; + +USE default; + +SHOW tables; + +SELECT * from hudi_coders_hive\G diff --git a/ci/quickstart_hudi_test.go b/ci/quickstart_hudi_test.go new file mode 100644 index 0000000..e6d42dc --- /dev/null +++ b/ci/quickstart_hudi_test.go @@ -0,0 +1,30 @@ +package docs_test + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +var _ = Describe("QuickstartHudi", func() { + + When("Running the Hudi Quick Start", Ordered, func() { + + // The database is already initialized, and a connection + // is available with the variable `db` which is setup + // in the helpers.go file. + + BeforeAll(func() { + }) + + AfterAll(func() { + }) + + It("DDL: Setup quickstart DB", func() { + By("creating a database") + SQL := SQLFromFile("SQL/quickstart/hudi/quickstart_DB.sql") + _, err := db.Exec(SQL) + Expect(err).ToNot(HaveOccurred()) + }) + + }) +}) From e1f2a3f3d5ccfc022ebfc322b0a1d0928bd6f00b Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 12:51:46 -0500 Subject: [PATCH 09/20] remove AWS env Signed-off-by: DanRoscigno --- .github/workflows/test_with_hudi.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/test_with_hudi.yml b/.github/workflows/test_with_hudi.yml index ed9fe22..76e7dc0 100644 --- a/.github/workflows/test_with_hudi.yml +++ b/.github/workflows/test_with_hudi.yml @@ -75,6 +75,5 @@ jobs: # file. - name: Test; Hudi SQL test if: always() - env: run: ginkgo -v --focus-file=./quickstart_hudi_test.go working-directory: testing/ci From 20b5d122b0437223e8d2d94c6ea3b645e27a1263 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 13:08:16 -0500 Subject: [PATCH 10/20] split into separate files Signed-off-by: DanRoscigno --- .../{quickstart_DB.sql => create_catalog.sql} | 10 ------ ci/SQL/quickstart/hudi/select.sql | 1 + ci/SQL/quickstart/hudi/set_catalog.sql | 1 + ci/SQL/quickstart/hudi/show_databases.sql | 1 + ci/SQL/quickstart/hudi/show_tables.sql | 1 + ci/SQL/quickstart/hudi/use_default.sql | 1 + ci/quickstart_hudi_test.go | 32 ++++++++++++++++--- 7 files changed, 33 insertions(+), 14 deletions(-) rename ci/SQL/quickstart/hudi/{quickstart_DB.sql => create_catalog.sql} (80%) create mode 100644 ci/SQL/quickstart/hudi/select.sql create mode 100644 ci/SQL/quickstart/hudi/set_catalog.sql create mode 100644 ci/SQL/quickstart/hudi/show_databases.sql create mode 100644 ci/SQL/quickstart/hudi/show_tables.sql create mode 100644 ci/SQL/quickstart/hudi/use_default.sql diff --git a/ci/SQL/quickstart/hudi/quickstart_DB.sql b/ci/SQL/quickstart/hudi/create_catalog.sql similarity index 80% rename from ci/SQL/quickstart/hudi/quickstart_DB.sql rename to ci/SQL/quickstart/hudi/create_catalog.sql index 9cb98d5..543a44b 100644 --- a/ci/SQL/quickstart/hudi/quickstart_DB.sql +++ b/ci/SQL/quickstart/hudi/create_catalog.sql @@ -12,13 +12,3 @@ PROPERTIES "aws.s3.enable_path_style_access" = "true", "aws.s3.endpoint" = "http://minio:9000" ); - -SET CATALOG hudi_catalog_hms; - -SHOW DATABASES; - -USE default; - -SHOW tables; - -SELECT * from hudi_coders_hive\G diff --git a/ci/SQL/quickstart/hudi/select.sql b/ci/SQL/quickstart/hudi/select.sql new file mode 100644 index 0000000..58a18d0 --- /dev/null +++ b/ci/SQL/quickstart/hudi/select.sql @@ -0,0 +1 @@ +SELECT * from hudi_coders_hive\G diff --git a/ci/SQL/quickstart/hudi/set_catalog.sql b/ci/SQL/quickstart/hudi/set_catalog.sql new file mode 100644 index 0000000..1c2063c --- /dev/null +++ b/ci/SQL/quickstart/hudi/set_catalog.sql @@ -0,0 +1 @@ +SET CATALOG hudi_catalog_hms; diff --git a/ci/SQL/quickstart/hudi/show_databases.sql b/ci/SQL/quickstart/hudi/show_databases.sql new file mode 100644 index 0000000..ede8133 --- /dev/null +++ b/ci/SQL/quickstart/hudi/show_databases.sql @@ -0,0 +1 @@ +SHOW DATABASES; diff --git a/ci/SQL/quickstart/hudi/show_tables.sql b/ci/SQL/quickstart/hudi/show_tables.sql new file mode 100644 index 0000000..021b5c0 --- /dev/null +++ b/ci/SQL/quickstart/hudi/show_tables.sql @@ -0,0 +1 @@ +SHOW tables; diff --git a/ci/SQL/quickstart/hudi/use_default.sql b/ci/SQL/quickstart/hudi/use_default.sql new file mode 100644 index 0000000..f12ce14 --- /dev/null +++ b/ci/SQL/quickstart/hudi/use_default.sql @@ -0,0 +1 @@ +USE default; diff --git a/ci/quickstart_hudi_test.go b/ci/quickstart_hudi_test.go index e6d42dc..a2abe22 100644 --- a/ci/quickstart_hudi_test.go +++ b/ci/quickstart_hudi_test.go @@ -19,12 +19,36 @@ var _ = Describe("QuickstartHudi", func() { AfterAll(func() { }) - It("DDL: Setup quickstart DB", func() { - By("creating a database") - SQL := SQLFromFile("SQL/quickstart/hudi/quickstart_DB.sql") + It("SQL: Work with Hudi", func() { + By("creating an external catalog") + SQL := SQLFromFile("SQL/quickstart/hudi/create_catalog.sql") + _, err := db.Exec(SQL) + Expect(err).ToNot(HaveOccurred()) + + By("creating an external catalog") + SQL := SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") + _, err := db.Exec(SQL) + Expect(err).ToNot(HaveOccurred()) + + By("creating an external catalog") + SQL := SQLFromFile("SQL/quickstart/hudi/show_databases.sql") + _, err := db.Exec(SQL) + Expect(err).ToNot(HaveOccurred()) + + By("creating an external catalog") + SQL := SQLFromFile("SQL/quickstart/hudi/use_default.sql") + _, err := db.Exec(SQL) + Expect(err).ToNot(HaveOccurred()) + + By("creating an external catalog") + SQL := SQLFromFile("SQL/quickstart/hudi/show_tables.sql") _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) - }) + By("creating an external catalog") + SQL := SQLFromFile("SQL/quickstart/hudi/select.sql") + _, err := db.Exec(SQL) + Expect(err).ToNot(HaveOccurred()) + }) }) }) From 13d815e30c038cd501d888cd81963fcdcd30f71c Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 13:13:44 -0500 Subject: [PATCH 11/20] fix colons Signed-off-by: DanRoscigno --- ci/quickstart_hudi_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ci/quickstart_hudi_test.go b/ci/quickstart_hudi_test.go index a2abe22..b3b0a59 100644 --- a/ci/quickstart_hudi_test.go +++ b/ci/quickstart_hudi_test.go @@ -26,28 +26,28 @@ var _ = Describe("QuickstartHudi", func() { Expect(err).ToNot(HaveOccurred()) By("creating an external catalog") - SQL := SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") - _, err := db.Exec(SQL) + SQL = SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") + _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) By("creating an external catalog") - SQL := SQLFromFile("SQL/quickstart/hudi/show_databases.sql") - _, err := db.Exec(SQL) + SQL = SQLFromFile("SQL/quickstart/hudi/show_databases.sql") + _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) By("creating an external catalog") - SQL := SQLFromFile("SQL/quickstart/hudi/use_default.sql") - _, err := db.Exec(SQL) + SQL = SQLFromFile("SQL/quickstart/hudi/use_default.sql") + _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) By("creating an external catalog") - SQL := SQLFromFile("SQL/quickstart/hudi/show_tables.sql") - _, err := db.Exec(SQL) + SQL = SQLFromFile("SQL/quickstart/hudi/show_tables.sql") + _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) By("creating an external catalog") - SQL := SQLFromFile("SQL/quickstart/hudi/select.sql") - _, err := db.Exec(SQL) + SQL = SQLFromFile("SQL/quickstart/hudi/select.sql") + _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) }) }) From 8d7acc04aebab4bc687e4b5544056ac869407e35 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 13:25:42 -0500 Subject: [PATCH 12/20] separate steps Signed-off-by: DanRoscigno --- ci/quickstart_hudi_test.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/ci/quickstart_hudi_test.go b/ci/quickstart_hudi_test.go index b3b0a59..5459522 100644 --- a/ci/quickstart_hudi_test.go +++ b/ci/quickstart_hudi_test.go @@ -19,33 +19,43 @@ var _ = Describe("QuickstartHudi", func() { AfterAll(func() { }) - It("SQL: Work with Hudi", func() { + It("DDL: External Catalog", func() { By("creating an external catalog") SQL := SQLFromFile("SQL/quickstart/hudi/create_catalog.sql") _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) + }) + It("DDL: SET catalog", func() { By("creating an external catalog") SQL = SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) + }) + It("DDL: Show databases in catalog", func() { By("creating an external catalog") SQL = SQLFromFile("SQL/quickstart/hudi/show_databases.sql") _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) + }) + It("DDL: Use the default DB in the external catalog", func() { - By("creating an external catalog") + By("USE default") SQL = SQLFromFile("SQL/quickstart/hudi/use_default.sql") _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) + }) + It("SQL: SHOW TABLES", func() { - By("creating an external catalog") + By("SHOW TABLES") SQL = SQLFromFile("SQL/quickstart/hudi/show_tables.sql") _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) + }) + It("SQL: SELECT FROM the Hudi table", func() { - By("creating an external catalog") + By("SELECT") SQL = SQLFromFile("SQL/quickstart/hudi/select.sql") _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) From 3bb0e94afb6d1f700720565c23a9c0542fff818a Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 13:29:42 -0500 Subject: [PATCH 13/20] add colons Signed-off-by: DanRoscigno --- ci/quickstart_hudi_test.go | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/ci/quickstart_hudi_test.go b/ci/quickstart_hudi_test.go index 5459522..7895686 100644 --- a/ci/quickstart_hudi_test.go +++ b/ci/quickstart_hudi_test.go @@ -28,36 +28,36 @@ var _ = Describe("QuickstartHudi", func() { It("DDL: SET catalog", func() { By("creating an external catalog") - SQL = SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") - _, err = db.Exec(SQL) + SQL := SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") + _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) }) It("DDL: Show databases in catalog", func() { By("creating an external catalog") - SQL = SQLFromFile("SQL/quickstart/hudi/show_databases.sql") - _, err = db.Exec(SQL) + SQL := SQLFromFile("SQL/quickstart/hudi/show_databases.sql") + _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) }) It("DDL: Use the default DB in the external catalog", func() { By("USE default") - SQL = SQLFromFile("SQL/quickstart/hudi/use_default.sql") - _, err = db.Exec(SQL) + SQL := SQLFromFile("SQL/quickstart/hudi/use_default.sql") + _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) }) It("SQL: SHOW TABLES", func() { By("SHOW TABLES") - SQL = SQLFromFile("SQL/quickstart/hudi/show_tables.sql") - _, err = db.Exec(SQL) + SQL := SQLFromFile("SQL/quickstart/hudi/show_tables.sql") + _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) }) It("SQL: SELECT FROM the Hudi table", func() { By("SELECT") - SQL = SQLFromFile("SQL/quickstart/hudi/select.sql") - _, err = db.Exec(SQL) + SQL := SQLFromFile("SQL/quickstart/hudi/select.sql") + _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) }) }) From 9b6e95596eac35e231030f3be1e51566593a58bb Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 13:42:35 -0500 Subject: [PATCH 14/20] try with -A Signed-off-by: DanRoscigno --- ci/SQL/quickstart/hudi/use_default.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/SQL/quickstart/hudi/use_default.sql b/ci/SQL/quickstart/hudi/use_default.sql index f12ce14..36f8a56 100644 --- a/ci/SQL/quickstart/hudi/use_default.sql +++ b/ci/SQL/quickstart/hudi/use_default.sql @@ -1 +1 @@ -USE default; +USE default -A; From 817b671bbde85be266a9909cb0d1ef2219e621c1 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 13:49:23 -0500 Subject: [PATCH 15/20] debug Signed-off-by: DanRoscigno --- ci/SQL/quickstart/hudi/use_default.sql | 2 +- ci/quickstart_hudi_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/SQL/quickstart/hudi/use_default.sql b/ci/SQL/quickstart/hudi/use_default.sql index 36f8a56..3bd8e7b 100644 --- a/ci/SQL/quickstart/hudi/use_default.sql +++ b/ci/SQL/quickstart/hudi/use_default.sql @@ -1 +1 @@ -USE default -A; +XXX default -A; diff --git a/ci/quickstart_hudi_test.go b/ci/quickstart_hudi_test.go index 7895686..9ae0a4c 100644 --- a/ci/quickstart_hudi_test.go +++ b/ci/quickstart_hudi_test.go @@ -41,7 +41,7 @@ var _ = Describe("QuickstartHudi", func() { }) It("DDL: Use the default DB in the external catalog", func() { - By("USE default") + By("ABC USE default") SQL := SQLFromFile("SQL/quickstart/hudi/use_default.sql") _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) From 21f655a1738a797bb535708478588beaf19f715c Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 13:54:09 -0500 Subject: [PATCH 16/20] debug Signed-off-by: DanRoscigno --- ci/SQL/quickstart/hudi/use_default.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/SQL/quickstart/hudi/use_default.sql b/ci/SQL/quickstart/hudi/use_default.sql index 3bd8e7b..d7ff381 100644 --- a/ci/SQL/quickstart/hudi/use_default.sql +++ b/ci/SQL/quickstart/hudi/use_default.sql @@ -1 +1 @@ -XXX default -A; +use default -A; From 4a62b056efb5c0bcf7bbb488c717abd290bd59d7 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 14:12:21 -0500 Subject: [PATCH 17/20] create and set in one step Signed-off-by: DanRoscigno --- ci/SQL/quickstart/hudi/set_catalog.sql | 2 +- ci/SQL/quickstart/hudi/show_databases.sql | 1 - ci/SQL/quickstart/hudi/use_default.sql | 1 - ci/quickstart_hudi_test.go | 16 +--------------- 4 files changed, 2 insertions(+), 18 deletions(-) delete mode 100644 ci/SQL/quickstart/hudi/show_databases.sql delete mode 100644 ci/SQL/quickstart/hudi/use_default.sql diff --git a/ci/SQL/quickstart/hudi/set_catalog.sql b/ci/SQL/quickstart/hudi/set_catalog.sql index 1c2063c..77cd40f 100644 --- a/ci/SQL/quickstart/hudi/set_catalog.sql +++ b/ci/SQL/quickstart/hudi/set_catalog.sql @@ -1 +1 @@ -SET CATALOG hudi_catalog_hms; +set catalog hudi_catalog_hms; use default; diff --git a/ci/SQL/quickstart/hudi/show_databases.sql b/ci/SQL/quickstart/hudi/show_databases.sql deleted file mode 100644 index ede8133..0000000 --- a/ci/SQL/quickstart/hudi/show_databases.sql +++ /dev/null @@ -1 +0,0 @@ -SHOW DATABASES; diff --git a/ci/SQL/quickstart/hudi/use_default.sql b/ci/SQL/quickstart/hudi/use_default.sql deleted file mode 100644 index d7ff381..0000000 --- a/ci/SQL/quickstart/hudi/use_default.sql +++ /dev/null @@ -1 +0,0 @@ -use default -A; diff --git a/ci/quickstart_hudi_test.go b/ci/quickstart_hudi_test.go index 9ae0a4c..ff26785 100644 --- a/ci/quickstart_hudi_test.go +++ b/ci/quickstart_hudi_test.go @@ -27,25 +27,11 @@ var _ = Describe("QuickstartHudi", func() { }) It("DDL: SET catalog", func() { - By("creating an external catalog") + By("Setting the catalog") SQL := SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) }) - It("DDL: Show databases in catalog", func() { - - By("creating an external catalog") - SQL := SQLFromFile("SQL/quickstart/hudi/show_databases.sql") - _, err := db.Exec(SQL) - Expect(err).ToNot(HaveOccurred()) - }) - It("DDL: Use the default DB in the external catalog", func() { - - By("ABC USE default") - SQL := SQLFromFile("SQL/quickstart/hudi/use_default.sql") - _, err := db.Exec(SQL) - Expect(err).ToNot(HaveOccurred()) - }) It("SQL: SHOW TABLES", func() { By("SHOW TABLES") From ee0cadd694436628fde3d542170e5971bf73dc5d Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 15:26:06 -0500 Subject: [PATCH 18/20] working with database named hudi_sample Signed-off-by: DanRoscigno --- ci/SHELL/quickstart/hudi/spark_shell.scala | 5 +++-- ci/SQL/quickstart/hudi/set_catalog.sql | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ci/SHELL/quickstart/hudi/spark_shell.scala b/ci/SHELL/quickstart/hudi/spark_shell.scala index 4541a90..0e13b8a 100644 --- a/ci/SHELL/quickstart/hudi/spark_shell.scala +++ b/ci/SHELL/quickstart/hudi/spark_shell.scala @@ -20,6 +20,7 @@ val rowData= Seq(Row("Java", "20000", "a"), val df = spark.createDataFrame(rowData,schema) +val databaseName = "hudi_sample" val tableName = "hudi_coders_hive" val basePath = "s3a://huditest/hudi_coders" @@ -31,8 +32,8 @@ df.write.format("hudi"). option("hoodie.datasource.write.hive_style_partitioning", "true"). option("hoodie.datasource.hive_sync.enable", "true"). option("hoodie.datasource.hive_sync.mode", "hms"). - option("hoodie.datasource.hive_sync.database", "default"). - option("hoodie.datasource.hive_sync.table", "hudi_coders_hive"). + option("hoodie.datasource.hive_sync.database", databaseName). + option("hoodie.datasource.hive_sync.table", tableName). option("hoodie.datasource.hive_sync.partition_fields", "language"). option("hoodie.datasource.hive_sync.partition_extractor_class", "org.apache.hudi.hive.MultiPartKeysValueExtractor"). option("hoodie.datasource.hive_sync.metastore.uris", "thrift://hive-metastore:9083"). diff --git a/ci/SQL/quickstart/hudi/set_catalog.sql b/ci/SQL/quickstart/hudi/set_catalog.sql index 77cd40f..c978eed 100644 --- a/ci/SQL/quickstart/hudi/set_catalog.sql +++ b/ci/SQL/quickstart/hudi/set_catalog.sql @@ -1 +1 @@ -set catalog hudi_catalog_hms; use default; +set catalog hudi_catalog_hms; use hudi_sample; From 1bebdcea62061d5c6f3cb73708fadcc2a86ee37e Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 12 Feb 2024 15:37:34 -0500 Subject: [PATCH 19/20] group commands in test Signed-off-by: DanRoscigno --- ci/SQL/quickstart/hudi/set_catalog.sql | 2 +- ci/SQL/quickstart/hudi/use_database.sql | 1 + ci/quickstart_hudi_test.go | 17 +++++++++-------- 3 files changed, 11 insertions(+), 9 deletions(-) create mode 100644 ci/SQL/quickstart/hudi/use_database.sql diff --git a/ci/SQL/quickstart/hudi/set_catalog.sql b/ci/SQL/quickstart/hudi/set_catalog.sql index c978eed..1c2063c 100644 --- a/ci/SQL/quickstart/hudi/set_catalog.sql +++ b/ci/SQL/quickstart/hudi/set_catalog.sql @@ -1 +1 @@ -set catalog hudi_catalog_hms; use hudi_sample; +SET CATALOG hudi_catalog_hms; diff --git a/ci/SQL/quickstart/hudi/use_database.sql b/ci/SQL/quickstart/hudi/use_database.sql new file mode 100644 index 0000000..167ab8c --- /dev/null +++ b/ci/SQL/quickstart/hudi/use_database.sql @@ -0,0 +1 @@ +USE hudi_sample; diff --git a/ci/quickstart_hudi_test.go b/ci/quickstart_hudi_test.go index ff26785..8db07e6 100644 --- a/ci/quickstart_hudi_test.go +++ b/ci/quickstart_hudi_test.go @@ -24,19 +24,20 @@ var _ = Describe("QuickstartHudi", func() { SQL := SQLFromFile("SQL/quickstart/hudi/create_catalog.sql") _, err := db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) - }) - It("DDL: SET catalog", func() { By("Setting the catalog") - SQL := SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") - _, err := db.Exec(SQL) + SQL = SQLFromFile("SQL/quickstart/hudi/set_catalog.sql") + _, err = db.Exec(SQL) + Expect(err).ToNot(HaveOccurred()) + + By("USEing the database") + SQL = SQLFromFile("SQL/quickstart/hudi/use_database.sql") + _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) - }) - It("SQL: SHOW TABLES", func() { By("SHOW TABLES") - SQL := SQLFromFile("SQL/quickstart/hudi/show_tables.sql") - _, err := db.Exec(SQL) + SQL = SQLFromFile("SQL/quickstart/hudi/show_tables.sql") + _, err = db.Exec(SQL) Expect(err).ToNot(HaveOccurred()) }) It("SQL: SELECT FROM the Hudi table", func() { From fbdc981afa91897bdf796feb3fdc5bc73bbbb121 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 13 Feb 2024 14:05:38 -0500 Subject: [PATCH 20/20] add env var Signed-off-by: DanRoscigno --- .github/workflows/test_with_hudi.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_with_hudi.yml b/.github/workflows/test_with_hudi.yml index 76e7dc0..3988ef6 100644 --- a/.github/workflows/test_with_hudi.yml +++ b/.github/workflows/test_with_hudi.yml @@ -75,5 +75,7 @@ jobs: # file. - name: Test; Hudi SQL test if: always() + env: + SR_FE_HOST: 'localhost' run: ginkgo -v --focus-file=./quickstart_hudi_test.go working-directory: testing/ci