diff --git a/README.md b/README.md index db3790e64..6732db3af 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,8 @@ bin/spark-shell --packages "org.opensearch:opensearch-spark-ppl_2.12:0.7.0-SNAPS ### PPL Run queries on a local spark cluster See ppl usage sample on local spark cluster [PPL on local spark ](docs/ppl-lang/local-spark-ppl-test-instruction.md) +### Running integration tests on a local spark cluster +See integration test documentation [Docker Integration Tests](integ-test/script/README.md) ## Code of Conduct diff --git a/docker/integ-test/.env b/docker/integ-test/.env new file mode 100644 index 000000000..cf73bdc89 --- /dev/null +++ b/docker/integ-test/.env @@ -0,0 +1,13 @@ +SPARK_VERSION=3.5.3 +OPENSEARCH_VERSION=latest +DASHBOARDS_VERSION=latest +MASTER_UI_PORT=8080 +MASTER_PORT=7077 +UI_PORT=4040 +SPARK_CONNECT_PORT=15002 +PPL_JAR=../../ppl-spark-integration/target/scala-2.12/ppl-spark-integration-assembly-0.7.0-SNAPSHOT.jar +FLINT_JAR=../../flint-spark-integration/target/scala-2.12/flint-spark-integration-assembly-0.7.0-SNAPSHOT.jar +OPENSEARCH_NODE_MEMORY=512m +OPENSEARCH_ADMIN_PASSWORD=C0rrecthorsebatterystaple. +OPENSEARCH_PORT=9200 +OPENSEARCH_DASHBOARDS_PORT=5601 diff --git a/docker/integ-test/docker-compose.yml b/docker/integ-test/docker-compose.yml new file mode 100644 index 000000000..c5ee53d7d --- /dev/null +++ b/docker/integ-test/docker-compose.yml @@ -0,0 +1,143 @@ +services: + spark: + image: bitnami/spark:${SPARK_VERSION:-3.5.3} + container_name: spark + ports: + - "${MASTER_UI_PORT:-8080}:8080" + - "${MASTER_PORT:-7077}:7077" + - "${UI_PORT:-4040}:4040" + - "${SPARK_CONNECT_PORT}:15002" + entrypoint: /opt/bitnami/scripts/spark/master-entrypoint.sh + environment: + - SPARK_MODE=master + - SPARK_RPC_AUTHENTICATION_ENABLED=no + - SPARK_RPC_ENCRYPTION_ENABLED=no + - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no + - SPARK_SSL_ENABLED=no + - SPARK_PUBLIC_DNS=localhost + volumes: + - type: bind + source: ./spark-master-entrypoint.sh + target: /opt/bitnami/scripts/spark/master-entrypoint.sh + - type: bind + source: ./spark-defaults.conf + target: /opt/bitnami/spark/conf/spark-defaults.conf + - type: bind + source: ./log4j2.properties + target: /opt/bitnami/spark/conf/log4j2.properties + - type: bind + source: $PPL_JAR + target: /opt/bitnami/spark/jars/ppl-spark-integration.jar + - type: bind + source: $FLINT_JAR + target: /opt/bitnami/spark/jars/flint-spark-integration.jar + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8080/"] + interval: 1m + timeout: 5s + retries: 3 + start_period: 30s + start_interval: 5s + networks: + - opensearch-net + + spark-worker: + image: bitnami/spark:${SPARK_VERSION:-3.5.3} + container_name: spark-worker + environment: + - SPARK_MODE=worker + - SPARK_MASTER_URL=spark://spark:7077 + - SPARK_WORKER_MEMORY=${WORKER_MEMORY:-1G} + - SPARK_WORKER_CORES=${WORKER_CORES:-1} + - SPARK_RPC_AUTHENTICATION_ENABLED=no + - SPARK_RPC_ENCRYPTION_ENABLED=no + - SPARK_LOCAL_STORAGE_ENCRYPTION_ENABLED=no + - SPARK_SSL_ENABLED=no + - SPARK_PUBLIC_DNS=localhost + volumes: + - type: bind + source: ./spark-defaults.conf + target: /opt/bitnami/spark/conf/spark-defaults.conf + - type: bind + source: ./log4j2.properties + target: /opt/bitnami/spark/conf/log4j2.properties + - type: bind + source: $PPL_JAR + target: /opt/bitnami/spark/jars/ppl-spark-integration.jar + - type: bind + source: $FLINT_JAR + target: /opt/bitnami/spark/jars/flint-spark-integration.jar + networks: + - opensearch-net + depends_on: + - spark + + opensearch: + image: 
opensearchproject/opensearch:${OPENSEARCH_VERSION:-latest} + container_name: opensearch + environment: + - cluster.name=opensearch-cluster + - node.name=opensearch + - discovery.seed_hosts=opensearch + - cluster.initial_cluster_manager_nodes=opensearch + - bootstrap.memory_lock=true + - plugins.security.ssl.http.enabled=false + - OPENSEARCH_JAVA_OPTS=-Xms${OPENSEARCH_NODE_MEMORY:-512m} -Xmx${OPENSEARCH_NODE_MEMORY:-512m} + - OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_ADMIN_PASSWORD} + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch-data:/usr/share/opensearch/data + ports: + - ${OPENSEARCH_PORT:-9200}:9200 + - 9600:9600 + expose: + - "${OPENSEARCH_PORT:-9200}" + healthcheck: + test: ["CMD", "curl", "-f", "-u", "admin:${OPENSEARCH_ADMIN_PASSWORD}", "http://localhost:9200/_cluster/health"] + interval: 1m + timeout: 5s + retries: 3 + start_period: 30s + start_interval: 5s + networks: + - opensearch-net + + opensearch-dashboards: + image: opensearchproject/opensearch-dashboards:${DASHBOARDS_VERSION} + container_name: opensearch-dashboards + ports: + - ${OPENSEARCH_DASHBOARDS_PORT:-5601}:5601 + expose: + - "${OPENSEARCH_DASHBOARDS_PORT:-5601}" + environment: + OPENSEARCH_HOSTS: '["http://opensearch:9200"]' + networks: + - opensearch-net + depends_on: + - opensearch + + minio: + image: minio/minio + container_name: minio-S3 + # See original entrypoint/command under https://github.com/minio/minio/blob/master/Dockerfile + entrypoint: sh -c 'mkdir -p /data/test && minio server /data --console-address ":9001"' + ports: + - "9000:9000" + - "9001:9001" + volumes: + - minio-data:/data + networks: + - opensearch-net + +volumes: + opensearch-data: + minio-data: + +networks: + opensearch-net: diff --git a/docker/integ-test/log4j2.properties b/docker/integ-test/log4j2.properties new file mode 100644 index 000000000..ab96e03ba --- /dev/null +++ b/docker/integ-test/log4j2.properties @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the console +rootLogger.level = info +rootLogger.appenderRef.stdout.ref = console + +# In the pattern layout configuration below, we specify an explicit `%ex` conversion +# pattern for logging Throwables. If this was omitted, then (by default) Log4J would +# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional +# class packaging information. That extra information can sometimes add a substantial +# performance overhead, so we disable it in our default logging config. +# For more information, see SPARK-39361. 
+appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex + +# Set the default spark-shell/spark-sql log level to WARN. When running the +# spark-shell/spark-sql, the log level for these classes is used to overwrite +# the root logger's log level, so that the user can have different defaults +# for the shell and regular Spark apps. +logger.repl.name = org.apache.spark.repl.Main +logger.repl.level = warn + +logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver +logger.thriftserver.level = warn + +# Settings to quiet third party logs that are too verbose +logger.jetty1.name = org.sparkproject.jetty +logger.jetty1.level = warn +logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle +logger.jetty2.level = error +logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper +logger.replexprTyper.level = info +logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter +logger.replSparkILoopInterpreter.level = info +logger.parquet1.name = org.apache.parquet +logger.parquet1.level = error +logger.parquet2.name = parquet +logger.parquet2.level = error + +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler +logger.RetryingHMSHandler.level = fatal +logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry +logger.FunctionRegistry.level = error + +# For deploying Spark ThriftServer +# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805 +appender.console.filter.1.type = RegexFilter +appender.console.filter.1.regex = .*Thrift error occurred during processing of message.* +appender.console.filter.1.onMatch = deny +appender.console.filter.1.onMismatch = neutral diff --git a/docker/integ-test/prepare_scala_queries.py b/docker/integ-test/prepare_scala_queries.py new file mode 100755 index 000000000..dec62593b --- /dev/null +++ b/docker/integ-test/prepare_scala_queries.py @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 + +import csv + +queries = None +with open('../../integ-test/script/test_cases.csv', 'r') as f: + reader = csv.DictReader(f) + queries = [(row['query'], i, row.get('expected_status', None)) for i, row in enumerate(reader, start=1) if row['query'].strip()] + +print('try {') +for query in queries: + query_str = query[0].replace('\n', '').replace('"', '\\"') + if 'FAILED' == query[2]: + print(' try {') + print(f' spark.sql("{query_str}")') + print(' throw new Error') + print(' } catch {') + print(' case e: Exception => null') + print(' }\n') + else: + print(f' spark.sql("{query_str}")\n') +print('}') + diff --git a/docker/integ-test/queries.scala b/docker/integ-test/queries.scala new file mode 100644 index 000000000..7d6ee78c1 --- /dev/null +++ b/docker/integ-test/queries.scala @@ -0,0 +1,619 @@ +{ + try { + spark.sql("describe myglue_test.default.http_logs") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("describe `myglue_test`.`default`.`http_logs`") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10") + + spark.sql("source = myglue_test.default.http_logs | dedup status, 
size | head 10") + + spark.sql("source = myglue_test.default.http_logs | dedup 1 status keepempty=true | head 10") + + spark.sql("source = myglue_test.default.http_logs | dedup status, size keepempty=true | head 10") + + spark.sql("source = myglue_test.default.http_logs | dedup 2 status | head 10") + + spark.sql("source = myglue_test.default.http_logs | dedup 2 status, size | head 10") + + spark.sql("source = myglue_test.default.http_logs | dedup 2 status, size keepempty=true | head 10") + + try { + spark.sql("source = myglue_test.default.http_logs | dedup status CONSECUTIVE=true | fields status") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.http_logs | dedup 2 status, size CONSECUTIVE=true | fields status") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | sort status | fields @timestamp, clientip, status | head 10") + + try { + spark.sql("source = myglue_test.default.http_logs | fields @timestamp, notexisted | head 10") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.nested | fields int_col, struct_col.field1, struct_col2.field1 | head 10") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.nested | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | fields - @timestamp, clientip, status | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval new_time = @timestamp, new_clientip = clientip | fields - new_time, new_clientip, status | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval new_clientip = lower(clientip) | fields - new_clientip | head 10") + + spark.sql("source = myglue_test.default.http_logs | fields + @timestamp, clientip, status | fields - clientip, status | head 10") + + spark.sql("source = myglue_test.default.http_logs | fields - clientip, status | fields + @timestamp, clientip, status| head 10") + + spark.sql("source = myglue_test.default.http_logs | where status = 200 | head 10") + + spark.sql("source = myglue_test.default.http_logs | where status != 200 | head 10") + + spark.sql("source = myglue_test.default.http_logs | where size > 0 | head 10") + + spark.sql("source = myglue_test.default.http_logs | where size <= 0 | head 10") + + spark.sql("source = myglue_test.default.http_logs | where clientip = '236.14.2.0' | head 10") + + spark.sql("source = myglue_test.default.http_logs | where size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100") + + spark.sql("source = myglue_test.default.http_logs | where size <= 0 AND like(request, 'GET%') | head 10") + + spark.sql("source = myglue_test.default.http_logs status = 200 | head 10") + + spark.sql("source = myglue_test.default.http_logs size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100") + + spark.sql("source = myglue_test.default.http_logs size <= 0 AND like(request, 'GET%') | head 10") + + spark.sql("source = myglue_test.default.http_logs substring(clientip, 5, 2) = \"12\" | head 10") + + try { + spark.sql("source = myglue_test.default.http_logs | where isempty(size)") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.http_logs | where
ispresent(size)") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | where isnull(size) | head 10") + + spark.sql("source = myglue_test.default.http_logs | where isnotnull(size) | head 10") + + try { + spark.sql("source = myglue_test.default.http_logs | where isnotnull(coalesce(size, status)) | head 10") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | where like(request, 'GET%') | head 10") + + spark.sql("source = myglue_test.default.http_logs | where like(request, '%bordeaux%') | head 10") + + spark.sql("source = myglue_test.default.http_logs | where substring(clientip, 5, 2) = \"12\" | head 10") + + spark.sql("source = myglue_test.default.http_logs | where lower(request) = \"get /images/backnews.gif http/1.0\" | head 10") + + spark.sql("source = myglue_test.default.http_logs | where length(request) = 38 | head 10") + + try { + spark.sql("source = myglue_test.default.http_logs | where case(status = 200, 'success' else 'failed') = 'success' | head 10") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | eval h = \"Hello\", w = \"World\" | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval @h = \"Hello\" | eval @w = \"World\" | fields @timestamp, @h, @w") + + spark.sql("source = myglue_test.default.http_logs | eval newF = clientip | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval newF = clientip | fields clientip, newF | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval f = size | where f > 1 | sort f | fields size, clientip, status | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval f = status * 2 | eval h = f * 2 | fields status, f, h | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval f = size * 2, h = status | stats sum(f) by h") + + spark.sql("source = myglue_test.default.http_logs | eval f = UPPER(request) | eval h = 40 | fields f, h | head 10") + + try { + spark.sql("source = myglue_test.default.http_logs | eval request = \"test\" | fields request | head 10") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.http_logs | eval size = abs(size) | where size < 500") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.http_logs | eval status_string = case(status = 200, 'success' else 'failed') | head 10") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | eval n = now() | eval t = unix_timestamp(@timestamp) | fields n, t | head 10") + + try { + spark.sql("source = myglue_test.default.http_logs | eval e = isempty(size) | eval p = ispresent(size) | head 10") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.http_logs | eval c = coalesce(size, status) | head 10") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.http_logs | eval c = coalesce(request) | head 10") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | eval col1 = ln(size) | eval col2 = unix_timestamp(@timestamp) | sort - col1 | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval col1 = 1 | sort col1 | head 4 | eval col2 = 2 | 
sort - col2 | sort - size | head 2 | fields @timestamp, clientip, col2") + + spark.sql("source = myglue_test.default.mini_http_logs | eval stat = status | where stat > 300 | sort stat | fields @timestamp,clientip,status | head 5") + + spark.sql("source = myglue_test.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2") + + spark.sql("source = myglue_test.default.http_logs | stats avg(size) by clientip") + + spark.sql("source = myglue_test.default.http_logs | eval new_request = upper(request) | eval compound_field = concat('Hello ', if(like(new_request, '%bordeaux%'), 'World', clientip)) | fields new_request, compound_field | head 10") + + spark.sql("source = myglue_test.default.http_logs | stats avg(size)") + + spark.sql("source = myglue_test.default.nested | stats max(int_col) by struct_col.field2") + + spark.sql("source = myglue_test.default.nested | stats distinct_count(int_col)") + + spark.sql("source = myglue_test.default.nested | stats stddev_samp(int_col)") + + spark.sql("source = myglue_test.default.nested | stats stddev_pop(int_col)") + + spark.sql("source = myglue_test.default.nested | stats percentile(int_col)") + + spark.sql("source = myglue_test.default.nested | stats percentile_approx(int_col)") + + spark.sql("source = myglue_test.default.mini_http_logs | stats stddev_samp(status)") + + spark.sql("source = myglue_test.default.mini_http_logs | where status > 200 | stats percentile_approx(status, 99)") + + spark.sql("source = myglue_test.default.nested | stats count(int_col) by span(struct_col.field2, 10) as a_span") + + spark.sql("source = myglue_test.default.nested | stats avg(int_col) by span(struct_col.field2, 10) as a_span, struct_col2.field2") + + spark.sql("source = myglue_test.default.http_logs | stats sum(size) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10") + + spark.sql("source = myglue_test.default.http_logs | stats distinct_count(clientip) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10") + + spark.sql("source = myglue_test.default.http_logs | stats avg(size) as avg_size by status, year | stats avg(avg_size) as avg_avg_size by year") + + spark.sql("source = myglue_test.default.http_logs | stats avg(size) as avg_size by status, year, month | stats avg(avg_size) as avg_avg_size by year, month | stats avg(avg_avg_size) as avg_avg_avg_size by year") + + try { + spark.sql("source = myglue_test.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | stats avg(avg_int) as avg_avg_int by struct_col2.field2") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | eval new_col = avg_int | stats avg(avg_int) as avg_avg_int by new_col") + + spark.sql("source = myglue_test.default.nested | rare int_col") + + spark.sql("source = myglue_test.default.nested | rare int_col by struct_col.field2") + + spark.sql("source = myglue_test.default.http_logs | rare request") + + spark.sql("source = myglue_test.default.http_logs | where status > 300 | rare request by status") + + spark.sql("source = myglue_test.default.http_logs | rare clientip") + + spark.sql("source = myglue_test.default.http_logs | where status > 300 | rare clientip") + + spark.sql("source = myglue_test.default.http_logs | where status > 300 | rare clientip by day") + + spark.sql("source = myglue_test.default.nested | top int_col by struct_col.field2") + +
spark.sql("source = myglue_test.default.nested | top 1 int_col by struct_col.field2") + + spark.sql("source = myglue_test.default.nested | top 2 int_col by struct_col.field2") + + spark.sql("source = myglue_test.default.nested | top int_col") + + try { + spark.sql("source = myglue_test.default.http_logs | inner join left=l right=r on l.status = r.int_col myglue_test.default.nested | head 10") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | fields request, domain | head 10") + + spark.sql("source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | top 1 domain") + + spark.sql("source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | stats count() by domain") + + spark.sql("source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | eval a = 1 | fields a, domain | head 10") + + spark.sql("source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | where size > 0 | sort - size | fields size, domain | head 10") + + spark.sql("source = myglue_test.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/(?<picName>[a-zA-Z]+)/.*' | where domain = 'english' | sort - picName | fields domain, picName | head 10") + + spark.sql("source = myglue_test.default.http_logs | patterns request | fields patterns_field | head 10") + + spark.sql("source = myglue_test.default.http_logs | patterns request | where size > 0 | fields patterns_field | head 10") + + spark.sql("source = myglue_test.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | fields request, no_letter | head 10") + + spark.sql("source = myglue_test.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | stats count() by no_letter") + + try { + spark.sql("source = myglue_test.default.http_logs | patterns new_field='status' pattern='[a-zA-Z]' request | fields request, status | head 10") + throw new Error + } catch { + case e: Exception => null + } + + try { + spark.sql("source = myglue_test.default.http_logs | rename @timestamp as timestamp | head 10") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.default.http_logs | sort size | head 10") + + spark.sql("source = myglue_test.default.http_logs | sort + size | head 10") + + spark.sql("source = myglue_test.default.http_logs | sort - size | head 10") + + spark.sql("source = myglue_test.default.http_logs | sort + size, + @timestamp | head 10") + + spark.sql("source = myglue_test.default.http_logs | sort - size, - @timestamp | head 10") + + spark.sql("source = myglue_test.default.http_logs | sort - size, @timestamp | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval c1 = upper(request) | eval c2 = concat('Hello ', if(like(c1, '%bordeaux%'), 'World', clientip)) | eval c3 = length(request) | eval c4 = ltrim(request) | eval c5 = rtrim(request) | eval c6 = substring(clientip, 5, 2) | eval c7 = trim(request) | eval c8 = upper(request) | eval c9 = position('bordeaux' IN request) | eval c10 = replace(request, 'GET', 'GGG') | fields c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval c1 = unix_timestamp(@timestamp) | eval c2 = now() | eval c3 =DAY_OF_WEEK(@timestamp) | eval c4 =DAY_OF_MONTH(@timestamp) | eval c5 =DAY_OF_YEAR(@timestamp) | eval c6 =WEEK_OF_YEAR(@timestamp) | eval c7 =WEEK(@timestamp) | eval c8 =MONTH_OF_YEAR(@timestamp) | eval c9
=HOUR_OF_DAY(@timestamp) | eval c10 =MINUTE_OF_HOUR(@timestamp) | eval c11 =SECOND_OF_MINUTE(@timestamp) | eval c12 =LOCALTIME() | fields c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12 | head 10") + + spark.sql("source=myglue_test.default.people | eval c1 = adddate(@timestamp, 1) | fields c1 | head 10") + + spark.sql("source=myglue_test.default.people | eval c2 = subdate(@timestamp, 1) | fields c2 | head 10") + + spark.sql("source=myglue_test.default.people | eval c1 = date_add(@timestamp, INTERVAL 1 DAY) | fields c1 | head 10") + + spark.sql("source=myglue_test.default.people | eval c1 = date_sub(@timestamp, INTERVAL 1 DAY) | fields c1 | head 10") + + spark.sql("source=myglue_test.default.people | eval `CURDATE()` = CURDATE() | fields `CURDATE()`") + + spark.sql("source=myglue_test.default.people | eval `CURRENT_DATE()` = CURRENT_DATE() | fields `CURRENT_DATE()`") + + spark.sql("source=myglue_test.default.people | eval `CURRENT_TIMESTAMP()` = CURRENT_TIMESTAMP() | fields `CURRENT_TIMESTAMP()`") + + spark.sql("source=myglue_test.default.people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')`") + + spark.sql("source=myglue_test.default.people | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))`") + + spark.sql("source=myglue_test.default.people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')`") + + spark.sql("source=myglue_test.default.people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')` = DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')`") + + spark.sql("source=myglue_test.default.people | eval `'2000-01-02' - '2000-01-01'` = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')) | fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`") + + spark.sql("source=myglue_test.default.people | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) | fields `DAY(DATE('2020-08-26'))`") + + try { + spark.sql("source=myglue_test.default.people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))`") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source=myglue_test.default.people | eval `CURRENT_TIMEZONE()` = CURRENT_TIMEZONE() | fields `CURRENT_TIMEZONE()`") + + spark.sql("source=myglue_test.default.people | eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP() | fields `UTC_TIMESTAMP()`") + + spark.sql("source=myglue_test.default.people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))` = TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))`") + + spark.sql("source=myglue_test.default.people | eval
`TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')`") + + spark.sql(" source = myglue_test.default.http_logs | stats count()") + + spark.sql("source = myglue_test.default.http_logs | stats avg(size) as c1, max(size) as c2, min(size) as c3, sum(size) as c4, percentile(size, 50) as c5, stddev_pop(size) as c6, stddev_samp(size) as c7, distinct_count(size) as c8") + + spark.sql("source = myglue_test.default.http_logs | eval c1 = abs(size) | eval c2 = ceil(size) | eval c3 = floor(size) | eval c4 = sqrt(size) | eval c5 = ln(size) | eval c6 = pow(size, 2) | eval c7 = mod(size, 2) | fields c1, c2, c3, c4, c5, c6, c7 | head 10") + + spark.sql("source = myglue_test.default.http_logs | eval c1 = isnull(request) | eval c2 = isnotnull(request) | eval c3 = ifnull(request,\"Unknown\") | eval c4 = nullif(request,\"Unknown\") | eval c5 = isnull(size) | eval c6 = if(like(request, '%bordeaux%'), 'hello', 'world') | fields c1, c2, c3, c4, c5, c6 | head 10") + + spark.sql("/* this is block comment */ source = myglue_test.tpch_csv.orders | head 1 // this is line comment") + + spark.sql("/* test in tpch q16, q18, q20 */ source = myglue_test.tpch_csv.orders | head 1 // add source=xx to avoid failure in automation") + + spark.sql("/* test in tpch q4, q21, q22 */ source = myglue_test.tpch_csv.orders | head 1") + + spark.sql("/* test in tpch q2, q11, q15, q17, q20, q22 */ source = myglue_test.tpch_csv.orders | head 1") + + spark.sql("/* test in tpch q7, q8, q9, q13, q15, q22 */ source = myglue_test.tpch_csv.orders | head 1") + + spark.sql("/* lots of inner join tests in tpch */ source = myglue_test.tpch_csv.orders | head 1") + + spark.sql("/* left join test in tpch q13 */ source = myglue_test.tpch_csv.orders | head 1") + + spark.sql("source = myglue_test.tpch_csv.orders | right outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') myglue_test.tpch_csv.customer| stats count(o_orderkey) as c_count by c_custkey| sort - c_count") + + spark.sql("source = myglue_test.tpch_csv.orders | full outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') myglue_test.tpch_csv.customer| stats count(o_orderkey) as c_count by c_custkey| sort - c_count") + + spark.sql("source = myglue_test.tpch_csv.customer| semi join ON c_custkey = o_custkey myglue_test.tpch_csv.orders| where c_mktsegment = 'BUILDING' | sort - c_custkey| head 10") + + spark.sql("source = myglue_test.tpch_csv.customer| anti join ON c_custkey = o_custkey myglue_test.tpch_csv.orders| where c_mktsegment = 'BUILDING' | sort - c_custkey| head 10") + + spark.sql("source = myglue_test.tpch_csv.supplier| where like(s_comment, '%Customer%Complaints%')| join ON s_nationkey > n_nationkey [ source = myglue_test.tpch_csv.nation | where n_name = 'SAUDI ARABIA' ]| sort - s_name| head 10") + + spark.sql("source = myglue_test.tpch_csv.supplier| where like(s_comment, '%Customer%Complaints%')| join [ source = myglue_test.tpch_csv.nation | where n_name = 'SAUDI ARABIA' ]| sort - s_name| head 10") + + spark.sql("source=myglue_test.default.people | LOOKUP myglue_test.default.work_info uid AS id REPLACE department | stats distinct_count(department)") + + spark.sql("source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS id APPEND department | stats 
distinct_count(department)") + + spark.sql("source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS id REPLACE department AS country | stats distinct_count(country)") + + spark.sql("source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS id APPEND department AS country | stats distinct_count(country)") + + spark.sql("source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uID AS id, name REPLACE department | stats distinct_count(department)") + + spark.sql("source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS ID, name APPEND department | stats distinct_count(department)") + + spark.sql("source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uID AS id, name | head 10") + + spark.sql("source = myglue_test.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP myglue_test.default.work_info name REPLACE occupation AS major | stats distinct_count(major)") + + spark.sql("source = myglue_test.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP myglue_test.default.work_info name APPEND occupation AS major | stats distinct_count(major)") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('{\"account_number\":1,\"balance\":39225,\"age\":32,\"gender\":\"M\"}') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('{\"f1\":\"abc\",\"f2\":{\"f3\":\"a\",\"f4\":\"b\"}}') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('[1,2,3,{\"f1\":1,\"f2\":[5,6]},4]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('[]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('{\"invalid\": \"json\"') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('[1,2,3]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('[1,2') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('[invalid json]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json('invalid json') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json(null) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_array('this', 'is', 'a', 'string', 'array') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_array() | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_array(1, 2, 0, -1, 1.1, -0.11) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_array('this', 'is', 1.1, -0.11, true, false) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = array_length(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = array_length(json_array()) | head 1 | fields
res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_array_length('[]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_array_length('[1,2,3,{\"f1\":1,\"f2\":[5,6]},4]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_array_length('{\"key\": 1}') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_array_length('[1,2') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', 'string_value')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', 123.45)) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', true)) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_object(\"a\", 1, \"b\", 2, \"c\", 3)) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', array())) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', array(1, 2, 3))) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_object('outer', json_object('inner', 123.45))) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = to_json_string(json_object(\"array\", json_array(1,2,0,-1,1.1,-0.11))) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | where json_valid('{\"account_number\":1,\"balance\":39225,\"age\":32,\"gender\":\"M\"}') | head 1") + + spark.sql("source = myglue_test.default.http_logs | where not json_valid('{\"account_number\":1,\"balance\":39225,\"age\":32,\"gender\":\"M\"}') | head 1") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('{\"account_number\":1,\"balance\":39225,\"age\":32,\"gender\":\"M\"}')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('{\"f1\":\"abc\",\"f2\":{\"f3\":\"a\",\"f4\":\"b\"}}')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('[1,2,3,{\"f1\":1,\"f2\":[5,6]},4]')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('[]')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('{\"invalid\": \"json\"')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('[1,2,3]')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('[1,2')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('[invalid json]')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json('invalid json')) | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_keys(json(null)) | head 1 | fields res") + + spark.sql("source =
myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$.teacher') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$.student') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$.student[*]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$.student[0]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$.student[*].name') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$.student[1].name') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$.student[0].not_exist_key') | head 1 | fields res") + + spark.sql("source = myglue_test.default.http_logs | eval res = json_extract('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}', '$.student[10]') | head 1 | fields res") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > 0) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > -10) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(json_object(\"a\",1,\"b\",-1),json_object(\"a\",-1,\"b\",-1)), result = forall(array, x -> x.a > 0) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(json_object(\"a\",1,\"b\",-1),json_object(\"a\",-1,\"b\",-1)), result = exists(array, x -> x.b < 0) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 0) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 10) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 0) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 10) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,3), result = transform(array, x -> x + 1) | head 1 | fields result") + + spark.sql("source = 
myglue_test.default.people | eval array = json_array(1,2,3), result = transform(array, (x, y) -> x + y) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x) | head 1 | fields result") + + spark.sql("source = myglue_test.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | head 1 | fields result") + + spark.sql("source=myglue_test.default.people | eval age = salary | eventstats avg(age) | sort id | head 10") + + spark.sql("source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count | sort id | head 10") + + spark.sql("source=myglue_test.default.people | eventstats avg(salary) by country | sort id | head 10") + + spark.sql("source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by country | sort id | head 10") + + spark.sql("source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by span(age, 10) | sort id | head 10") + + spark.sql("source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by span(age, 10) as age_span, country | sort id | head 10") + + spark.sql("source=myglue_test.default.people | where country != 'USA' | eventstats stddev_samp(salary), stddev_pop(salary), percentile_approx(salary, 60) by span(salary, 1000) as salary_span | sort id | head 10") + + spark.sql("source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age by occupation, country | eventstats avg(avg_age) as avg_state_age by country | sort id | head 10") + + try { + spark.sql("source=myglue_test.default.people | eventstats distinct_count(salary) by span(salary, 1000) as age_span") + throw new Error + } catch { + case e: Exception => null + } + + spark.sql("source = myglue_test.tpch_csv.lineitem| where l_shipdate <= subdate(date('1998-12-01'), 90)| stats sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count() as count_order by l_returnflag, l_linestatus| sort l_returnflag, l_linestatus") + + spark.sql("source = myglue_test.tpch_csv.part| join ON p_partkey = ps_partkey myglue_test.tpch_csv.partsupp| join ON s_suppkey = ps_suppkey myglue_test.tpch_csv.supplier| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation| join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region| where p_size = 15 AND like(p_type, '%BRASS') AND r_name = 'EUROPE' AND ps_supplycost = [ source = myglue_test.tpch_csv.partsupp | join ON s_suppkey = ps_suppkey myglue_test.tpch_csv.supplier | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation | join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region | where r_name = 'EUROPE' | stats MIN(ps_supplycost) ]| sort - s_acctbal, n_name, s_name, p_partkey| head 100") + + spark.sql("source = myglue_test.tpch_csv.customer| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem| where c_mktsegment = 'BUILDING' AND o_orderdate
< date('1995-03-15') AND l_shipdate > date('1995-03-15')| stats sum(l_extendedprice * (1 - l_discount)) as revenue by l_orderkey, o_orderdate, o_shippriority | sort - revenue, o_orderdate| head 10") + + spark.sql("source = myglue_test.tpch_csv.orders| where o_orderdate >= date('1993-07-01') and o_orderdate < date_add(date('1993-07-01'), interval 3 month) and exists [ source = myglue_test.tpch_csv.lineitem | where l_orderkey = o_orderkey and l_commitdate < l_receiptdate ]| stats count() as order_count by o_orderpriority| sort o_orderpriority") + + spark.sql("source = myglue_test.tpch_csv.customer| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem| join ON l_suppkey = s_suppkey AND c_nationkey = s_nationkey myglue_test.tpch_csv.supplier| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation| join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region| where r_name = 'ASIA' AND o_orderdate >= date('1994-01-01') AND o_orderdate < date_add(date('1994-01-01'), interval 1 year)| stats sum(l_extendedprice * (1 - l_discount)) as revenue by n_name| sort - revenue") + + spark.sql("source = myglue_test.tpch_csv.lineitem| where l_shipdate >= date('1994-01-01') and l_shipdate < adddate(date('1994-01-01'), 365) and l_discount between .06 - 0.01 and .06 + 0.01 and l_quantity < 24| stats sum(l_extendedprice * l_discount) as revenue") + + spark.sql("source = [ source = myglue_test.tpch_csv.supplier | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.lineitem | join ON o_orderkey = l_orderkey myglue_test.tpch_csv.orders | join ON c_custkey = o_custkey myglue_test.tpch_csv.customer | join ON s_nationkey = n1.n_nationkey myglue_test.tpch_csv.nation as n1 | join ON c_nationkey = n2.n_nationkey myglue_test.tpch_csv.nation as n2 | where l_shipdate between date('1995-01-01') and date('1996-12-31') and n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY' or n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE' | eval supp_nation = n1.n_name, cust_nation = n2.n_name, l_year = year(l_shipdate), volume = l_extendedprice * (1 - l_discount) | fields supp_nation, cust_nation, l_year, volume ] as shipping| stats sum(volume) as revenue by supp_nation, cust_nation, l_year| sort supp_nation, cust_nation, l_year") + + spark.sql("source = [ source = myglue_test.tpch_csv.part | join ON p_partkey = l_partkey myglue_test.tpch_csv.lineitem | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.supplier | join ON l_orderkey = o_orderkey myglue_test.tpch_csv.orders | join ON o_custkey = c_custkey myglue_test.tpch_csv.customer | join ON c_nationkey = n1.n_nationkey myglue_test.tpch_csv.nation as n1 | join ON s_nationkey = n2.n_nationkey myglue_test.tpch_csv.nation as n2 | join ON n1.n_regionkey = r_regionkey myglue_test.tpch_csv.region | where r_name = 'AMERICA' AND p_type = 'ECONOMY ANODIZED STEEL' and o_orderdate between date('1995-01-01') and date('1996-12-31') | eval o_year = year(o_orderdate) | eval volume = l_extendedprice * (1 - l_discount) | eval nation = n2.n_name | fields o_year, volume, nation ] as all_nations| stats sum(case(nation = 'BRAZIL', volume else 0)) as sum_case, sum(volume) as sum_volume by o_year| eval mkt_share = sum_case / sum_volume| fields mkt_share, o_year| sort o_year") + + spark.sql("source = [ source = myglue_test.tpch_csv.part | join ON p_partkey = l_partkey myglue_test.tpch_csv.lineitem | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.supplier | join ON ps_partkey = l_partkey and ps_suppkey = l_suppkey myglue_test.tpch_csv.partsupp 
| join ON o_orderkey = l_orderkey myglue_test.tpch_csv.orders | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation | where like(p_name, '%green%') | eval nation = n_name | eval o_year = year(o_orderdate) | eval amount = l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity | fields nation, o_year, amount ] as profit| stats sum(amount) as sum_profit by nation, o_year| sort nation, - o_year") + + spark.sql("source = myglue_test.tpch_csv.customer| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem| join ON c_nationkey = n_nationkey myglue_test.tpch_csv.nation| where o_orderdate >= date('1993-10-01') AND o_orderdate < date_add(date('1993-10-01'), interval 3 month) AND l_returnflag = 'R'| stats sum(l_extendedprice * (1 - l_discount)) as revenue by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment| sort - revenue| head 20") + + spark.sql("source = myglue_test.tpch_csv.partsupp| join ON ps_suppkey = s_suppkey myglue_test.tpch_csv.supplier| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation| where n_name = 'GERMANY'| stats sum(ps_supplycost * ps_availqty) as value by ps_partkey| where value > [ source = myglue_test.tpch_csv.partsupp | join ON ps_suppkey = s_suppkey myglue_test.tpch_csv.supplier | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation | where n_name = 'GERMANY' | stats sum(ps_supplycost * ps_availqty) as check | eval threshold = check * 0.0001000000 | fields threshold ]| sort - value") + + spark.sql("source = myglue_test.tpch_csv.orders| join ON o_orderkey = l_orderkey myglue_test.tpch_csv.lineitem| where l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_shipmode in ('MAIL', 'SHIP') and l_receiptdate >= date('1994-01-01') and l_receiptdate < date_add(date('1994-01-01'), interval 1 year)| stats sum(case(o_orderpriority = '1-URGENT' or o_orderpriority = '2-HIGH', 1 else 0)) as high_line_count, sum(case(o_orderpriority != '1-URGENT' and o_orderpriority != '2-HIGH', 1 else 0)) as low_line_count by l_shipmode| sort l_shipmode") + + spark.sql("source = [ source = myglue_test.tpch_csv.customer | left outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') myglue_test.tpch_csv.orders | stats count(o_orderkey) as c_count by c_custkey ] as c_orders| stats count() as custdist by c_count| sort - custdist, - c_count") + + spark.sql("source = myglue_test.tpch_csv.lineitem| join ON l_partkey = p_partkey AND l_shipdate >= date('1995-09-01') AND l_shipdate < date_add(date('1995-09-01'), interval 1 month) myglue_test.tpch_csv.part| stats sum(case(like(p_type, 'PROMO%'), l_extendedprice * (1 - l_discount) else 0)) as sum1, sum(l_extendedprice * (1 - l_discount)) as sum2| eval promo_revenue = 100.00 * sum1 / sum2 // Stats and Eval commands can combine when issues/819 resolved| fields promo_revenue") + + spark.sql("source = myglue_test.tpch_csv.supplier| join right = revenue0 ON s_suppkey = supplier_no [ source = myglue_test.tpch_csv.lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month) | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by supplier_no ]| where total_revenue = [ source = [ source = myglue_test.tpch_csv.lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month) | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by
supplier_no ] | stats max(total_revenue) ]| sort s_suppkey| fields s_suppkey, s_name, s_address, s_phone, total_revenue") + + spark.sql("source = myglue_test.tpch_csv.partsupp| join ON p_partkey = ps_partkey myglue_test.tpch_csv.part| where p_brand != 'Brand#45' and not like(p_type, 'MEDIUM POLISHED%') and p_size in (49, 14, 23, 45, 19, 3, 36, 9) and ps_suppkey not in [ source = myglue_test.tpch_csv.supplier | where like(s_comment, '%Customer%Complaints%') | fields s_suppkey ]| stats distinct_count(ps_suppkey) as supplier_cnt by p_brand, p_type, p_size| sort - supplier_cnt, p_brand, p_type, p_size") + + spark.sql("source = myglue_test.tpch_csv.lineitem| join ON p_partkey = l_partkey myglue_test.tpch_csv.part| where p_brand = 'Brand#23' and p_container = 'MED BOX' and l_quantity < [ source = myglue_test.tpch_csv.lineitem | where l_partkey = p_partkey | stats avg(l_quantity) as avg | eval `0.2 * avg` = 0.2 * avg | fields `0.2 * avg` ]| stats sum(l_extendedprice) as sum| eval avg_yearly = sum / 7.0| fields avg_yearly") + + spark.sql("source = myglue_test.tpch_csv.customer| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders| join ON o_orderkey = l_orderkey myglue_test.tpch_csv.lineitem| where o_orderkey in [ source = myglue_test.tpch_csv.lineitem | stats sum(l_quantity) as sum by l_orderkey | where sum > 300 | fields l_orderkey ]| stats sum(l_quantity) by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice| sort - o_totalprice, o_orderdate| head 100") + + spark.sql("source = myglue_test.tpch_csv.lineitem| join ON p_partkey = l_partkey and p_brand = 'Brand#12' and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') and l_quantity >= 1 and l_quantity <= 1 + 10 and p_size between 1 and 5 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' OR p_partkey = l_partkey and p_brand = 'Brand#23' and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') and l_quantity >= 10 and l_quantity <= 10 + 10 and p_size between 1 and 10 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' OR p_partkey = l_partkey and p_brand = 'Brand#34' and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') and l_quantity >= 20 and l_quantity <= 20 + 10 and p_size between 1 and 15 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' myglue_test.tpch_csv.part") + + spark.sql("source = myglue_test.tpch_csv.supplier| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation| where n_name = 'CANADA' and s_suppkey in [ source = myglue_test.tpch_csv.partsupp | where ps_partkey in [ source = myglue_test.tpch_csv.part | where like(p_name, 'forest%') | fields p_partkey ] and ps_availqty > [ source = myglue_test.tpch_csv.lineitem | where l_partkey = ps_partkey and l_suppkey = ps_suppkey and l_shipdate >= date('1994-01-01') and l_shipdate < date_add(date('1994-01-01'), interval 1 year) | stats sum(l_quantity) as sum_l_quantity | eval half_sum_l_quantity = 0.5 * sum_l_quantity | fields half_sum_l_quantity ] | fields ps_suppkey ]") + + spark.sql("source = myglue_test.tpch_csv.supplier| join ON s_suppkey = l1.l_suppkey myglue_test.tpch_csv.lineitem as l1| join ON o_orderkey = l1.l_orderkey myglue_test.tpch_csv.orders| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation| where o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists [ source = myglue_test.tpch_csv.lineitem as l2 | where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey != l1.l_suppkey ] and not exists [ source = myglue_test.tpch_csv.lineitem as l3 | 
where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey != l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate ] and n_name = 'SAUDI ARABIA'| stats count() as numwait by s_name| sort - numwait, s_name| head 100") + + spark.sql("source = [ source = myglue_test.tpch_csv.customer | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') and c_acctbal > [ source = myglue_test.tpch_csv.customer | where c_acctbal > 0.00 and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') | stats avg(c_acctbal) ] and not exists [ source = myglue_test.tpch_csv.orders | where o_custkey = c_custkey ] | eval cntrycode = substring(c_phone, 1, 2) | fields cntrycode, c_acctbal ] as custsale| stats count() as numcust, sum(c_acctbal) as totacctbal by cntrycode| sort cntrycode") + +} diff --git a/docker/integ-test/spark-defaults.conf b/docker/integ-test/spark-defaults.conf new file mode 100644 index 000000000..19b9e4ec1 --- /dev/null +++ b/docker/integ-test/spark-defaults.conf @@ -0,0 +1,35 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Default system properties included when running spark-submit. +# This is useful for setting default environmental settings. + +# Example: +# spark.master spark://master:7077 +# spark.eventLog.enabled true +# spark.eventLog.dir hdfs://namenode:8021/directory +# spark.serializer org.apache.spark.serializer.KryoSerializer +# spark.driver.memory 5g +# spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three" +spark.sql.extensions org.opensearch.flint.spark.FlintPPLSparkExtensions,org.opensearch.flint.spark.FlintSparkExtensions +spark.sql.catalog.dev org.apache.spark.opensearch.catalog.OpenSearchCatalog +spark.datasource.flint.host opensearch +spark.datasource.flint.port 9200 +spark.datasource.flint.scheme http +spark.datasource.flint.auth basic +spark.datasource.flint.auth.username admin +spark.datasource.flint.auth.password C0rrecthorsebatterystaple. 
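With these defaults in place, any Spark shell started inside the cluster loads the PPL/Flint extensions and the OpenSearch connection settings automatically. A minimal smoke test of the configuration (a sketch; the index name `test` is hypothetical and must already exist in OpenSearch):

```shell
# Start the cluster defined in docker-compose.yml, then open a shell on the master
docker compose up -d
docker exec -it spark /opt/bitnami/spark/bin/spark-shell --master spark://spark:7077
# Inside the shell, the `dev` catalog configured above maps to OpenSearch, e.g.:
#   spark.sql("source = dev.default.test | head 5").show()
```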
diff --git a/docker/integ-test/spark-master-entrypoint.sh b/docker/integ-test/spark-master-entrypoint.sh new file mode 100755 index 000000000..a21c20643 --- /dev/null +++ b/docker/integ-test/spark-master-entrypoint.sh @@ -0,0 +1,19 @@
+#!/bin/bash
+
+function start_spark_connect() {
+    # Derive the Scala/Spark version pair (e.g. "2.12:3.5.3") from the spark-core jar name
+    sc_version=$(ls -1 /opt/bitnami/spark/jars/spark-core_*.jar | sed -e 's/^.*\/spark-core_//' -e 's/\.jar$//' -e 's/-/:/')
+
+    attempt=1
+    # Attempt the launch up to 10 times (once per second) while the master sentinel file exists
+    while [ -e "/tmp/spark_master_running" -a "$attempt" -le 10 ]; do
+        sleep 1
+        /opt/bitnami/spark/sbin/start-connect-server.sh --master spark://spark:7077 --packages org.apache.spark:spark-connect_${sc_version}
+        attempt=$(($attempt+1))
+    done
+}
+
+touch /tmp/spark_master_running
+start_spark_connect &
+/opt/bitnami/scripts/spark/entrypoint.sh /opt/bitnami/scripts/spark/run.sh
+rm /tmp/spark_master_running
diff --git a/docs/ppl-lang/functions/ppl-ip.md b/docs/ppl-lang/functions/ppl-ip.md index fb0b468ba..65cc9dac9 100644 --- a/docs/ppl-lang/functions/ppl-ip.md +++ b/docs/ppl-lang/functions/ppl-ip.md @@ -32,4 +32,67 @@ Note:
 - `ip` can be an IPv4 or an IPv6 address
 - `cidr` can be an IPv4 or an IPv6 block
 - `ip` and `cidr` must be either both IPv4 or both IPv6
-- `ip` and `cidr` must both be valid and non-empty/non-null
\ No newline at end of file
+- `ip` and `cidr` must both be valid and non-empty/non-null
+
+### `GEOIP`
+
+**Description**
+
+`GEOIP(ip[, property]...)` retrieves geospatial data corresponding to the provided `ip`.
+
+**Argument type:**
+- `ip` is a **STRING** representing an IPv4 or an IPv6 address.
+- `property` is a **STRING** and must be one of the following:
+  - `COUNTRY_ISO_CODE`
+  - `COUNTRY_NAME`
+  - `CONTINENT_NAME`
+  - `REGION_ISO_CODE`
+  - `REGION_NAME`
+  - `CITY_NAME`
+  - `TIME_ZONE`
+  - `LOCATION`
+- Return type:
+  - **STRING** if one property is given
+  - **STRUCT_TYPE** if more than one or no property is given
+
+Example:
+
+_Without properties:_
+
+    os> source=ips | eval a = geoip(ip) | fields ip, a
+    fetched rows / total rows = 2/2
+    +---------------------+-------------------------------------------------------------------------------------------------------+
+    |ip                   |a                                                                                                      |
+    +---------------------+-------------------------------------------------------------------------------------------------------+
+    |66.249.157.90        |{JM, Jamaica, North America, 14, Saint Catherine Parish, Portmore, America/Jamaica, 17.9686,-76.8827}  |
+    |2a09:bac2:19f8:2ac3::|{CA, Canada, North America, PE, Prince Edward Island, Charlottetown, America/Halifax, 46.2396,-63.1355}|
+    +---------------------+-------------------------------------------------------------------------------------------------------+
+
+_With one property:_
+
+    os> source=users | eval a = geoip(ip, COUNTRY_NAME) | fields ip, a
+    fetched rows / total rows = 2/2
+    +---------------------+-------+
+    |ip                   |a      |
+    +---------------------+-------+
+    |66.249.157.90        |Jamaica|
+    |2a09:bac2:19f8:2ac3::|Canada |
+    +---------------------+-------+
+
+_With multiple properties:_
+
+    os> source=users | eval a = geoip(ip, COUNTRY_NAME, REGION_NAME, CITY_NAME) | fields ip, a
+    fetched rows / total rows = 2/2
+    +---------------------+---------------------------------------------+
+    |ip                   |a                                            |
+    +---------------------+---------------------------------------------+
+    |66.249.157.90        |{Jamaica, Saint Catherine Parish, Portmore}  |
+    |2a09:bac2:19f8:2ac3::|{Canada, Prince Edward Island, Charlottetown}|
+    +---------------------+---------------------------------------------+
+
+Note:
+- To use `geoip`, the user must create a Spark table containing geo IP location data. Instructions for creating the table can be found [here](../../opensearch-geoip.md).
+  - By default, `geoip` expects the created table to be named `geoip_ip_data`.
+  - If a different table name is desired, set the `spark.geoip.tablename` Spark config to the new table name.
+- `ip` can be an IPv4 or an IPv6 address.
+- `geoip` is always evaluated first when used with other eval functions.
diff --git a/docs/ppl-lang/planning/ppl-geoip-command.md b/docs/ppl-lang/planning/ppl-geoip-command.md new file mode 100644 index 000000000..aaed6c156 --- /dev/null +++ b/docs/ppl-lang/planning/ppl-geoip-command.md @@ -0,0 +1,59 @@
+## geoip syntax proposal
+
+The geoip function adds information about the geographical location of an IPv4 or IPv6 address.
+
+**Implementation syntax**
+- `... | eval geoinfo = geoip(ipAddress *[,properties])`
+- generic syntax
+- `... | eval geoinfo = geoip(ipAddress)`
+- retrieves all geo data
+- `... | eval geoinfo = geoip(ipAddress, city, location)`
+- retrieves only city and location
+
+**Implementation details**
+- The current implementation requires the user to have created a geoip table. The geoip table has the following schema:
+
+  ```SQL
+  CREATE TABLE geoip (
+      cidr STRING,
+      country_iso_code STRING,
+      country_name STRING,
+      continent_name STRING,
+      region_iso_code STRING,
+      region_name STRING,
+      city_name STRING,
+      time_zone STRING,
+      location STRING,
+      ip_range_start BIGINT,
+      ip_range_end BIGINT,
+      ipv4 BOOLEAN
+  )
+  ```
+
+- `geoip` is resolved by performing a join on said table and projecting the resulting geoip data as a struct.
+- using `geoip` is equivalent to running the following SQL query (note the `ipv4` column from the schema above):
+
+  ```SQL
+  SELECT source.*, struct(geoip.country_name, geoip.city_name) AS a
+  FROM source, geoip
+  WHERE geoip.ip_range_start <= ip_to_int(source.ip)
+    AND geoip.ip_range_end > ip_to_int(source.ip)
+    AND geoip.ipv4 = is_ipv4(source.ip);
+  ```
+- if only one property is provided in the function call, `geoip` returns a string of the specified property instead:
+
+  ```SQL
+  SELECT source.*, geoip.country_name AS a
+  FROM source, geoip
+  WHERE geoip.ip_range_start <= ip_to_int(source.ip)
+    AND geoip.ip_range_end > ip_to_int(source.ip)
+    AND geoip.ipv4 = is_ipv4(source.ip);
+  ```
+
+**Future plan for additional data-sources**
+
+- Currently, only a pre-existing geoip table defined within Spark can be used.
+- There are plans to allow users to specify other data sources:
+  - API data sources: if users have their own geoip provider, we will add the ability to configure and call those endpoints
+  - OpenSearch geospatial client: once the geospatial client is published, we can leverage it to use the OpenSearch geo2ip functionality.
+- Additional data-source connection params will be provided through Spark config options.
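The join conditions in this proposal rely on addresses being stored in integer form. A minimal illustration of that convention (hypothetical helpers mirroring the `ip_to_int`/`is_ipv4` names above; not part of this change):

```python
# Illustrative sketch only: how ip_to_int/is_ipv4-style helpers map an address and
# a CIDR block onto the ip_range_start/ip_range_end columns used in the join above.
import ipaddress

def ip_to_int(ip: str) -> int:
    # IPv4 and IPv6 addresses both reduce to integers, so a CIDR block
    # becomes one contiguous [ip_range_start, ip_range_end) interval.
    return int(ipaddress.ip_address(ip))

def is_ipv4(ip: str) -> bool:
    return ipaddress.ip_address(ip).version == 4

net = ipaddress.ip_network("66.249.157.0/24")
ip_range_start, ip_range_end = int(net[0]), int(net[-1]) + 1
assert ip_range_start <= ip_to_int("66.249.157.90") < ip_range_end
assert is_ipv4("66.249.157.90")
```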
diff --git a/integ-test/script/README.md b/integ-test/script/README.md index 7ce0c6886..f9e9a8e93 100644 --- a/integ-test/script/README.md +++ b/integ-test/script/README.md @@ -17,41 +17,55 @@ Apart from the basic feature, it also has some advanced functionality includes:
 ### Usage
 To use this script, you need to have Python **3.6** or higher installed. It also requires the following Python libraries:
 ```shell
-pip install requests pandas openpyxl
+pip install requests pandas openpyxl pyspark setuptools pyarrow grpcio grpcio-status protobuf
+```
+
+Build the Flint and PPL extensions for Spark:
+```shell
+sbt clean
+sbt sparkSqlApplicationCosmetic/assembly sparkPPLCosmetic/assembly
+```
+
+Next, start the Docker containers that will be used for the tests, from the directory `docker/integ-test`:
+```shell
+docker compose up -d
+```
+
+After the tests are finished, the Docker containers can be stopped from the directory `docker/integ-test` with:
+```shell
+docker compose down
 ```
 
 After getting the requisite libraries, you can run the script with the following command line parameters in your shell:
 ```shell
-python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --datasource ${DATASOURCE_NAME} --input-csv test_cases.csv --output-file test_report --max-workers 2 --check-interval 10 --timeout 600
+python SanityTest.py --spark-url ${SPARK_URL} --username *** --password *** --opensearch-url ${OPENSEARCH_URL} --input-csv test_cases.csv --output-file test_report
 ```
-You need to replace the placeholders with your actual values of URL_ADDRESS, DATASOURCE_NAME and USERNAME, PASSWORD for authentication to your endpoint.
+You need to replace the placeholders with your actual values of SPARK_URL, OPENSEARCH_URL and USERNAME, PASSWORD for authentication to your endpoint.
+
+Running against the docker cluster, `SPARK_URL` should be set to `sc://localhost:15002` and `OPENSEARCH_URL` should be set
+to `http://localhost:9200`.
 
 For more details of the command line parameters, you can see the help manual via command:
 ```shell
 python SanityTest.py --help
 
-usage: SanityTest.py [-h] --base-url BASE_URL --username USERNAME --password PASSWORD --datasource DATASOURCE --input-csv INPUT_CSV
-                     --output-file OUTPUT_FILE [--max-workers MAX_WORKERS] [--check-interval CHECK_INTERVAL] [--timeout TIMEOUT]
+usage: SanityTest.py [-h] --spark-url SPARK_URL --username USERNAME --password PASSWORD --opensearch-url OPENSEARCH_URL --input-csv INPUT_CSV
+                     --output-file OUTPUT_FILE
                      [--start-row START_ROW] [--end-row END_ROW]
 
 Run tests from a CSV file and generate a report.
 
 options:
   -h, --help            show this help message and exit
-  --base-url BASE_URL   Base URL of the service
+  --spark-url SPARK_URL
+                        Spark Connect URL of the service
   --username USERNAME   Username for authentication
   --password PASSWORD   Password for authentication
-  --datasource DATASOURCE
-                        Datasource name
+  --opensearch-url OPENSEARCH_URL
+                        URL of the OpenSearch service
   --input-csv INPUT_CSV
                         Path to the CSV file containing test queries
   --output-file OUTPUT_FILE
                         Path to the output report file
-  --max-workers MAX_WORKERS
-                        optional, Maximum number of worker threads (default: 2)
-  --check-interval CHECK_INTERVAL
-                        optional, Check interval in seconds (default: 10)
-  --timeout TIMEOUT     optional, Timeout in seconds (default: 600)
   --start-row START_ROW
                         optional, The start row of the query to run, start from 1
   --end-row END_ROW     optional, The end row of the query to run, not included
@@ -64,7 +78,20 @@ As claimed in the description, the input CSV file should at least have the colum
 
 We also provide a sample input CSV file `test_cases.csv` for reference. It includes all sanity test cases we have currently in the Flint.
 
-**TODO**: the prerequisite data of the test cases and ingesting process
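Roughly, the script consumes that CSV as follows (an illustrative sketch, not the script's exact code; the 1-based row number serves as the query's sequence id in the report):

```python
# Illustrative sketch of the input format: every row needs a `query` column;
# `expected_status` is optional.
import csv

with open("test_cases.csv") as f:
    rows = list(csv.DictReader(f))

# The 1-based row number doubles as the query's sequence id.
queries = [(row["query"], seq_id, row.get("expected_status"))
           for seq_id, row in enumerate(rows, start=1)]
print(queries[0])
```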
+### Indices and Data for Testing
+After the docker containers have started, the test script will try to create the indices needed for testing. It looks in the directory `data`, starting with
+all files whose names end with `.mapping.json`. The start of the filename is the name of the index to create, and the contents of the file are the field mappings.
+
+[Supported field types](https://opensearch.org/docs/latest/field-types/supported-field-types/index/)
+
+[Example mapping](https://opensearch.org/docs/latest/field-types/supported-field-types/index/#example)
+
+After the indices have been created, the script looks for all other files ending with `.json`. These are the files for bulk inserting data into the indices. The start
+of the filename is the index to insert data into, and the contents of the file are used as the body of the bulk insert request (a condensed sketch of both steps follows the links below).
+
+[Bulk Insert](https://opensearch.org/docs/latest/api-reference/document-apis/bulk/)
+
+[Example Body](https://opensearch.org/docs/latest/api-reference/document-apis/bulk/)
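Condensed, the two setup steps above amount to one `PUT` per mapping file and one `_bulk` `POST` per data file. A sketch assuming the compose defaults (OpenSearch on `localhost:9200`, admin password from `docker/integ-test/.env`):

```python
# Sketch of the index setup described above, for one index.
import requests
from requests.auth import HTTPBasicAuth

auth = HTTPBasicAuth("admin", "C0rrecthorsebatterystaple.")
base = "http://localhost:9200"

# The index name is the part of the filename before the first '.'
with open("data/http_logs.mapping.json", "rb") as f:
    requests.put(f"{base}/http_logs", auth=auth,
                 headers={"Content-Type": "application/json"},
                 data=f.read()).raise_for_status()

# The data file is already in ndjson bulk format, so it is sent as-is.
with open("data/http_logs.json", "rb") as f:
    requests.post(f"{base}/http_logs/_bulk", auth=auth,
                  headers={"Content-Type": "application/x-ndjson"},
                  data=f.read()).raise_for_status()
```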
 ### Report Explanation
 The generated report contains two files:
@@ -78,12 +105,12 @@ It also provides the query_id, session_id and start/end time for each query, whi
 
 An example of Excel report:
 
-| query_name | query | expected_status | status | check_status | error | result | Duration (s) | query_id | session_id | Start Time | End Time |
-|------------|-------|-----------------|--------|--------------|-------|--------|--------------|----------|------------|------------|----------|
-| 1 | describe myglue_test.default.http_logs | SUCCESS | SUCCESS | TRUE | | {'status': 'SUCCESS', 'schema': [{...}, ...], 'datarows': [[...], ...], 'total': 31, 'size': 31} | 37.51 | SHFEVWxDNnZjem15Z2x1ZV90ZXN0 | RkgzZm0xNlA5MG15Z2x1ZV90ZXN0 | 2024-11-07 13:34:10 | 2024-11-07 13:34:47 |
-| 2 | source = myglue_test.default.http_logs \| dedup status CONSECUTIVE=true | SUCCESS | FAILED | FALSE | {"Message":"Fail to run query. Cause: Consecutive deduplication is not supported"} | | 39.53 | dVNlaVVxOFZrZW15Z2x1ZV90ZXN0 | ZGU2MllVYmI4dG15Z2x1ZV90ZXN0 | 2024-11-07 13:34:10 | 2024-11-07 13:34:49 |
-| 3 | source = myglue_test.default.http_logs \| eval res = json_keys(json('{"account_number":1,"balance":39225,"age":32,"gender":"M"}')) \| head 1 \| fields res | SUCCESS | SUCCESS | TRUE | | {'status': 'SUCCESS', 'schema': [{'name': 'res', 'type': 'array'}], 'datarows': [[['account_number', 'balance', 'age', 'gender']]], 'total': 1, 'size': 1} | 12.77 | WHQxaXlVSGtGUm15Z2x1ZV90ZXN0 | RkgzZm0xNlA5MG15Z2x1ZV90ZXN0 | 2024-11-07 13:34:47 | 2024-11-07 13:38:45 |
-| ... | ... | ... | ... | ... | | | ... | ... | ... | ... | ... |
+| query_name | query | expected_status | status | check_status | error | result | duration (s) | Start Time | End Time |
+|------------|-------|-----------------|--------|--------------|-------|--------|--------------|------------|----------|
+| 1 | describe myglue_test.default.http_logs | SUCCESS | SUCCESS | TRUE | | {'status': 'SUCCESS', 'schema': [{...}, ...], 'datarows': [[...], ...], 'total': 31, 'size': 31} | 37.51 | 2024-11-07 13:34:10 | 2024-11-07 13:34:47 |
+| 2 | source = myglue_test.default.http_logs \| dedup status CONSECUTIVE=true | SUCCESS | FAILED | FALSE | {"Message":"Fail to run query. Cause: Consecutive deduplication is not supported"} | | 39.53 | 2024-11-07 13:34:10 | 2024-11-07 13:34:49 |
+| 3 | source = myglue_test.default.http_logs \| eval res = json_keys(json('{"account_number":1,"balance":39225,"age":32,"gender":"M"}')) \| head 1 \| fields res | SUCCESS | SUCCESS | TRUE | | {'status': 'SUCCESS', 'schema': [{'name': 'res', 'type': 'array'}], 'datarows': [[['account_number', 'balance', 'age', 'gender']]], 'total': 1, 'size': 1} | 12.77 | 2024-11-07 13:34:47 | 2024-11-07 13:38:45 |
+| ... | ... | ... | ... | ... | | | ... | ... | ... |
 
 #### JSON Report
@@ -103,7 +130,7 @@ An example of JSON report:
   "detailed_results": [
     {
       "query_name": 1,
-      "query": "source = myglue_test.default.http_logs | stats avg(size)",
+      "query": "source = dev.default.http_logs | stats avg(size)",
       "query_id": "eFZmTlpTa3EyTW15Z2x1ZV90ZXN0",
       "session_id": "bFJDMWxzb2NVUm15Z2x1ZV90ZXN0",
       "status": "SUCCESS",
@@ -130,7 +157,7 @@ An example of JSON report:
     },
     {
       "query_name": 2,
-      "query": "source = myglue_test.default.http_logs | eval res = json_keys(json('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}')) | head 1 | fields res",
+      "query": "source = dev.default.http_logs | eval res = json_keys(json('{\"teacher\":\"Alice\",\"student\":[{\"name\":\"Bob\",\"rank\":1},{\"name\":\"Charlie\",\"rank\":2}]}')) | head 1 | fields res",
       "query_id": "bjF4Y1VnbXdFYm15Z2x1ZV90ZXN0",
       "session_id": "c3pvU1V6OW8xM215Z2x1ZV90ZXN0",
       "status": "FAILED",
@@ -142,7 +169,7 @@
     },
     {
       "query_name": 2,
-      "query": "source = myglue_test.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2",
+      "query": "source = dev.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2",
       "query_id": "azVyMFFORnBFRW15Z2x1ZV90ZXN0",
      "session_id": "VWF0SEtrNWM3bm15Z2x1ZV90ZXN0",
      "status": "TIMEOUT",
diff --git a/integ-test/script/SanityTest.py b/integ-test/script/SanityTest.py index eb97752b4..b4e6210bb 100644 --- a/integ-test/script/SanityTest.py +++ b/integ-test/script/SanityTest.py @@ -3,6 +3,7 @@
 SPDX-License-Identifier: Apache-2.0
 """
 
+import glob
 import signal
 import sys
 import requests
@@ -11,18 +12,18 @@ import time
 import logging
 from datetime import datetime
-import pandas as pd
 import argparse
 from requests.auth import HTTPBasicAuth
-from concurrent.futures import ThreadPoolExecutor, as_completed
+from pyspark.sql import SparkSession
 import threading
+import pandas as pd
 
 """
 Environment: python3
 
 Example to use this script:
 
-python SanityTest.py --base-url ${URL_ADDRESS} --username *** --password *** --datasource ${DATASOURCE_NAME} --input-csv test_queries.csv --output-file test_report --max-workers 2 --check-interval 10 --timeout 600
+python SanityTest.py --spark-url ${SPARK_URL} --username *** --password *** --opensearch-url ${OPENSEARCH_URL} --input-csv test_queries.csv --output-file test_report
 
 The input file test_queries.csv should contain column: `query`
 
@@ -33,24 +34,19 @@
 """
 
 class FlintTester:
-    def __init__(self, base_url, username, password, datasource, max_workers, check_interval, timeout, output_file, start_row, end_row, log_level):
-        self.base_url = base_url
+    def __init__(self, spark_url, username, password, opensearch_url, output_file, start_row, end_row, log_level):
+        self.spark_url = spark_url
         self.auth = HTTPBasicAuth(username, password)
-        self.datasource = datasource
-        self.headers = { 'Content-Type': 'application/json' }
-        self.max_workers = max_workers
-        self.check_interval = check_interval
-        self.timeout = timeout
+        self.opensearch_url = opensearch_url
         self.output_file = output_file
         self.start = start_row - 1 if start_row else None
         self.end = end_row - 1 if end_row else None
         self.log_level = log_level
-        self.max_attempts = (int)(timeout / check_interval)
         self.logger = self._setup_logger()
-        self.executor = ThreadPoolExecutor(max_workers=self.max_workers)
-        self.thread_local = threading.local()
         self.test_results = []
+        self.spark_client = SparkSession.builder.remote(spark_url).appName("integ-test").getOrCreate()
+
     def _setup_logger(self):
         logger = logging.getLogger('FlintTester')
         logger.setLevel(self.log_level)
@@ -72,126 +68,80 @@ def _setup_logger(self):
 
         return logger
 
+    # Create the indices needed for the tests
+    def create_indices(self):
+        self.logger.info("Creating indices")
 
-    def get_session_id(self):
-        if not hasattr(self.thread_local, 'session_id'):
-            self.thread_local.session_id = "empty_session_id"
-        self.logger.debug(f"get session id {self.thread_local.session_id}")
-        return self.thread_local.session_id
+        json_files = glob.glob('data/*.json')
+        mapping_files = [f for f in json_files if f.endswith('.mapping.json')]
+        data_files = [f for f in json_files if not f.endswith('.mapping.json')]
+        existing_indices = set()
 
-    def set_session_id(self, session_id):
-        """Reuse the session id for the same thread"""
-        self.logger.debug(f"set session id {session_id}")
-        self.thread_local.session_id = session_id
+        for mapping_file in mapping_files:
+            index_name = mapping_file[5 : mapping_file.index('.')]
 
-    # Call submit API to submit the query
-    def submit_query(self, query, session_id="Empty"):
-        url = f"{self.base_url}/_plugins/_async_query"
-        payload = {
-            "datasource": self.datasource,
-            "lang": "ppl",
-            "query": query,
-            "sessionId": session_id
-        }
-        self.logger.debug(f"Submit query with payload: {payload}")
-        response_json = None
-        try:
-            response = requests.post(url, auth=self.auth, json=payload, headers=self.headers)
-            response_json = response.json()
-            response.raise_for_status()
-            return response_json
-        except Exception as e:
-            return {"error": f"{str(e)}, got response {response_json}"}
+            self.logger.info(f"Checking if index exists: {index_name}")
+            response = requests.get(f'{self.opensearch_url}/{index_name}', auth=self.auth)
+            if response.status_code == 200:
+                existing_indices.add(index_name)
+                continue
 
-    # Call get API to check the query status
-    def get_query_result(self, query_id):
-        url = f"{self.base_url}/_plugins/_async_query/{query_id}"
-        response_json =
None - try: - response = requests.get(url, auth=self.auth) - response_json = response.json() - response.raise_for_status() - return response_json - except Exception as e: - return {"status": "FAILED", "error": f"{str(e)}, got response {response_json}"} + self.logger.info(f"Creating index: {index_name}") - # Call delete API to cancel the query - def cancel_query(self, query_id): - url = f"{self.base_url}/_plugins/_async_query/{query_id}" - response_json = None - try: - response = requests.delete(url, auth=self.auth) - response_json = response.json() - response.raise_for_status() - self.logger.info(f"Cancelled query [{query_id}] with info {response.json()}") - return response_json - except Exception as e: - self.logger.warning(f"Cancel query [{query_id}] error: {str(e)}, got response {response_json}") + file_data = open(mapping_file, 'rb').read() + headers = {'Content-Type': 'application/json'} + + response = requests.put(f'{self.opensearch_url}/{index_name}', auth=self.auth, headers=headers, data=file_data) + if response.status_code != 200: + self.logger.error(f'Failed to create index: {index_name}') + response.raise_for_status() + + for data_file in data_files: + index_name = data_file[5 : data_file.index('.')] + if index_name in existing_indices: + continue + + self.logger.info(f"Populating index: {index_name}") + + file_data = open(data_file, 'rb').read() + headers = {'Content-Type': 'application/x-ndjson'} + + response = requests.post(f'{self.opensearch_url}/{index_name}/_bulk', auth=self.auth, headers=headers, data=file_data) + if response.status_code != 200: + response.raise_for_status() # Run the test and return the result def run_test(self, query, seq_id, expected_status): self.logger.info(f"Starting test: {seq_id}, {query}") start_time = datetime.now() - pre_session_id = self.get_session_id() - submit_result = self.submit_query(query, pre_session_id) - if "error" in submit_result: - self.logger.warning(f"Submit error: {submit_result}") - return { - "query_name": seq_id, - "query": query, - "expected_status": expected_status, - "status": "SUBMIT_FAILED", - "check_status": "SUBMIT_FAILED" == expected_status if expected_status else None, - "error": submit_result["error"], - "duration": 0, - "start_time": start_time, - "end_time": datetime.now() - } - - query_id = submit_result["queryId"] - session_id = submit_result["sessionId"] - self.logger.info(f"Submit return: {submit_result}") - if (session_id != pre_session_id): - self.logger.info(f"Update session id from {pre_session_id} to {session_id}") - self.set_session_id(session_id) - - test_result = self.check_query_status(query_id) + + query_str = query.replace('\n', ' ') + status = None + result = None + error_str = None + try: + result = self.spark_client.sql(query_str) + status = 'SUCCESS' + except Exception as e: + status = 'FAILED' + error_str = str(e) + end_time = datetime.now() duration = (end_time - start_time).total_seconds() return { "query_name": seq_id, "query": query, - "query_id": query_id, - "session_id": session_id, "expected_status": expected_status, - "status": test_result["status"], - "check_status": test_result["status"] == expected_status if expected_status else None, - "error": test_result.get("error", ""), - "result": test_result if test_result["status"] == "SUCCESS" else None, + "status": status, + "check_status": status == expected_status if expected_status else None, + "error": error_str if error_str else None, + "result": result, "duration": duration, "start_time": start_time, "end_time": end_time } - # Check 
the status of the query periodically until it is completed or failed or exceeded the timeout - def check_query_status(self, query_id): - query_id = query_id - - for attempt in range(self.max_attempts): - time.sleep(self.check_interval) - result = self.get_query_result(query_id) - - if result["status"] == "FAILED" or result["status"] == "SUCCESS": - return result - - # Cancel the query if it exceeds the timeout - self.cancel_query(query_id) - return { - "status": "TIMEOUT", - "error": "Query execution exceeded " + str(self.timeout) + " seconds with last status: " + result["status"], - } - def run_tests_from_csv(self, csv_file): with open(csv_file, 'r') as f: reader = csv.DictReader(f) @@ -200,20 +150,15 @@ def run_tests_from_csv(self, csv_file): # Filtering queries based on start and end queries = queries[self.start:self.end] - # Parallel execution - futures = [self.executor.submit(self.run_test, query, seq_id, expected_status) for query, seq_id, expected_status in queries] - for future in as_completed(futures): - result = future.result() - self.logger.info(f"Completed test: {result["query_name"]}, {result["query"]}, got result status: {result["status"]}") - self.test_results.append(result) + self.test_results = [] + for query in queries: + self.test_results.append(self.run_test(query[0], query[1], query[2])) def generate_report(self): self.logger.info("Generating report...") total_queries = len(self.test_results) successful_queries = sum(1 for r in self.test_results if r['status'] == 'SUCCESS') failed_queries = sum(1 for r in self.test_results if r['status'] == 'FAILED') - submit_failed_queries = sum(1 for r in self.test_results if r['status'] == 'SUBMIT_FAILED') - timeout_queries = sum(1 for r in self.test_results if r['status'] == 'TIMEOUT') # Create report report = { @@ -221,8 +166,6 @@ def generate_report(self): "total_queries": total_queries, "successful_queries": successful_queries, "failed_queries": failed_queries, - "submit_failed_queries": submit_failed_queries, - "timeout_queries": timeout_queries, "execution_time": sum(r['duration'] for r in self.test_results) }, "detailed_results": self.test_results @@ -249,15 +192,12 @@ def signal_handler(sig, frame, tester): def main(): # Parse command line arguments parser = argparse.ArgumentParser(description="Run tests from a CSV file and generate a report.") - parser.add_argument("--base-url", required=True, help="Base URL of the service") + parser.add_argument("--spark-url", required=True, help="URL of the Spark service") parser.add_argument("--username", required=True, help="Username for authentication") parser.add_argument("--password", required=True, help="Password for authentication") - parser.add_argument("--datasource", required=True, help="Datasource name") + parser.add_argument("--opensearch-url", required=True, help="URL of the OpenSearch service") parser.add_argument("--input-csv", required=True, help="Path to the CSV file containing test queries") parser.add_argument("--output-file", required=True, help="Path to the output report file") - parser.add_argument("--max-workers", type=int, default=2, help="optional, Maximum number of worker threads (default: 2)") - parser.add_argument("--check-interval", type=int, default=5, help="optional, Check interval in seconds (default: 5)") - parser.add_argument("--timeout", type=int, default=600, help="optional, Timeout in seconds (default: 600)") parser.add_argument("--start-row", type=int, default=None, help="optional, The start row of the query to run, start from 1") 
parser.add_argument("--end-row", type=int, default=None, help="optional, The end row of the query to run, not included") parser.add_argument("--log-level", default="INFO", help="optional, Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL, default: INFO)") @@ -265,13 +205,10 @@ def main(): args = parser.parse_args() tester = FlintTester( - base_url=args.base_url, + spark_url=args.spark_url, username=args.username, password=args.password, - datasource=args.datasource, - max_workers=args.max_workers, - check_interval=args.check_interval, - timeout=args.timeout, + opensearch_url=args.opensearch_url, output_file=args.output_file, start_row=args.start_row, end_row=args.end_row, @@ -282,6 +219,9 @@ def main(): signal.signal(signal.SIGINT, lambda sig, frame: signal_handler(sig, frame, tester)) signal.signal(signal.SIGTERM, lambda sig, frame: signal_handler(sig, frame, tester)) + # Create indices + tester.create_indices() + # Running tests tester.run_tests_from_csv(args.input_csv) diff --git a/integ-test/script/data/customer.mapping.json b/integ-test/script/data/customer.mapping.json new file mode 100644 index 000000000..a98d473a2 --- /dev/null +++ b/integ-test/script/data/customer.mapping.json @@ -0,0 +1,30 @@ +{ + "mappings": { + "properties": { + "c_custkey": { + "type": "integer" + }, + "c_name": { + "type": "text" + }, + "c_address": { + "type": "text" + }, + "c_nationkey": { + "type": "integer" + }, + "c_phone": { + "type": "text" + }, + "c_acctbal": { + "type": "double" + }, + "c_mktsegment": { + "type": "text" + }, + "c_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/http_logs.json b/integ-test/script/data/http_logs.json new file mode 100644 index 000000000..ff2aa2fca --- /dev/null +++ b/integ-test/script/data/http_logs.json @@ -0,0 +1,12 @@ +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696154400000, "year": 2023, "month": 10, "day": 1, "clientip": "40.135.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696154700000, "year": 2023, "month": 10, "day": 1, "clientip": "232.0.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696155000000, "year": 2023, "month": 10, "day": 1, "clientip": "26.1.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696155300000, "year": 2023, "month": 10, "day": 1, "clientip": "247.37.0.0", "request": "GET /french/splash_inet.html HTTP/1.0", "status": 200, "size": 3781} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696155600000, "year": 2023, "month": 10, "day": 1, "clientip": "247.37.0.0", "request": "GET /images/hm_nbg.jpg HTTP/1.0", "status": 304, "size": 0} +{"index": {"_index": "http_logs"}} +{"@timestamp": 1696155900000, "year": 2023, "month": 10, "day": 1, "clientip": "252.0.0.0", "request": "GET /images/hm_bg.jpg HTTP/1.0", "status": 200, "size": 24736} diff --git a/integ-test/script/data/http_logs.mapping.json b/integ-test/script/data/http_logs.mapping.json new file mode 100644 index 000000000..b944fbd4b --- /dev/null +++ b/integ-test/script/data/http_logs.mapping.json @@ -0,0 +1,30 @@ +{ + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "year": { + "type": "integer" + }, + "month": { + "type": "integer" + }, + "day": { + "type": "integer" + }, + "clientip": { + "type": "keyword" + }, + "request": { + "type": 
"text" + }, + "status": { + "type": "integer" + }, + "size": { + "type": "integer" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/lineitem.mapping.json b/integ-test/script/data/lineitem.mapping.json new file mode 100644 index 000000000..2fb1cdb40 --- /dev/null +++ b/integ-test/script/data/lineitem.mapping.json @@ -0,0 +1,54 @@ +{ + "mappings": { + "properties": { + "l_orderkey": { + "type": "integer" + }, + "l_partkey": { + "type": "text" + }, + "l_suppkey": { + "type": "integer" + }, + "l_linenumber": { + "type": "integer" + }, + "l_quantity": { + "type": "double" + }, + "l_extendedprice": { + "type": "double" + }, + "l_discount": { + "type": "double" + }, + "l_tax": { + "type": "double" + }, + "l_returnflag": { + "type": "text" + }, + "l_linestatus": { + "type": "text" + }, + "l_shipdate": { + "type": "date" + }, + "l_commitdate": { + "type": "date" + }, + "l_receiptdate": { + "type": "date" + }, + "l_shipinstruct": { + "type": "text" + }, + "l_shipmode": { + "type": "text" + }, + "l_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/nation.mapping.json b/integ-test/script/data/nation.mapping.json new file mode 100644 index 000000000..d0e82e559 --- /dev/null +++ b/integ-test/script/data/nation.mapping.json @@ -0,0 +1,18 @@ +{ + "mappings": { + "properties": { + "n_nationkey": { + "type": "integer" + }, + "n_name": { + "type": "text" + }, + "n_regionkey": { + "type": "integer" + }, + "n_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/nested.json b/integ-test/script/data/nested.json new file mode 100644 index 000000000..eb8af683b --- /dev/null +++ b/integ-test/script/data/nested.json @@ -0,0 +1,10 @@ +{"index": {"_index": "nested"}} +{"int_col": 30, "struct_col": {"field1": {"subfield": "value1"}, "field2": 123}, "struct_col2": {"field1": {"subfield": "valueA"}, "field2": 23}} +{"index": {"_index": "nested"}} +{"int_col": 40, "struct_col": {"field1": {"subfield": "value5"}, "field2": 123}, "struct_col2": {"field1": {"subfield": "valueB"}, "field2": 33}} +{"index": {"_index": "nested"}} +{"int_col": 30, "struct_col": {"field1": {"subfield": "value4"}, "field2": 823}, "struct_col2": {"field1": {"subfield": "valueC"}, "field2": 83}} +{"index": {"_index": "nested"}} +{"int_col": 40, "struct_col": {"field1": {"subfield": "value2"}, "field2": 456}, "struct_col2": {"field1": {"subfield": "valueD"}, "field2": 46}} +{"index": {"_index": "nested"}} +{"int_col": 50, "struct_col": {"field1": {"subfield": "value3"}, "field2": 789}, "struct_col2": {"field1": {"subfield": "valueE"}, "field2": 89}} diff --git a/integ-test/script/data/nested.mapping.json b/integ-test/script/data/nested.mapping.json new file mode 100644 index 000000000..1aa189415 --- /dev/null +++ b/integ-test/script/data/nested.mapping.json @@ -0,0 +1,37 @@ +{ + "mappings": { + "properties": { + "int_col": { + "type": "integer" + }, + "struct_col": { + "properties": { + "field1": { + "properties": { + "subfield": { + "type": "text" + } + } + }, + "field2": { + "type": "integer" + } + } + }, + "struct_col2": { + "properties": { + "field1": { + "properties": { + "subfield": { + "type": "text" + } + } + }, + "field2": { + "type": "integer" + } + } + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/orders.mapping.json b/integ-test/script/data/orders.mapping.json new file mode 100644 index 000000000..59b3cecdd --- /dev/null +++ 
b/integ-test/script/data/orders.mapping.json @@ -0,0 +1,33 @@ +{ + "mappings": { + "properties": { + "o_orderkey": { + "type": "integer" + }, + "o_custkey": { + "type": "integer" + }, + "o_orderstatus": { + "type": "text" + }, + "o_totalprice": { + "type": "double" + }, + "o_orderdate": { + "type": "date" + }, + "o_orderpriority": { + "type": "text" + }, + "o_clerk": { + "type": "text" + }, + "o_shippriority": { + "type": "integer" + }, + "o_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/part.mapping.json b/integ-test/script/data/part.mapping.json new file mode 100644 index 000000000..8be7e9aa0 --- /dev/null +++ b/integ-test/script/data/part.mapping.json @@ -0,0 +1,33 @@ +{ + "mappings": { + "properties": { + "p_partkey": { + "type": "integer" + }, + "p_name": { + "type": "text" + }, + "p_mfgr": { + "type": "text" + }, + "p_brand": { + "type": "text" + }, + "p_type": { + "type": "text" + }, + "p_size": { + "type": "integer" + }, + "p_container": { + "type": "text" + }, + "p_retailprice": { + "type": "double" + }, + "p_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/partsupp.mapping.json b/integ-test/script/data/partsupp.mapping.json new file mode 100644 index 000000000..13509ad46 --- /dev/null +++ b/integ-test/script/data/partsupp.mapping.json @@ -0,0 +1,21 @@ +{ + "mappings": { + "properties": { + "ps_partkey": { + "type": "integer" + }, + "ps_suppkey": { + "type": "integer" + }, + "ps_availqty": { + "type": "integer" + }, + "ps_supplycost": { + "type": "double" + }, + "ps_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/people.json b/integ-test/script/data/people.json new file mode 100644 index 000000000..4563a2c4b --- /dev/null +++ b/integ-test/script/data/people.json @@ -0,0 +1,12 @@ +{"index": {"_index": "people"}} +{"@timestamp": 1718458823000, "id": 1000, "name": "Jake", "occupation": "Engineer", "country": "England", "salary": 100000} +{"index": {"_index": "people"}} +{"@timestamp": 1718458833000, "id": 1001, "name": "Hello", "occupation": "Artist", "country": "USA", "salary": 70000} +{"index": {"_index": "people"}} +{"@timestamp": 1718458843000, "id": 1002, "name": "John", "occupation": "Doctor", "country": "Canada", "salary": 120000} +{"index": {"_index": "people"}} +{"@timestamp": 1718458853000, "id": 1003, "name": "David", "occupation": "Doctor", "country": null, "salary": 120000} +{"index": {"_index": "people"}} +{"@timestamp": 1718458863000, "id": 1004, "name": "David", "occupation": null, "country": "Canada", "salary": 0} +{"index": {"_index": "people"}} +{"@timestamp": 1718458873000, "id": 1005, "name": "Jane", "occupation": "Scientist", "country": "Canada", "salary": 90000} diff --git a/integ-test/script/data/people.mapping.json b/integ-test/script/data/people.mapping.json new file mode 100644 index 000000000..b5dde8ff6 --- /dev/null +++ b/integ-test/script/data/people.mapping.json @@ -0,0 +1,24 @@ +{ + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "id": { + "type": "integer" + }, + "name": { + "type": "text" + }, + "occupation": { + "type": "text" + }, + "country": { + "type": "text" + }, + "salary": { + "type": "integer" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/region.mapping.json b/integ-test/script/data/region.mapping.json new file mode 100644 index 000000000..3dddbc580 --- /dev/null +++ 
b/integ-test/script/data/region.mapping.json @@ -0,0 +1,15 @@ +{ + "mappings": { + "properties": { + "r_regionkey": { + "type": "integer" + }, + "r_name": { + "type": "text" + }, + "r_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/supplier.mapping.json b/integ-test/script/data/supplier.mapping.json new file mode 100644 index 000000000..bdcb933b6 --- /dev/null +++ b/integ-test/script/data/supplier.mapping.json @@ -0,0 +1,27 @@ +{ + "mappings": { + "properties": { + "s_suppkey": { + "type": "integer" + }, + "s_name": { + "type": "text" + }, + "s_address": { + "type": "text" + }, + "s_nationkey": { + "type": "integer" + }, + "s_phone": { + "type": "text" + }, + "s_acctbal": { + "type": "double" + }, + "s_comment": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/data/work_info.json b/integ-test/script/data/work_info.json new file mode 100644 index 000000000..64802bdad --- /dev/null +++ b/integ-test/script/data/work_info.json @@ -0,0 +1,10 @@ +{"index": {"_index": "work_info"}} +{"uid": 1000, "name": "Jake", "department": "IT", "occupation": "Engineer"} +{"index": {"_index": "work_info"}} +{"uid": 1002, "name": "John", "department": "DATA", "occupation": "Scientist"} +{"index": {"_index": "work_info"}} +{"uid": 1003, "name": "David", "department": "HR", "occupation": "Doctor"} +{"index": {"_index": "work_info"}} +{"uid": 1005, "name": "Jane", "department": "DATA", "occupation": "Engineer"} +{"index": {"_index": "work_info"}} +{"uid": 1006, "name": "Tom", "department": "SALES", "occupation": "Artist"} diff --git a/integ-test/script/data/work_info.mapping.json b/integ-test/script/data/work_info.mapping.json new file mode 100644 index 000000000..3fb5e2c28 --- /dev/null +++ b/integ-test/script/data/work_info.mapping.json @@ -0,0 +1,18 @@ +{ + "mappings": { + "properties": { + "uid": { + "type": "integer" + }, + "name": { + "type": "text" + }, + "department": { + "type": "text" + }, + "occupation": { + "type": "text" + } + } + } +} \ No newline at end of file diff --git a/integ-test/script/test_cases.csv b/integ-test/script/test_cases.csv index 7df05f5a3..91500efea 100644 --- a/integ-test/script/test_cases.csv +++ b/integ-test/script/test_cases.csv @@ -1,116 +1,116 @@ query,expected_status -describe myglue_test.default.http_logs,FAILED -describe `myglue_test`.`default`.`http_logs`,FAILED -"source = myglue_test.default.http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10",SUCCESS -"source = myglue_test.default.http_logs | dedup status, size | head 10",SUCCESS -source = myglue_test.default.http_logs | dedup 1 status keepempty=true | head 10,SUCCESS -"source = myglue_test.default.http_logs | dedup status, size keepempty=true | head 10",SUCCESS -source = myglue_test.default.http_logs | dedup 2 status | head 10,SUCCESS -"source = myglue_test.default.http_logs | dedup 2 status, size | head 10",SUCCESS -"source = myglue_test.default.http_logs | dedup 2 status, size keepempty=true | head 10",SUCCESS -source = myglue_test.default.http_logs | dedup status CONSECUTIVE=true | fields status,FAILED -"source = myglue_test.default.http_logs | dedup 2 status, size CONSECUTIVE=true | fields status",FAILED -"source = myglue_test.default.http_logs | sort stat | fields @timestamp, clientip, status | head 10",SUCCESS -"source = myglue_test.default.http_logs | fields @timestamp, notexisted | head 10",FAILED -"source = myglue_test.default.nested | fields int_col, struct_col.field1, 
struct_col2.field1 | head 10",FAILED -"source = myglue_test.default.nested | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield",FAILED -"source = myglue_test.default.http_logs | fields - @timestamp, clientip, status | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval new_time = @timestamp, new_clientip = clientip | fields - new_time, new_clientip, status | head 10",SUCCESS -source = myglue_test.default.http_logs | eval new_clientip = lower(clientip) | fields - new_clientip | head 10,SUCCESS -"source = myglue_test.default.http_logs | fields + @timestamp, clientip, status | fields - clientip, status | head 10",SUCCESS -"source = myglue_test.default.http_logs | fields - clientip, status | fields + @timestamp, clientip, status| head 10",SUCCESS -source = myglue_test.default.http_logs | where status = 200 | head 10,SUCCESS -source = myglue_test.default.http_logs | where status != 200 | head 10,SUCCESS -source = myglue_test.default.http_logs | where size > 0 | head 10,SUCCESS -source = myglue_test.default.http_logs | where size <= 0 | head 10,SUCCESS -source = myglue_test.default.http_logs | where clientip = '236.14.2.0' | head 10,SUCCESS -source = myglue_test.default.http_logs | where size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100,SUCCESS -"source = myglue_test.default.http_logs | where size <= 0 AND like(request, 'GET%') | head 10",SUCCESS -source = myglue_test.default.http_logs status = 200 | head 10,SUCCESS -source = myglue_test.default.http_logs size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100,SUCCESS -"source = myglue_test.default.http_logs size <= 0 AND like(request, 'GET%') | head 10",SUCCESS -"source = myglue_test.default.http_logs substring(clientip, 5, 2) = ""12"" | head 10",SUCCESS -source = myglue_test.default.http_logs | where isempty(size),FAILED -source = myglue_test.default.http_logs | where ispresent(size),FAILED -source = myglue_test.default.http_logs | where isnull(size) | head 10,SUCCESS -source = myglue_test.default.http_logs | where isnotnull(size) | head 10,SUCCESS -"source = myglue_test.default.http_logs | where isnotnull(coalesce(size, status)) | head 10",FAILED -"source = myglue_test.default.http_logs | where like(request, 'GET%') | head 10",SUCCESS -"source = myglue_test.default.http_logs | where like(request, '%bordeaux%') | head 10",SUCCESS -"source = myglue_test.default.http_logs | where substring(clientip, 5, 2) = ""12"" | head 10",SUCCESS -"source = myglue_test.default.http_logs | where lower(request) = ""get /images/backnews.gif http/1.0"" | head 10",SUCCESS -source = myglue_test.default.http_logs | where length(request) = 38 | head 10,SUCCESS -"source = myglue_test.default.http_logs | where case(status = 200, 'success' else 'failed') = 'success' | head 10",FAILED -"source = myglue_test.default.http_logs | eval h = ""Hello"", w = ""World"" | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval @h = ""Hello"" | eval @w = ""World"" | fields @timestamp, @h, @w",SUCCESS -source = myglue_test.default.http_logs | eval newF = clientip | head 10,SUCCESS -"source = myglue_test.default.http_logs | eval newF = clientip | fields clientip, newF | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval f = size | where f > 1 | sort f | fields size, clientip, status | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval f = status * 2 | eval h = f * 2 | fields status, f, h | head 10",SUCCESS -"source = 
myglue_test.default.http_logs | eval f = size * 2, h = status | stats sum(f) by h",SUCCESS -"source = myglue_test.default.http_logs | eval f = UPPER(request) | eval h = 40 | fields f, h | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval request = ""test"" | fields request | head 10",FAILED -source = myglue_test.default.http_logs | eval size = abs(size) | where size < 500,FAILED -"source = myglue_test.default.http_logs | eval status_string = case(status = 200, 'success' else 'failed') | head 10",FAILED -"source = myglue_test.default.http_logs | eval n = now() | eval t = unix_timestamp(@timestamp) | fields n, t | head 10",SUCCESS -source = myglue_test.default.http_logs | eval e = isempty(size) | eval p = ispresent(size) | head 10,FAILED -"source = myglue_test.default.http_logs | eval c = coalesce(size, status) | head 10",FAILED -source = myglue_test.default.http_logs | eval c = coalesce(request) | head 10,FAILED -source = myglue_test.default.http_logs | eval col1 = ln(size) | eval col2 = unix_timestamp(@timestamp) | sort - col1 | head 10,SUCCESS -"source = myglue_test.default.http_logs | eval col1 = 1 | sort col1 | head 4 | eval col2 = 2 | sort - col2 | sort - size | head 2 | fields @timestamp, clientip, col2",SUCCESS -"source = myglue_test.default.mini_http_logs | eval stat = status | where stat > 300 | sort stat | fields @timestamp,clientip,status | head 5",SUCCESS -"source = myglue_test.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2",SUCCESS -source = myglue_test.default.http_logs | stats avg(size) by clientip,SUCCESS -"source = myglue_test.default.http_logs | eval new_request = upper(request) | eval compound_field = concat('Hello ', if(like(new_request, '%bordeaux%'), 'World', clientip)) | fields new_request, compound_field | head 10",SUCCESS -source = myglue_test.default.http_logs | stats avg(size),SUCCESS -source = myglue_test.default.nested | stats max(int_col) by struct_col.field2,SUCCESS -source = myglue_test.default.nested | stats distinct_count(int_col),SUCCESS -source = myglue_test.default.nested | stats stddev_samp(int_col),SUCCESS -source = myglue_test.default.nested | stats stddev_pop(int_col),SUCCESS -source = myglue_test.default.nested | stats percentile(int_col),SUCCESS -source = myglue_test.default.nested | stats percentile_approx(int_col),SUCCESS -source = myglue_test.default.mini_http_logs | stats stddev_samp(status),SUCCESS -"source = myglue_test.default.mini_http_logs | where stats > 200 | stats percentile_approx(status, 99)",SUCCESS -"source = myglue_test.default.nested | stats count(int_col) by span(struct_col.field2, 10) as a_span",SUCCESS -"source = myglue_test.default.nested | stats avg(int_col) by span(struct_col.field2, 10) as a_span, struct_col2.field2",SUCCESS -"source = myglue_test.default.http_logs | stats sum(size) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10",SUCCESS -"source = myglue_test.default.http_logs | stats distinct_count(clientip) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10",SUCCESS -"source = myglue_test.default.http_logs | stats avg(size) as avg_size by status, year | stats avg(avg_size) as avg_avg_size by year",SUCCESS -"source = myglue_test.default.http_logs | stats avg(size) as avg_size by status, year, month | stats avg(avg_size) as avg_avg_size by year, month | stats avg(avg_avg_size) as avg_avg_avg_size by year",SUCCESS -"source = myglue_test.default.nested | stats avg(int_col) as avg_int by struct_col.field2, 
struct_col2.field2 | stats avg(avg_int) as avg_avg_int by struct_col2.field2",FAILED -"source = myglue_test.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | eval new_col = avg_int | stats avg(avg_int) as avg_avg_int by new_col",SUCCESS -source = myglue_test.default.nested | rare int_col,SUCCESS -source = myglue_test.default.nested | rare int_col by struct_col.field2,SUCCESS -source = myglue_test.default.http_logs | rare request,SUCCESS -source = myglue_test.default.http_logs | where status > 300 | rare request by status,SUCCESS -source = myglue_test.default.http_logs | rare clientip,SUCCESS -source = myglue_test.default.http_logs | where status > 300 | rare clientip,SUCCESS -source = myglue_test.default.http_logs | where status > 300 | rare clientip by day,SUCCESS -source = myglue_test.default.nested | top int_col by struct_col.field2,SUCCESS -source = myglue_test.default.nested | top 1 int_col by struct_col.field2,SUCCESS -source = myglue_test.default.nested | top 2 int_col by struct_col.field2,SUCCESS -source = myglue_test.default.nested | top int_col,SUCCESS -source = myglue_test.default.http_logs | inner join left=l right=r on l.status = r.int_col myglue_test.default.nested | head 10,FAILED -"source = myglue_test.default.http_logs | parse request 'GET /(?[a-zA-Z]+)/.*' | fields request, domain | head 10",SUCCESS -source = myglue_test.default.http_logs | parse request 'GET /(?[a-zA-Z]+)/.*' | top 1 domain,SUCCESS -source = myglue_test.default.http_logs | parse request 'GET /(?[a-zA-Z]+)/.*' | stats count() by domain,SUCCESS -"source = myglue_test.default.http_logs | parse request 'GET /(?[a-zA-Z]+)/.*' | eval a = 1 | fields a, domain | head 10",SUCCESS -"source = myglue_test.default.http_logs | parse request 'GET /(?[a-zA-Z]+)/.*' | where size > 0 | sort - size | fields size, domain | head 10",SUCCESS -"source = myglue_test.default.http_logs | parse request 'GET /(?[a-zA-Z]+)/(?[a-zA-Z]+)/.*' | where domain = 'english' | sort - picName | fields domain, picName | head 10",SUCCESS -source = myglue_test.default.http_logs | patterns request | fields patterns_field | head 10,SUCCESS -source = myglue_test.default.http_logs | patterns request | where size > 0 | fields patterns_field | head 10,SUCCESS -"source = myglue_test.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | fields request, no_letter | head 10",SUCCESS -source = myglue_test.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | stats count() by no_letter,SUCCESS -"source = myglue_test.default.http_logs | patterns new_field='status' pattern='[a-zA-Z]' request | fields request, status | head 10",FAILED -source = myglue_test.default.http_logs | rename @timestamp as timestamp | head 10,FAILED -source = myglue_test.default.http_logs | sort size | head 10,SUCCESS -source = myglue_test.default.http_logs | sort + size | head 10,SUCCESS -source = myglue_test.default.http_logs | sort - size | head 10,SUCCESS -"source = myglue_test.default.http_logs | sort + size, + @timestamp | head 10",SUCCESS -"source = myglue_test.default.http_logs | sort - size, - @timestamp | head 10",SUCCESS -"source = myglue_test.default.http_logs | sort - size, @timestamp | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval c1 = upper(request) | eval c2 = concat('Hello ', if(like(c1, '%bordeaux%'), 'World', clientip)) | eval c3 = length(request) | eval c4 = ltrim(request) | eval c5 = rtrim(request) | eval c6 = substring(clientip, 5, 2) | eval c7 = 
trim(request) | eval c8 = upper(request) | eval c9 = position('bordeaux' IN request) | eval c10 = replace(request, 'GET', 'GGG') | fields c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval c1 = unix_timestamp(@timestamp) | eval c2 = now() | eval c3 = +describe dev.default.http_logs,FAILED +describe `dev`.`default`.`http_logs`,FAILED +"source = dev.default.http_logs | dedup 1 status | fields @timestamp, clientip, status, size | head 10",SUCCESS +"source = dev.default.http_logs | dedup status, size | head 10",SUCCESS +source = dev.default.http_logs | dedup 1 status keepempty=true | head 10,SUCCESS +"source = dev.default.http_logs | dedup status, size keepempty=true | head 10",SUCCESS +source = dev.default.http_logs | dedup 2 status | head 10,SUCCESS +"source = dev.default.http_logs | dedup 2 status, size | head 10",SUCCESS +"source = dev.default.http_logs | dedup 2 status, size keepempty=true | head 10",SUCCESS +source = dev.default.http_logs | dedup status CONSECUTIVE=true | fields status,FAILED +"source = dev.default.http_logs | dedup 2 status, size CONSECUTIVE=true | fields status",FAILED +"source = dev.default.http_logs | sort status | fields @timestamp, clientip, status | head 10",SUCCESS +"source = dev.default.http_logs | fields @timestamp, notexisted | head 10",FAILED +"source = dev.default.nested | fields int_col, struct_col.field1, struct_col2.field1 | head 10",SUCCESS +"source = dev.default.nested | where struct_col2.field1.subfield > 'valueA' | sort int_col | fields int_col, struct_col.field1.subfield, struct_col2.field1.subfield",SUCCESS +"source = dev.default.http_logs | fields - @timestamp, clientip, status | head 10",SUCCESS +"source = dev.default.http_logs | eval new_time = @timestamp, new_clientip = clientip | fields - new_time, new_clientip, status | head 10",SUCCESS +source = dev.default.http_logs | eval new_clientip = lower(clientip) | fields - new_clientip | head 10,SUCCESS +"source = dev.default.http_logs | fields + @timestamp, clientip, status | fields - clientip, status | head 10",SUCCESS +"source = dev.default.http_logs | fields - clientip, status | fields + @timestamp, clientip, status| head 10",SUCCESS +source = dev.default.http_logs | where status = 200 | head 10,SUCCESS +source = dev.default.http_logs | where status != 200 | head 10,SUCCESS +source = dev.default.http_logs | where size > 0 | head 10,SUCCESS +source = dev.default.http_logs | where size <= 0 | head 10,SUCCESS +source = dev.default.http_logs | where clientip = '236.14.2.0' | head 10,SUCCESS +source = dev.default.http_logs | where size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100,SUCCESS +"source = dev.default.http_logs | where size <= 0 AND like(request, 'GET%') | head 10",SUCCESS +source = dev.default.http_logs status = 200 | head 10,SUCCESS +source = dev.default.http_logs size > 0 AND status = 200 OR clientip = '236.14.2.0' | head 100,SUCCESS +"source = dev.default.http_logs size <= 0 AND like(request, 'GET%') | head 10",SUCCESS +"source = dev.default.http_logs substring(clientip, 5, 2) = ""12"" | head 10",SUCCESS +source = dev.default.http_logs | where isempty(size),SUCCESS +source = dev.default.http_logs | where ispresent(size),SUCCESS +source = dev.default.http_logs | where isnull(size) | head 10,SUCCESS +source = dev.default.http_logs | where isnotnull(size) | head 10,SUCCESS +"source = dev.default.http_logs | where isnotnull(coalesce(size, status)) | head 10",SUCCESS +"source = dev.default.http_logs | where like(request, 
'GET%') | head 10",SUCCESS +"source = dev.default.http_logs | where like(request, '%bordeaux%') | head 10",SUCCESS +"source = dev.default.http_logs | where substring(clientip, 5, 2) = ""12"" | head 10",SUCCESS +"source = dev.default.http_logs | where lower(request) = ""get /images/backnews.gif http/1.0"" | head 10",SUCCESS +source = dev.default.http_logs | where length(request) = 38 | head 10,SUCCESS +"source = dev.default.http_logs | where case(status = 200, 'success' else 'failed') = 'success' | head 10",SUCCESS +"source = dev.default.http_logs | eval h = ""Hello"", w = ""World"" | head 10",SUCCESS +"source = dev.default.http_logs | eval @h = ""Hello"" | eval @w = ""World"" | fields @timestamp, @h, @w",SUCCESS +source = dev.default.http_logs | eval newF = clientip | head 10,SUCCESS +"source = dev.default.http_logs | eval newF = clientip | fields clientip, newF | head 10",SUCCESS +"source = dev.default.http_logs | eval f = size | where f > 1 | sort f | fields size, clientip, status | head 10",SUCCESS +"source = dev.default.http_logs | eval f = status * 2 | eval h = f * 2 | fields status, f, h | head 10",SUCCESS +"source = dev.default.http_logs | eval f = size * 2, h = status | stats sum(f) by h",SUCCESS +"source = dev.default.http_logs | eval f = UPPER(request) | eval h = 40 | fields f, h | head 10",SUCCESS +"source = dev.default.http_logs | eval request = ""test"" | fields request | head 10",FAILED +source = dev.default.http_logs | eval size = abs(size) | where size < 500,FAILED +"source = dev.default.http_logs | eval status_string = case(status = 200, 'success' else 'failed') | head 10",SUCCESS +"source = dev.default.http_logs | eval n = now() | eval t = unix_timestamp(@timestamp) | fields n, t | head 10",SUCCESS +source = dev.default.http_logs | eval e = isempty(size) | eval p = ispresent(size) | head 10,SUCCESS +"source = dev.default.http_logs | eval c = coalesce(size, status) | head 10",SUCCESS +source = dev.default.http_logs | eval c = coalesce(request) | head 10,SUCCESS +source = dev.default.http_logs | eval col1 = ln(size) | eval col2 = unix_timestamp(@timestamp) | sort - col1 | head 10,SUCCESS +"source = dev.default.http_logs | eval col1 = 1 | sort col1 | head 4 | eval col2 = 2 | sort - col2 | sort - size | head 2 | fields @timestamp, clientip, col2",SUCCESS +"source = dev.default.http_logs | eval stat = status | where stat > 300 | sort stat | fields @timestamp,clientip,status | head 5",SUCCESS +"source = dev.default.http_logs | eval col1 = size, col2 = clientip | stats avg(col1) by col2",SUCCESS +source = dev.default.http_logs | stats avg(size) by clientip,SUCCESS +"source = dev.default.http_logs | eval new_request = upper(request) | eval compound_field = concat('Hello ', if(like(new_request, '%bordeaux%'), 'World', clientip)) | fields new_request, compound_field | head 10",SUCCESS +source = dev.default.http_logs | stats avg(size),SUCCESS +source = dev.default.nested | stats max(int_col) by struct_col.field2,SUCCESS +source = dev.default.nested | stats distinct_count(int_col),SUCCESS +source = dev.default.nested | stats stddev_samp(int_col),SUCCESS +source = dev.default.nested | stats stddev_pop(int_col),SUCCESS +"source = dev.default.nested | stats percentile(int_col, 90)",SUCCESS +"source = dev.default.nested | stats percentile_approx(int_col, 99)",SUCCESS +source = dev.default.http_logs | stats stddev_samp(status),SUCCESS +"source = dev.default.http_logs | where status > 200 | stats percentile_approx(status, 99)",SUCCESS +"source = dev.default.nested | stats count(int_col) by 
span(struct_col.field2, 10) as a_span",SUCCESS
+"source = dev.default.nested | stats avg(int_col) by span(struct_col.field2, 10) as a_span, struct_col2.field2",SUCCESS
+"source = dev.default.http_logs | stats sum(size) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10",SUCCESS
+"source = dev.default.http_logs | stats distinct_count(clientip) by span(@timestamp, 1d) as age_size_per_day | sort - age_size_per_day | head 10",SUCCESS
+"source = dev.default.http_logs | stats avg(size) as avg_size by status, year | stats avg(avg_size) as avg_avg_size by year",SUCCESS
+"source = dev.default.http_logs | stats avg(size) as avg_size by status, year, month | stats avg(avg_size) as avg_avg_size by year, month | stats avg(avg_avg_size) as avg_avg_avg_size by year",SUCCESS
+"source = dev.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | stats avg(avg_int) as avg_avg_int by struct_col2.field2",FAILED
+"source = dev.default.nested | stats avg(int_col) as avg_int by struct_col.field2, struct_col2.field2 | eval new_col = avg_int | stats avg(avg_int) as avg_avg_int by new_col",SUCCESS
+source = dev.default.nested | rare int_col,SUCCESS
+source = dev.default.nested | rare int_col by struct_col.field2,SUCCESS
+source = dev.default.http_logs | rare request,SUCCESS
+source = dev.default.http_logs | where status > 300 | rare request by status,SUCCESS
+source = dev.default.http_logs | rare clientip,SUCCESS
+source = dev.default.http_logs | where status > 300 | rare clientip,SUCCESS
+source = dev.default.http_logs | where status > 300 | rare clientip by day,SUCCESS
+source = dev.default.nested | top int_col by struct_col.field2,SUCCESS
+source = dev.default.nested | top 1 int_col by struct_col.field2,SUCCESS
+source = dev.default.nested | top 2 int_col by struct_col.field2,SUCCESS
+source = dev.default.nested | top int_col,SUCCESS
+source = dev.default.http_logs | inner join left=l right=r on l.status = r.int_col dev.default.nested | head 10,SUCCESS
+"source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | fields request, domain | head 10",SUCCESS
+source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | top 1 domain,SUCCESS
+source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | stats count() by domain,SUCCESS
+"source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | eval a = 1 | fields a, domain | head 10",SUCCESS
+"source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/.*' | where size > 0 | sort - size | fields size, domain | head 10",SUCCESS
+"source = dev.default.http_logs | parse request 'GET /(?<domain>[a-zA-Z]+)/(?<picName>[a-zA-Z]+)/.*' | where domain = 'english' | sort - picName | fields domain, picName | head 10",SUCCESS
+source = dev.default.http_logs | patterns request | fields patterns_field | head 10,SUCCESS
+source = dev.default.http_logs | patterns request | where size > 0 | fields patterns_field | head 10,SUCCESS
+"source = dev.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | fields request, no_letter | head 10",SUCCESS
+source = dev.default.http_logs | patterns new_field='no_letter' pattern='[a-zA-Z]' request | stats count() by no_letter,SUCCESS
+"source = dev.default.http_logs | patterns new_field='status' pattern='[a-zA-Z]' request | fields request, status | head 10",FAILED
+source = dev.default.http_logs | rename @timestamp as timestamp | head 10,SUCCESS
+source = dev.default.http_logs | sort size | head 10,SUCCESS
+source = 
dev.default.http_logs | sort + size | head 10,SUCCESS +source = dev.default.http_logs | sort - size | head 10,SUCCESS +"source = dev.default.http_logs | sort + size, + @timestamp | head 10",SUCCESS +"source = dev.default.http_logs | sort - size, - @timestamp | head 10",SUCCESS +"source = dev.default.http_logs | sort - size, @timestamp | head 10",SUCCESS +"source = dev.default.http_logs | eval c1 = upper(request) | eval c2 = concat('Hello ', if(like(c1, '%bordeaux%'), 'World', clientip)) | eval c3 = length(request) | eval c4 = ltrim(request) | eval c5 = rtrim(request) | eval c6 = substring(clientip, 5, 2) | eval c7 = trim(request) | eval c8 = upper(request) | eval c9 = position('bordeaux' IN request) | eval c10 = replace(request, 'GET', 'GGG') | fields c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 | head 10",SUCCESS +"source = dev.default.http_logs | eval c1 = unix_timestamp(@timestamp) | eval c2 = now() | eval c3 = DAY_OF_WEEK(@timestamp) | eval c4 = DAY_OF_MONTH(@timestamp) | eval c5 = DAY_OF_YEAR(@timestamp) | eval c6 = @@ -121,151 +121,151 @@ HOUR_OF_DAY(@timestamp) | eval c10 = MINUTE_OF_HOUR(@timestamp) | eval c11 = SECOND_OF_MINUTE(@timestamp) | eval c12 = LOCALTIME() | fields c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12 | head 10",SUCCESS -"source=myglue_test.default.people | eval c1 = adddate(@timestamp, 1) | fields c1 | head 10",SUCCESS -"source=myglue_test.default.people | eval c2 = subdate(@timestamp, 1) | fields c2 | head 10",SUCCESS -source=myglue_test.default.people | eval c1 = date_add(@timestamp INTERVAL 1 DAY) | fields c1 | head 10,SUCCESS -source=myglue_test.default.people | eval c1 = date_sub(@timestamp INTERVAL 1 DAY) | fields c1 | head 10,SUCCESS -source=myglue_test.default.people | eval `CURDATE()` = CURDATE() | fields `CURDATE()`,SUCCESS -source=myglue_test.default.people | eval `CURRENT_DATE()` = CURRENT_DATE() | fields `CURRENT_DATE()`,SUCCESS -source=myglue_test.default.people | eval `CURRENT_TIMESTAMP()` = CURRENT_TIMESTAMP() | fields `CURRENT_TIMESTAMP()`,SUCCESS -source=myglue_test.default.people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')`,SUCCESS -source=myglue_test.default.people | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))`,SUCCESS -source=myglue_test.default.people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')`,SUCCESS -"source=myglue_test.default.people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')` = DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')`",SUCCESS -"source=myglue_test.default.people | eval `'2000-01-02' - '2000-01-01'` = DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')) | fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`", -source=myglue_test.default.people | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) | fields `DAY(DATE('2020-08-26'))`, -source=myglue_test.default.people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields 
`DAYNAME(DATE('2020-08-26'))`,FAILED -source=myglue_test.default.people | eval `CURRENT_TIMEZONE()` = CURRENT_TIMEZONE() | fields `CURRENT_TIMEZONE()`,SUCCESS -source=myglue_test.default.people | eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP() | fields `UTC_TIMESTAMP()`,SUCCESS -"source=myglue_test.default.people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))` = TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))`",SUCCESS -"source=myglue_test.default.people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')`",SUCCESS - source = myglue_test.default.http_logs | stats count(),SUCCESS -"source = myglue_test.default.http_logs | stats avg(size) as c1, max(size) as c2, min(size) as c3, sum(size) as c4, percentile(size, 50) as c5, stddev_pop(size) as c6, stddev_samp(size) as c7, distinct_count(size) as c8",SUCCESS -"source = myglue_test.default.http_logs | eval c1 = abs(size) | eval c2 = ceil(size) | eval c3 = floor(size) | eval c4 = sqrt(size) | eval c5 = ln(size) | eval c6 = pow(size, 2) | eval c7 = mod(size, 2) | fields c1, c2, c3, c4, c5, c6, c7 | head 10",SUCCESS -"source = myglue_test.default.http_logs | eval c1 = isnull(request) | eval c2 = isnotnull(request) | eval c3 = ifnull(request, +"source=dev.default.people | eval c1 = adddate(@timestamp, 1) | fields c1 | head 10",SUCCESS +"source=dev.default.people | eval c2 = subdate(@timestamp, 1) | fields c2 | head 10",SUCCESS +source=dev.default.people | eval c1 = date_add(@timestamp INTERVAL 1 DAY) | fields c1 | head 10,SUCCESS +source=dev.default.people | eval c1 = date_sub(@timestamp INTERVAL 1 DAY) | fields c1 | head 10,SUCCESS +source=dev.default.people | eval `CURDATE()` = CURDATE() | fields `CURDATE()`,SUCCESS +source=dev.default.people | eval `CURRENT_DATE()` = CURRENT_DATE() | fields `CURRENT_DATE()`,SUCCESS +source=dev.default.people | eval `CURRENT_TIMESTAMP()` = CURRENT_TIMESTAMP() | fields `CURRENT_TIMESTAMP()`,SUCCESS +source=dev.default.people | eval `DATE('2020-08-26')` = DATE('2020-08-26') | fields `DATE('2020-08-26')`,SUCCESS +source=dev.default.people | eval `DATE(TIMESTAMP('2020-08-26 13:49:00'))` = DATE(TIMESTAMP('2020-08-26 13:49:00')) | fields `DATE(TIMESTAMP('2020-08-26 13:49:00'))`,SUCCESS +source=dev.default.people | eval `DATE('2020-08-26 13:49')` = DATE('2020-08-26 13:49') | fields `DATE('2020-08-26 13:49')`,SUCCESS +"source=dev.default.people | eval `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')` = DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS'), `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')` = DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a') | fields `DATE_FORMAT('1998-01-31 13:14:15.012345', 'HH:mm:ss.SSSSSS')`, `DATE_FORMAT(TIMESTAMP('1998-01-31 13:14:15.012345'), 'yyyy-MMM-dd hh:mm:ss a')`",SUCCESS +"source=dev.default.people | eval `'2000-01-02' - '2000-01-01'` = 
DATEDIFF(TIMESTAMP('2000-01-02 00:00:00'), TIMESTAMP('2000-01-01 23:59:59')), `'2001-02-01' - '2004-01-01'` = DATEDIFF(DATE('2001-02-01'), TIMESTAMP('2004-01-01 00:00:00')) | fields `'2000-01-02' - '2000-01-01'`, `'2001-02-01' - '2004-01-01'`",SUCCESS +source=dev.default.people | eval `DAY(DATE('2020-08-26'))` = DAY(DATE('2020-08-26')) | fields `DAY(DATE('2020-08-26'))`,SUCCESS +source=dev.default.people | eval `DAYNAME(DATE('2020-08-26'))` = DAYNAME(DATE('2020-08-26')) | fields `DAYNAME(DATE('2020-08-26'))`,FAILED +source=dev.default.people | eval `CURRENT_TIMEZONE()` = CURRENT_TIMEZONE() | fields `CURRENT_TIMEZONE()`,SUCCESS +source=dev.default.people | eval `UTC_TIMESTAMP()` = UTC_TIMESTAMP() | fields `UTC_TIMESTAMP()`,SUCCESS +"source=dev.default.people | eval `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')` = TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00') | eval `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))` = TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00')) | fields `TIMESTAMPDIFF(YEAR, '1997-01-01 00:00:00', '2001-03-06 00:00:00')`, `TIMESTAMPDIFF(SECOND, timestamp('1997-01-01 00:00:23'), timestamp('1997-01-01 00:00:00'))`",SUCCESS +"source=dev.default.people | eval `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')` = TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00') | eval `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')` = TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00') | fields `TIMESTAMPADD(DAY, 17, '2000-01-01 00:00:00')`, `TIMESTAMPADD(QUARTER, -1, '2000-01-01 00:00:00')`",SUCCESS + source = dev.default.http_logs | stats count(),SUCCESS +"source = dev.default.http_logs | stats avg(size) as c1, max(size) as c2, min(size) as c3, sum(size) as c4, percentile(size, 50) as c5, stddev_pop(size) as c6, stddev_samp(size) as c7, distinct_count(size) as c8",SUCCESS +"source = dev.default.http_logs | eval c1 = abs(size) | eval c2 = ceil(size) | eval c3 = floor(size) | eval c4 = sqrt(size) | eval c5 = ln(size) | eval c6 = pow(size, 2) | eval c7 = mod(size, 2) | fields c1, c2, c3, c4, c5, c6, c7 | head 10",SUCCESS +"source = dev.default.http_logs | eval c1 = isnull(request) | eval c2 = isnotnull(request) | eval c3 = ifnull(request, ""Unknown"") | eval c4 = nullif(request, ""Unknown"") | eval c5 = isnull(size) | eval c6 = if(like(request, '%bordeaux%'), 'hello', 'world') | fields c1, c2, c3, c4, c5, c6 | head 10",SUCCESS -/* this is block comment */ source = myglue_test.tpch_csv.orders | head 1 // this is line comment,SUCCESS -"/* test in tpch q16, q18, q20 */ source = myglue_test.tpch_csv.orders | head 1 // add source=xx to avoid failure in automation",SUCCESS -"/* test in tpch q4, q21, q22 */ source = myglue_test.tpch_csv.orders | head 1",SUCCESS -"/* test in tpch q2, q11, q15, q17, q20, q22 */ source = myglue_test.tpch_csv.orders | head 1",SUCCESS -"/* test in tpch q7, q8, q9, q13, q15, q22 */ source = myglue_test.tpch_csv.orders | head 1",SUCCESS -/* lots of inner join tests in tpch */ source = myglue_test.tpch_csv.orders | head 1,SUCCESS -/* left join test in tpch q13 */ source = myglue_test.tpch_csv.orders | head 1,SUCCESS -"source = myglue_test.tpch_csv.orders +/* this is block comment */ source = dev.default.orders | head 1 // this is line comment,SUCCESS +"/* test in tpch q16, q18, q20 */ source = dev.default.orders | head 1 // add source=xx to avoid failure in automation",SUCCESS +"/* test in tpch q4, q21, q22 */ source = dev.default.orders | head 1",SUCCESS +"/* 
test in tpch q2, q11, q15, q17, q20, q22 */ source = dev.default.orders | head 1",SUCCESS +"/* test in tpch q7, q8, q9, q13, q15, q22 */ source = dev.default.orders | head 1",SUCCESS +/* lots of inner join tests in tpch */ source = dev.default.orders | head 1,SUCCESS +/* left join test in tpch q13 */ source = dev.default.orders | head 1,SUCCESS +"source = dev.default.orders | right outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') - myglue_test.tpch_csv.customer + dev.default.customer | stats count(o_orderkey) as c_count by c_custkey | sort - c_count",SUCCESS -"source = myglue_test.tpch_csv.orders +"source = dev.default.orders | full outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') - myglue_test.tpch_csv.customer + dev.default.customer | stats count(o_orderkey) as c_count by c_custkey | sort - c_count",SUCCESS -"source = myglue_test.tpch_csv.customer -| semi join ON c_custkey = o_custkey myglue_test.tpch_csv.orders +"source = dev.default.customer +| semi join ON c_custkey = o_custkey dev.default.orders | where c_mktsegment = 'BUILDING' | sort - c_custkey | head 10",SUCCESS -"source = myglue_test.tpch_csv.customer -| anti join ON c_custkey = o_custkey myglue_test.tpch_csv.orders +"source = dev.default.customer +| anti join ON c_custkey = o_custkey dev.default.orders | where c_mktsegment = 'BUILDING' | sort - c_custkey | head 10",SUCCESS -"source = myglue_test.tpch_csv.supplier +"source = dev.default.supplier | where like(s_comment, '%Customer%Complaints%') -| join ON s_nationkey > n_nationkey [ source = myglue_test.tpch_csv.nation | where n_name = 'SAUDI ARABIA' ] +| join ON s_nationkey > n_nationkey [ source = dev.default.nation | where n_name = 'SAUDI ARABIA' ] | sort - s_name | head 10",SUCCESS -"source = myglue_test.tpch_csv.supplier +"source = dev.default.supplier | where like(s_comment, '%Customer%Complaints%') -| join [ source = myglue_test.tpch_csv.nation | where n_name = 'SAUDI ARABIA' ] +| join [ source = dev.default.nation | where n_name = 'SAUDI ARABIA' ] | sort - s_name | head 10",SUCCESS -source=myglue_test.default.people | LOOKUP myglue_test.default.work_info uid AS id REPLACE department | stats distinct_count(department),SUCCESS -source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS id APPEND department | stats distinct_count(department),SUCCESS -source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS id REPLACE department AS country | stats distinct_count(country),SUCCESS -source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS id APPEND department AS country | stats distinct_count(country),SUCCESS -"source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uID AS id, name REPLACE department | stats distinct_count(department)",SUCCESS -"source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uid AS ID, name APPEND department | stats distinct_count(department)",SUCCESS -"source = myglue_test.default.people| LOOKUP myglue_test.default.work_info uID AS id, name | head 10",SUCCESS -"source = myglue_test.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP myglue_test.default.work_info name REPLACE occupation AS major | stats distinct_count(major)",SUCCESS -"source = myglue_test.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP myglue_test.default.work_info name APPEND occupation AS major | stats distinct_count(major)",SUCCESS 
-"source = myglue_test.default.http_logs | eval res = json('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('{""f1"":""abc"",""f2"":{""f3"":""a"",""f4"":""b""}}') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('[1,2,3,{""f1"":1,""f2"":[5,6]},4]') | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json('[]') | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json(‘{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('{""invalid"": ""json""') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json('[1,2,3]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json(‘[1,2') | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json('[invalid json]') | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json('invalid json') | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json(null) | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array('this', 'is', 'a', 'string', 'array') | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json_array() | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array(1, 2, 0, -1, 1.1, -0.11) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array('this', 'is', 1.1, -0.11, true, false) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = array_length(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = array_length(json_array()) | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json_array_length('[]') | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array_length('[1,2,3,{""f1"":1,""f2"":[5,6]},4]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array_length('{\""key\"": 1}') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_array_length('[1,2') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', 'string_value')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', 123.45)) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', true)) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object(""a"", 1, ""b"", 2, ""c"", 3)) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', array())) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object('key', array(1, 2, 3))) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = 
to_json_string(json_object('outer', json_object('inner', 123.45))) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = to_json_string(json_object(""array"", json_array(1,2,0,-1,1.1,-0.11))) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | where json_valid(('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1",SUCCESS -"source = myglue_test.default.http_logs | where not json_valid(('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('{""f1"":""abc"",""f2"":{""f3"":""a"",""f4"":""b""}}')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('[1,2,3,{""f1"":1,""f2"":[5,6]},4]')) | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json_keys(json('[]')) | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json(‘{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('{""invalid"": ""json""')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('[1,2,3]')) | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_keys(json('[1,2')) | head 1 | fields res",SUCCESS -source = myglue_test.default.http_logs | eval res = json_keys(json('[invalid json]')) | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json_keys(json('invalid json')) | head 1 | fields res,SUCCESS -source = myglue_test.default.http_logs | eval res = json_keys(json(null)) | head 1 | fields res,SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.teacher') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[*]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[0]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[*].name') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[1].name') | head 1 | fields res",SUCCESS 
-"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[0].not_exist_key') | head 1 | fields res",SUCCESS -"source = myglue_test.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[10]') | head 1 | fields res",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > -10) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(json_object(""a"",1,""b"",-1),json_object(""a"",-1,""b"",-1)), result = forall(array, x -> x.a > 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(json_object(""a"",1,""b"",-1),json_object(""a"",-1,""b"",-1)), result = exists(array, x -> x.b < 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 10) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 0) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 10) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,3), result = transform(array, x -> x + 1) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,3), result = transform(array, (x, y) -> x + y) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x) | head 1 | fields result",SUCCESS -"source = myglue_test.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | head 1 | fields result",SUCCESS -source=myglue_test.default.people | eval age = salary | eventstats avg(age) | sort id | head 10,SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count | sort id | head 10",SUCCESS -source=myglue_test.default.people | eventstats avg(salary) by country | sort id | head 10,SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by country | sort id | head 10",SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count +source=dev.default.people | LOOKUP dev.default.work_info uid AS id REPLACE department | stats distinct_count(department),SUCCESS +source = dev.default.people| LOOKUP dev.default.work_info uid AS id APPEND department | stats distinct_count(department),SUCCESS +source = dev.default.people| LOOKUP dev.default.work_info uid AS id REPLACE department AS country | stats distinct_count(country),SUCCESS +source = dev.default.people| 
LOOKUP dev.default.work_info uid AS id APPEND department AS country | stats distinct_count(country),SUCCESS +"source = dev.default.people| LOOKUP dev.default.work_info uID AS id, name REPLACE department | stats distinct_count(department)",SUCCESS +"source = dev.default.people| LOOKUP dev.default.work_info uid AS ID, name APPEND department | stats distinct_count(department)",SUCCESS +"source = dev.default.people| LOOKUP dev.default.work_info uID AS id, name | head 10",SUCCESS +"source = dev.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP dev.default.work_info name REPLACE occupation AS major | stats distinct_count(major)",SUCCESS +"source = dev.default.people | eval major = occupation | fields id, name, major, country, salary | LOOKUP dev.default.work_info name APPEND occupation AS major | stats distinct_count(major)",SUCCESS +"source = dev.default.http_logs | eval res = json('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('{""f1"":""abc"",""f2"":{""f3"":""a"",""f4"":""b""}}') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('[1,2,3,{""f1"":1,""f2"":[5,6]},4]') | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json('[]') | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('{""invalid"": ""json""') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('[1,2,3]') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json('[1,2') | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json('[invalid json]') | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json('invalid json') | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json(null) | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_array('this', 'is', 'a', 'string', 'array') | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json_array() | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_array(1, 2, 0, -1, 1.1, -0.11) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_array('this', 'is', 1.1, -0.11, true, false) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = array_length(json_array(1,2,0,-1,1.1,-0.11)) | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = array_length(json_array()) | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json_array_length('[]') | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_array_length('[1,2,3,{""f1"":1,""f2"":[5,6]},4]') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_array_length('{\""key\"": 1}') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_array_length('[1,2') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', 'string_value')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval 
res = to_json_string(json_object('key', 123.45)) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', true)) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object(""a"", 1, ""b"", 2, ""c"", 3)) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', array())) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('key', array(1, 2, 3))) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object('outer', json_object('inner', 123.45))) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = to_json_string(json_object(""array"", json_array(1,2,0,-1,1.1,-0.11))) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | where json_valid('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1",SUCCESS +"source = dev.default.http_logs | where not json_valid('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}') | head 1",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('{""account_number"":1,""balance"":39225,""age"":32,""gender"":""M""}')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('{""f1"":""abc"",""f2"":{""f3"":""a"",""f4"":""b""}}')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('[1,2,3,{""f1"":1,""f2"":[5,6]},4]')) | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json_keys(json('[]')) | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('{""invalid"": ""json""')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('[1,2,3]')) | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_keys(json('[1,2')) | head 1 | fields res",SUCCESS +source = dev.default.http_logs | eval res = json_keys(json('[invalid json]')) | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json_keys(json('invalid json')) | head 1 | fields res,SUCCESS +source = dev.default.http_logs | eval res = json_keys(json(null)) | head 1 | fields res,SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.teacher') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[*]') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[0]') | head 1 | 
fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[*].name') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[1].name') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[0].not_exist_key') | head 1 | fields res",SUCCESS +"source = dev.default.http_logs | eval res = json_extract('{""teacher"":""Alice"",""student"":[{""name"":""Bob"",""rank"":1},{""name"":""Charlie"",""rank"":2}]}', '$.student[10]') | head 1 | fields res",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = forall(array, x -> x > -10) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(json_object(""a"",1,""b"",-1),json_object(""a"",-1,""b"",-1)), result = forall(array, x -> x.a > 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(json_object(""a"",1,""b"",-1),json_object(""a"",-1,""b"",-1)), result = exists(array, x -> x.b < 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = exists(array, x -> x > 10) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 0) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,0,-1,1.1,-0.11), result = filter(array, x -> x > 10) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,3), result = transform(array, x -> x + 1) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,3), result = transform(array, (x, y) -> x + y) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x) | head 1 | fields result",SUCCESS +"source = dev.default.people | eval array = json_array(1,2,3), result = reduce(array, 0, (acc, x) -> acc + x, acc -> acc * 10) | head 1 | fields result",SUCCESS +source=dev.default.people | eval age = salary | eventstats avg(age) | sort id | head 10,SUCCESS +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count | sort id | head 10",SUCCESS +source=dev.default.people | eventstats avg(salary) by country | sort id | head 10,SUCCESS +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by country | sort id | head 10",SUCCESS +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by span(age, 10) | sort id | head 10",SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as 
avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by span(age, 10) as age_span, country | sort id | head 10",SUCCESS -"source=myglue_test.default.people | where country != 'USA' | eventstats stddev_samp(salary), stddev_pop(salary), percentile_approx(salary, 60) by span(salary, 1000) as salary_span | sort id | head 10",SUCCESS -"source=myglue_test.default.people | eval age = salary | eventstats avg(age) as avg_age by occupation, country | eventstats avg(avg_age) as avg_state_age by country | sort id | head 10",SUCCESS -"source=myglue_test.default.people | eventstats distinct_count(salary) by span(salary, 1000) as age_span",FAILED -"source = myglue_test.tpch_csv.lineitem +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age, max(age) as max_age, min(age) as min_age, count(age) as count by span(age, 10) as age_span, country | sort id | head 10",SUCCESS +"source=dev.default.people | where country != 'USA' | eventstats stddev_samp(salary), stddev_pop(salary), percentile_approx(salary, 60) by span(salary, 1000) as salary_span | sort id | head 10",SUCCESS +"source=dev.default.people | eval age = salary | eventstats avg(age) as avg_age by occupation, country | eventstats avg(avg_age) as avg_state_age by country | sort id | head 10",SUCCESS +"source=dev.default.people | eventstats distinct_count(salary) by span(salary, 1000) as age_span",FAILED +"source = dev.default.lineitem | where l_shipdate <= subdate(date('1998-12-01'), 90) | stats sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, @@ -277,59 +277,59 @@ by span(age, 10) | sort id | head 10",SUCCESS count() as count_order by l_returnflag, l_linestatus | sort l_returnflag, l_linestatus",SUCCESS -"source = myglue_test.tpch_csv.part -| join ON p_partkey = ps_partkey myglue_test.tpch_csv.partsupp -| join ON s_suppkey = ps_suppkey myglue_test.tpch_csv.supplier -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation -| join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region +"source = dev.default.part +| join ON p_partkey = ps_partkey dev.default.partsupp +| join ON s_suppkey = ps_suppkey dev.default.supplier +| join ON s_nationkey = n_nationkey dev.default.nation +| join ON n_regionkey = r_regionkey dev.default.region | where p_size = 15 AND like(p_type, '%BRASS') AND r_name = 'EUROPE' AND ps_supplycost = [ - source = myglue_test.tpch_csv.partsupp - | join ON s_suppkey = ps_suppkey myglue_test.tpch_csv.supplier - | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation - | join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region + source = dev.default.partsupp + | join ON s_suppkey = ps_suppkey dev.default.supplier + | join ON s_nationkey = n_nationkey dev.default.nation + | join ON n_regionkey = r_regionkey dev.default.region | where r_name = 'EUROPE' | stats MIN(ps_supplycost) ] | sort - s_acctbal, n_name, s_name, p_partkey | head 100",SUCCESS -"source = myglue_test.tpch_csv.customer -| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders -| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem +"source = dev.default.customer +| join ON c_custkey = o_custkey dev.default.orders +| join ON l_orderkey = o_orderkey dev.default.lineitem | where c_mktsegment = 'BUILDING' AND o_orderdate < date('1995-03-15') AND l_shipdate > date('1995-03-15') | stats sum(l_extendedprice * (1 - l_discount)) as revenue by l_orderkey, o_orderdate, o_shippriority | sort - revenue, o_orderdate | head 10",SUCCESS -"source = myglue_test.tpch_csv.orders +"source = 
dev.default.orders | where o_orderdate >= date('1993-07-01') and o_orderdate < date_add(date('1993-07-01'), interval 3 month) and exists [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_orderkey = o_orderkey and l_commitdate < l_receiptdate ] | stats count() as order_count by o_orderpriority | sort o_orderpriority",SUCCESS -"source = myglue_test.tpch_csv.customer -| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders -| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem -| join ON l_suppkey = s_suppkey AND c_nationkey = s_nationkey myglue_test.tpch_csv.supplier -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation -| join ON n_regionkey = r_regionkey myglue_test.tpch_csv.region +"source = dev.default.customer +| join ON c_custkey = o_custkey dev.default.orders +| join ON l_orderkey = o_orderkey dev.default.lineitem +| join ON l_suppkey = s_suppkey AND c_nationkey = s_nationkey dev.default.supplier +| join ON s_nationkey = n_nationkey dev.default.nation +| join ON n_regionkey = r_regionkey dev.default.region | where r_name = 'ASIA' AND o_orderdate >= date('1994-01-01') AND o_orderdate < date_add(date('1994-01-01'), interval 1 year) | stats sum(l_extendedprice * (1 - l_discount)) as revenue by n_name | sort - revenue",SUCCESS -"source = myglue_test.tpch_csv.lineitem +"source = dev.default.lineitem | where l_shipdate >= date('1994-01-01') and l_shipdate < adddate(date('1994-01-01'), 365) and l_discount between .06 - 0.01 and .06 + 0.01 and l_quantity < 24 | stats sum(l_extendedprice * l_discount) as revenue",SUCCESS "source = [ - source = myglue_test.tpch_csv.supplier - | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.lineitem - | join ON o_orderkey = l_orderkey myglue_test.tpch_csv.orders - | join ON c_custkey = o_custkey myglue_test.tpch_csv.customer - | join ON s_nationkey = n1.n_nationkey myglue_test.tpch_csv.nation as n1 - | join ON c_nationkey = n2.n_nationkey myglue_test.tpch_csv.nation as n2 + source = dev.default.supplier + | join ON s_suppkey = l_suppkey dev.default.lineitem + | join ON o_orderkey = l_orderkey dev.default.orders + | join ON c_custkey = o_custkey dev.default.customer + | join ON s_nationkey = n1.n_nationkey dev.default.nation as n1 + | join ON c_nationkey = n2.n_nationkey dev.default.nation as n2 | where l_shipdate between date('1995-01-01') and date('1996-12-31') and n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY' or n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE' | eval supp_nation = n1.n_name, cust_nation = n2.n_name, l_year = year(l_shipdate), volume = l_extendedprice * (1 - l_discount) @@ -338,14 +338,14 @@ by span(age, 10) | sort id | head 10",SUCCESS | stats sum(volume) as revenue by supp_nation, cust_nation, l_year | sort supp_nation, cust_nation, l_year",SUCCESS "source = [ - source = myglue_test.tpch_csv.part - | join ON p_partkey = l_partkey myglue_test.tpch_csv.lineitem - | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.supplier - | join ON l_orderkey = o_orderkey myglue_test.tpch_csv.orders - | join ON o_custkey = c_custkey myglue_test.tpch_csv.customer - | join ON c_nationkey = n1.n_nationkey myglue_test.tpch_csv.nation as n1 - | join ON s_nationkey = n2.n_nationkey myglue_test.tpch_csv.nation as n2 - | join ON n1.n_regionkey = r_regionkey myglue_test.tpch_csv.region + source = dev.default.part + | join ON p_partkey = l_partkey dev.default.lineitem + | join ON s_suppkey = l_suppkey dev.default.supplier + | join ON l_orderkey = o_orderkey dev.default.orders + | join ON o_custkey = 
c_custkey dev.default.customer + | join ON c_nationkey = n1.n_nationkey dev.default.nation as n1 + | join ON s_nationkey = n2.n_nationkey dev.default.nation as n2 + | join ON n1.n_regionkey = r_regionkey dev.default.region | where r_name = 'AMERICA' AND p_type = 'ECONOMY ANODIZED STEEL' and o_orderdate between date('1995-01-01') and date('1996-12-31') | eval o_year = year(o_orderdate) @@ -358,12 +358,12 @@ by span(age, 10) | sort id | head 10",SUCCESS | fields mkt_share, o_year | sort o_year",SUCCESS "source = [ - source = myglue_test.tpch_csv.part - | join ON p_partkey = l_partkey myglue_test.tpch_csv.lineitem - | join ON s_suppkey = l_suppkey myglue_test.tpch_csv.supplier - | join ON ps_partkey = l_partkey and ps_suppkey = l_suppkey myglue_test.tpch_csv.partsupp - | join ON o_orderkey = l_orderkey myglue_test.tpch_csv.orders - | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation + source = dev.default.part + | join ON p_partkey = l_partkey dev.default.lineitem + | join ON s_suppkey = l_suppkey dev.default.supplier + | join ON ps_partkey = l_partkey and ps_suppkey = l_suppkey dev.default.partsupp + | join ON o_orderkey = l_orderkey dev.default.orders + | join ON s_nationkey = n_nationkey dev.default.nation | where like(p_name, '%green%') | eval nation = n_name | eval o_year = year(o_orderdate) @@ -372,33 +372,33 @@ by span(age, 10) | sort id | head 10",SUCCESS ] as profit | stats sum(amount) as sum_profit by nation, o_year | sort nation, - o_year",SUCCESS -"source = myglue_test.tpch_csv.customer -| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders -| join ON l_orderkey = o_orderkey myglue_test.tpch_csv.lineitem -| join ON c_nationkey = n_nationkey myglue_test.tpch_csv.nation +"source = dev.default.customer +| join ON c_custkey = o_custkey dev.default.orders +| join ON l_orderkey = o_orderkey dev.default.lineitem +| join ON c_nationkey = n_nationkey dev.default.nation | where o_orderdate >= date('1993-10-01') AND o_orderdate < date_add(date('1993-10-01'), interval 3 month) AND l_returnflag = 'R' | stats sum(l_extendedprice * (1 - l_discount)) as revenue by c_custkey, c_name, c_acctbal, c_phone, n_name, c_address, c_comment | sort - revenue | head 20",SUCCESS -"source = myglue_test.tpch_csv.partsupp -| join ON ps_suppkey = s_suppkey myglue_test.tpch_csv.supplier -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation +"source = dev.default.partsupp +| join ON ps_suppkey = s_suppkey dev.default.supplier +| join ON s_nationkey = n_nationkey dev.default.nation | where n_name = 'GERMANY' | stats sum(ps_supplycost * ps_availqty) as value by ps_partkey | where value > [ - source = myglue_test.tpch_csv.partsupp - | join ON ps_suppkey = s_suppkey myglue_test.tpch_csv.supplier - | join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation + source = dev.default.partsupp + | join ON ps_suppkey = s_suppkey dev.default.supplier + | join ON s_nationkey = n_nationkey dev.default.nation | where n_name = 'GERMANY' | stats sum(ps_supplycost * ps_availqty) as check | eval threshold = check * 0.0001000000 | fields threshold ] | sort - value",SUCCESS -"source = myglue_test.tpch_csv.orders -| join ON o_orderkey = l_orderkey myglue_test.tpch_csv.lineitem +"source = dev.default.orders +| join ON o_orderkey = l_orderkey dev.default.lineitem | where l_commitdate < l_receiptdate and l_shipdate < l_commitdate and l_shipmode in ('MAIL', 'SHIP') @@ -409,32 +409,32 @@ by span(age, 10) | sort id | head 10",SUCCESS by l_shipmode | sort l_shipmode",SUCCESS "source = [ - source = 
myglue_test.tpch_csv.customer + source = dev.default.customer | left outer join ON c_custkey = o_custkey AND not like(o_comment, '%special%requests%') - myglue_test.tpch_csv.orders + dev.default.orders | stats count(o_orderkey) as c_count by c_custkey ] as c_orders | stats count() as custdist by c_count | sort - custdist, - c_count",SUCCESS -"source = myglue_test.tpch_csv.lineitem +"source = dev.default.lineitem | join ON l_partkey = p_partkey AND l_shipdate >= date('1995-09-01') AND l_shipdate < date_add(date('1995-09-01'), interval 1 month) - myglue_test.tpch_csv.part + dev.default.part | stats sum(case(like(p_type, 'PROMO%'), l_extendedprice * (1 - l_discount) else 0)) as sum1, sum(l_extendedprice * (1 - l_discount)) as sum2 | eval promo_revenue = 100.00 * sum1 / sum2 // Stats and Eval commands can combine when issues/819 resolved | fields promo_revenue",SUCCESS -"source = myglue_test.tpch_csv.supplier +"source = dev.default.supplier | join right = revenue0 ON s_suppkey = supplier_no [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month) | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by supplier_no ] | where total_revenue = [ source = [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_shipdate >= date('1996-01-01') AND l_shipdate < date_add(date('1996-01-01'), interval 3 month) | eval supplier_no = l_suppkey | stats sum(l_extendedprice * (1 - l_discount)) as total_revenue by supplier_no @@ -443,24 +443,24 @@ by span(age, 10) | sort id | head 10",SUCCESS ] | sort s_suppkey | fields s_suppkey, s_name, s_address, s_phone, total_revenue",SUCCESS -"source = myglue_test.tpch_csv.partsupp -| join ON p_partkey = ps_partkey myglue_test.tpch_csv.part +"source = dev.default.partsupp +| join ON p_partkey = ps_partkey dev.default.part | where p_brand != 'Brand#45' and not like(p_type, 'MEDIUM POLISHED%') and p_size in (49, 14, 23, 45, 19, 3, 36, 9) and ps_suppkey not in [ - source = myglue_test.tpch_csv.supplier + source = dev.default.supplier | where like(s_comment, '%Customer%Complaints%') | fields s_suppkey ] | stats distinct_count(ps_suppkey) as supplier_cnt by p_brand, p_type, p_size | sort - supplier_cnt, p_brand, p_type, p_size",SUCCESS -"source = myglue_test.tpch_csv.lineitem -| join ON p_partkey = l_partkey myglue_test.tpch_csv.part +"source = dev.default.lineitem +| join ON p_partkey = l_partkey dev.default.part | where p_brand = 'Brand#23' and p_container = 'MED BOX' and l_quantity < [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_partkey = p_partkey | stats avg(l_quantity) as avg | eval `0.2 * avg` = 0.2 * avg @@ -469,11 +469,11 @@ by span(age, 10) | sort id | head 10",SUCCESS | stats sum(l_extendedprice) as sum | eval avg_yearly = sum / 7.0 | fields avg_yearly",SUCCESS -"source = myglue_test.tpch_csv.customer -| join ON c_custkey = o_custkey myglue_test.tpch_csv.orders -| join ON o_orderkey = l_orderkey myglue_test.tpch_csv.lineitem +"source = dev.default.customer +| join ON c_custkey = o_custkey dev.default.orders +| join ON o_orderkey = l_orderkey dev.default.lineitem | where o_orderkey in [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | stats sum(l_quantity) as sum by l_orderkey | where sum > 300 | fields l_orderkey @@ -481,7 +481,7 @@ by span(age, 10) | sort id | head 10",SUCCESS | stats sum(l_quantity) by c_name, 
c_custkey, o_orderkey, o_orderdate, o_totalprice | sort - o_totalprice, o_orderdate | head 100",SUCCESS -"source = myglue_test.tpch_csv.lineitem +"source = dev.default.lineitem | join ON p_partkey = l_partkey and p_brand = 'Brand#12' and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') @@ -503,19 +503,19 @@ by span(age, 10) | sort id | head 10",SUCCESS and p_size between 1 and 15 and l_shipmode in ('AIR', 'AIR REG') and l_shipinstruct = 'DELIVER IN PERSON' - myglue_test.tpch_csv.part",SUCCESS -"source = myglue_test.tpch_csv.supplier -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation + dev.default.part",SUCCESS +"source = dev.default.supplier +| join ON s_nationkey = n_nationkey dev.default.nation | where n_name = 'CANADA' and s_suppkey in [ - source = myglue_test.tpch_csv.partsupp + source = dev.default.partsupp | where ps_partkey in [ - source = myglue_test.tpch_csv.part + source = dev.default.part | where like(p_name, 'forest%') | fields p_partkey ] and ps_availqty > [ - source = myglue_test.tpch_csv.lineitem + source = dev.default.lineitem | where l_partkey = ps_partkey and l_suppkey = ps_suppkey and l_shipdate >= date('1994-01-01') @@ -526,19 +526,19 @@ by span(age, 10) | sort id | head 10",SUCCESS ] | fields ps_suppkey ]",SUCCESS -"source = myglue_test.tpch_csv.supplier -| join ON s_suppkey = l1.l_suppkey myglue_test.tpch_csv.lineitem as l1 -| join ON o_orderkey = l1.l_orderkey myglue_test.tpch_csv.orders -| join ON s_nationkey = n_nationkey myglue_test.tpch_csv.nation +"source = dev.default.supplier +| join ON s_suppkey = l1.l_suppkey dev.default.lineitem as l1 +| join ON o_orderkey = l1.l_orderkey dev.default.orders +| join ON s_nationkey = n_nationkey dev.default.nation | where o_orderstatus = 'F' and l1.l_receiptdate > l1.l_commitdate and exists [ - source = myglue_test.tpch_csv.lineitem as l2 + source = dev.default.lineitem as l2 | where l2.l_orderkey = l1.l_orderkey and l2.l_suppkey != l1.l_suppkey ] and not exists [ - source = myglue_test.tpch_csv.lineitem as l3 + source = dev.default.lineitem as l3 | where l3.l_orderkey = l1.l_orderkey and l3.l_suppkey != l1.l_suppkey and l3.l_receiptdate > l3.l_commitdate @@ -548,16 +548,16 @@ by span(age, 10) | sort id | head 10",SUCCESS | sort - numwait, s_name | head 100",SUCCESS "source = [ - source = myglue_test.tpch_csv.customer + source = dev.default.customer | where substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') and c_acctbal > [ - source = myglue_test.tpch_csv.customer + source = dev.default.customer | where c_acctbal > 0.00 and substring(c_phone, 1, 2) in ('13', '31', '23', '29', '30', '18', '17') | stats avg(c_acctbal) ] and not exists [ - source = myglue_test.tpch_csv.orders + source = dev.default.orders | where o_custkey = c_custkey ] | eval cntrycode = substring(c_phone, 1, 2) diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSuite.scala index 7c19cab12..5ea123c9d 100644 --- a/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSuite.scala +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/FlintSparkSuite.scala @@ -771,6 +771,79 @@ trait FlintSparkSuite extends QueryTest with FlintSuite with OpenSearchSuite wit | """.stripMargin) } + protected def createGeoIpTestTable(testTable: String): Unit = { + sql(s""" + | CREATE TABLE $testTable + | ( + | ip STRING, + | ipv4 STRING, + | isValid BOOLEAN + | ) + | USING $tableType $tableOptions + 
|""".stripMargin) + + sql(s""" + | INSERT INTO $testTable + | VALUES ('66.249.157.90', '66.249.157.90', true), + | ('2a09:bac2:19f8:2ac3::', 'Given IPv6 is not mapped to IPv4', true), + | ('192.168.2.', '192.168.2.', false), + | ('2001:db8::ff00:12:', 'Given IPv6 is not mapped to IPv4', false) + | """.stripMargin) + } + + protected def createGeoIpTable(): Unit = { + sql(s""" + | CREATE TABLE geoip + | ( + | cidr STRING, + | country_iso_code STRING, + | country_name STRING, + | continent_name STRING, + | region_iso_code STRING, + | region_name STRING, + | city_name STRING, + | time_zone STRING, + | location STRING, + | ip_range_start DECIMAL(38,0), + | ip_range_end DECIMAL(38,0), + | ipv4 BOOLEAN + | ) + | USING $tableType $tableOptions + |""".stripMargin) + + sql(s""" + | INSERT INTO geoip + | VALUES ( + | '66.249.157.0/24', + | 'JM', + | 'Jamaica', + | 'North America', + | '14', + | 'Saint Catherine Parish', + | 'Portmore', + | 'America/Jamaica', + | '17.9686,-76.8827', + | 1123654912, + | 1123655167, + | true + | ), + | ( + | '2a09:bac2:19f8::/45', + | 'CA', + | 'Canada', + | 'North America', + | 'PE', + | 'Prince Edward Island', + | 'Charlottetown', + | 'America/Halifax', + | '46.2396,-63.1355', + | 55878094401180025937395073088449675264, + | 55878094401189697343951990121847324671, + | false + | ) + | """.stripMargin) + } + protected def createNestedJsonContentTable(tempFile: Path, testTable: String): Unit = { val json = """ diff --git a/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLGeoipITSuite.scala b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLGeoipITSuite.scala new file mode 100644 index 000000000..7031ab067 --- /dev/null +++ b/integ-test/src/integration/scala/org/opensearch/flint/spark/ppl/FlintSparkPPLGeoipITSuite.scala @@ -0,0 +1,314 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.flint.spark.ppl + +import java.util + +import org.opensearch.sql.expression.function.SerializableUdf.visit +import org.opensearch.sql.ppl.utils.DataTypeTransformer.seq + +import org.apache.spark.SparkException +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedRelation, UnresolvedStar} +import org.apache.spark.sql.catalyst.expressions.{Alias, And, CreateNamedStruct, EqualTo, Expression, GreaterThanOrEqual, LessThan, Literal} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.LeftOuter +import org.apache.spark.sql.catalyst.plans.logical.{DataFrameDropColumns, Filter, Join, JoinHint, LogicalPlan, Project, SubqueryAlias} +import org.apache.spark.sql.streaming.StreamTest + +class FlintSparkPPLGeoipITSuite + extends QueryTest + with LogicalPlanTestUtils + with FlintPPLSuite + with StreamTest { + + /** Test table and index name */ + private val testTable = "spark_catalog.default.flint_ppl_test" + override def beforeAll(): Unit = { + super.beforeAll() + + // Create test table + createGeoIpTestTable(testTable) + createGeoIpTable() + } + + protected override def afterEach(): Unit = { + super.afterEach() + // Stop all streaming jobs if any + spark.streams.active.foreach { job => + job.stop() + job.awaitTermination() + } + } + + private def getGeoIpQueryPlan( + ipAddress: UnresolvedAttribute, + left: LogicalPlan, + right: LogicalPlan, + projectionProperties: Alias): LogicalPlan = { + val joinPlan = getJoinPlan(ipAddress, left, right) + getProjection(joinPlan, 
projectionProperties) + } + + private def getJoinPlan( + ipAddress: UnresolvedAttribute, + left: LogicalPlan, + right: LogicalPlan): LogicalPlan = { + val is_ipv4 = visit("is_ipv4", util.List.of[Expression](ipAddress)) + val ip_to_int = visit("ip_to_int", util.List.of[Expression](ipAddress)) + + val t1 = SubqueryAlias("t1", left) + val t2 = SubqueryAlias("t2", right) + + val joinCondition = And( + And( + GreaterThanOrEqual(ip_to_int, UnresolvedAttribute("t2.ip_range_start")), + LessThan(ip_to_int, UnresolvedAttribute("t2.ip_range_end"))), + EqualTo(is_ipv4, UnresolvedAttribute("t2.ipv4"))) + Join(t1, t2, LeftOuter, Some(joinCondition), JoinHint.NONE) + } + + private def getProjection(joinPlan: LogicalPlan, projectionProperties: Alias): LogicalPlan = { + val projection = Project(Seq(UnresolvedStar(None), projectionProperties), joinPlan) + val dropList = Seq( + "t2.country_iso_code", + "t2.country_name", + "t2.continent_name", + "t2.region_iso_code", + "t2.region_name", + "t2.city_name", + "t2.time_zone", + "t2.location", + "t2.cidr", + "t2.ip_range_start", + "t2.ip_range_end", + "t2.ipv4").map(UnresolvedAttribute(_)) + DataFrameDropColumns(dropList, projection) + } + + test("test geoip with no parameters") { + val frame = sql(s""" + | source = $testTable | where isValid = true | eval a = geoip(ip) | fields ip, a + | """.stripMargin) + + // Retrieve the results + val results: Array[Row] = frame.collect() + + // Define the expected results + val expectedResults: Array[Row] = Array( + Row( + "66.249.157.90", + Row( + "JM", + "Jamaica", + "North America", + "14", + "Saint Catherine Parish", + "Portmore", + "America/Jamaica", + "17.9686,-76.8827")), + Row( + "2a09:bac2:19f8:2ac3::", + Row( + "CA", + "Canada", + "North America", + "PE", + "Prince Edward Island", + "Charlottetown", + "America/Halifax", + "46.2396,-63.1355"))) + + // Compare the results + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, String](_.getAs[String](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + // Compare the logical plans + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val sourceTable: LogicalPlan = Filter( + EqualTo(UnresolvedAttribute("isValid"), Literal(true)), + UnresolvedRelation(testTable.split("\\.").toSeq)) + val geoTable: LogicalPlan = UnresolvedRelation(seq("geoip")) + val projectionStruct = CreateNamedStruct( + Seq( + Literal("country_iso_code"), + UnresolvedAttribute("t2.country_iso_code"), + Literal("country_name"), + UnresolvedAttribute("t2.country_name"), + Literal("continent_name"), + UnresolvedAttribute("t2.continent_name"), + Literal("region_iso_code"), + UnresolvedAttribute("t2.region_iso_code"), + Literal("region_name"), + UnresolvedAttribute("t2.region_name"), + Literal("city_name"), + UnresolvedAttribute("t2.city_name"), + Literal("time_zone"), + UnresolvedAttribute("t2.time_zone"), + Literal("location"), + UnresolvedAttribute("t2.location"))) + val structProjection = Alias(projectionStruct, "a")() + val geoIpPlan = + getGeoIpQueryPlan(UnresolvedAttribute("ip"), sourceTable, geoTable, structProjection) + val expectedPlan: LogicalPlan = + Project(Seq(UnresolvedAttribute("ip"), UnresolvedAttribute("a")), geoIpPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test geoip with one parameters") { + val frame = sql(s""" + | source = $testTable | where isValid = true | eval a = geoip(ip, country_name) | fields ip, a + | """.stripMargin) + + // Retrieve the results + val results: Array[Row] = frame.collect() + // Define 
the expected results + val expectedResults: Array[Row] = + Array(Row("66.249.157.90", "Jamaica"), Row("2a09:bac2:19f8:2ac3::", "Canada")) + + // Compare the results + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, String](_.getAs[String](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + // Compare the logical plans + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val sourceTable: LogicalPlan = Filter( + EqualTo(UnresolvedAttribute("isValid"), Literal(true)), + UnresolvedRelation(testTable.split("\\.").toSeq)) + val geoTable: LogicalPlan = UnresolvedRelation(seq("geoip")) + val structProjection = Alias(UnresolvedAttribute("t2.country_name"), "a")() + val geoIpPlan = + getGeoIpQueryPlan(UnresolvedAttribute("ip"), sourceTable, geoTable, structProjection) + val expectedPlan: LogicalPlan = + Project(Seq(UnresolvedAttribute("ip"), UnresolvedAttribute("a")), geoIpPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test geoip with multiple parameters") { + val frame = sql(s""" + | source = $testTable | where isValid = true | eval a = geoip(ip, country_name, city_name) | fields ip, a + | """.stripMargin) + + // Retrieve the results + val results: Array[Row] = frame.collect() + // Define the expected results + val expectedResults: Array[Row] = Array( + Row("66.249.157.90", Row("Jamaica", "Portmore")), + Row("2a09:bac2:19f8:2ac3::", Row("Canada", "Charlottetown"))) + + // Compare the results + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, String](_.getAs[String](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + // Compare the logical plans + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val sourceTable: LogicalPlan = Filter( + EqualTo(UnresolvedAttribute("isValid"), Literal(true)), + UnresolvedRelation(testTable.split("\\.").toSeq)) + val geoTable: LogicalPlan = UnresolvedRelation(seq("geoip")) + val projectionStruct = CreateNamedStruct( + Seq( + Literal("country_name"), + UnresolvedAttribute("t2.country_name"), + Literal("city_name"), + UnresolvedAttribute("t2.city_name"))) + val structProjection = Alias(projectionStruct, "a")() + val geoIpPlan = + getGeoIpQueryPlan(UnresolvedAttribute("ip"), sourceTable, geoTable, structProjection) + val expectedPlan: LogicalPlan = + Project(Seq(UnresolvedAttribute("ip"), UnresolvedAttribute("a")), geoIpPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test geoip with partial projection on evaluated fields") { + val frame = sql(s""" + | source = $testTable | where isValid = true | eval a = geoip(ip, city_name), b = geoip(ip, country_name) | fields ip, b + | """.stripMargin) + + // Retrieve the results + val results: Array[Row] = frame.collect() + // Define the expected results + val expectedResults: Array[Row] = + Array(Row("66.249.157.90", "Jamaica"), Row("2a09:bac2:19f8:2ac3::", "Canada")) + + // Compare the results + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, String](_.getAs[String](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + // Compare the logical plans + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val sourceTable: LogicalPlan = Filter( + EqualTo(UnresolvedAttribute("isValid"), Literal(true)), + UnresolvedRelation(testTable.split("\\.").toSeq)) + val geoTable: LogicalPlan = UnresolvedRelation(seq("geoip")) + + val structProjectionA = Alias(UnresolvedAttribute("t2.city_name"), "a")() + val geoIpPlanA = + 
getGeoIpQueryPlan(UnresolvedAttribute("ip"), sourceTable, geoTable, structProjectionA) + + val structProjectionB = Alias(UnresolvedAttribute("t2.country_name"), "b")() + val geoIpPlanB = + getGeoIpQueryPlan(UnresolvedAttribute("ip"), geoIpPlanA, geoTable, structProjectionB) + + val expectedPlan: LogicalPlan = + Project(Seq(UnresolvedAttribute("ip"), UnresolvedAttribute("b")), geoIpPlanB) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test geoip with projection on field that exists in both source and geoip table") { + val frame = sql(s""" + | source = $testTable | where isValid = true | eval a = geoip(ip, country_name) | fields ipv4, a + | """.stripMargin) + + // Retrieve the results + val results: Array[Row] = frame.collect() + // Define the expected results + val expectedResults: Array[Row] = + Array(Row("66.249.157.90", "Jamaica"), Row("Given IPv6 is not mapped to IPv4", "Canada")) + + // Compare the results + implicit val rowOrdering: Ordering[Row] = Ordering.by[Row, String](_.getAs[String](0)) + assert(results.sorted.sameElements(expectedResults.sorted)) + + // Compare the logical plans + val logicalPlan: LogicalPlan = frame.queryExecution.logical + + val sourceTable: LogicalPlan = Filter( + EqualTo(UnresolvedAttribute("isValid"), Literal(true)), + UnresolvedRelation(testTable.split("\\.").toSeq)) + val geoTable: LogicalPlan = UnresolvedRelation(seq("geoip")) + val structProjection = Alias(UnresolvedAttribute("t2.country_name"), "a")() + val geoIpPlan = + getGeoIpQueryPlan(UnresolvedAttribute("ip"), sourceTable, geoTable, structProjection) + val expectedPlan: LogicalPlan = + Project(Seq(UnresolvedAttribute("ipv4"), UnresolvedAttribute("a")), geoIpPlan) + + comparePlans(logicalPlan, expectedPlan, checkAnalysis = false) + } + + test("test geoip with invalid parameter") { + assertThrows[ParseException](sql(s""" + | source = $testTable | where isValid = true | eval a = geoip(ip, invalid_param) | fields ip, a + | """.stripMargin)) + } + + test("test geoip with invalid ip address provided") { + val frame = sql(s""" + | source = $testTable | eval a = geoip(ip) | fields ip, a + | """.stripMargin) + + // Retrieve the results + assertThrows[SparkException](frame.collect()) + } +} diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 index ae0c4c73e..8b762dffa 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLLexer.g4 @@ -425,9 +425,6 @@ ISPRESENT: 'ISPRESENT'; BETWEEN: 'BETWEEN'; CIDRMATCH: 'CIDRMATCH'; -// Geo Loction -GEOIP: 'GEOIP'; - // FLOWCONTROL FUNCTIONS IFNULL: 'IFNULL'; NULLIF: 'NULLIF'; @@ -437,6 +434,18 @@ TYPEOF: 'TYPEOF'; //OTHER CONDITIONAL EXPRESSIONS COALESCE: 'COALESCE'; +//GEOLOCATION FUNCTIONS +GEOIP: 'GEOIP'; + +//GEOLOCATION PROPERTIES +COUNTRY_ISO_CODE: 'COUNTRY_ISO_CODE'; +COUNTRY_NAME: 'COUNTRY_NAME'; +CONTINENT_NAME: 'CONTINENT_NAME'; +REGION_ISO_CODE: 'REGION_ISO_CODE'; +REGION_NAME: 'REGION_NAME'; +CITY_NAME: 'CITY_NAME'; +LOCATION: 'LOCATION'; + // RELEVANCE FUNCTIONS AND PARAMETERS MATCH: 'MATCH'; MATCH_PHRASE: 'MATCH_PHRASE'; diff --git a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 index e461b1c15..c4e30f0d3 100644 --- a/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 +++ b/ppl-spark-integration/src/main/antlr4/OpenSearchPPLParser.g4 @@ -415,6 +415,11 @@ sortbyClause evalClause : 
fieldExpression EQUAL expression
+    | geoipCommand
     ;
+
+geoipCommand
+   : fieldExpression EQUAL GEOIP LT_PRTHS ipAddress = functionArg (COMMA properties = geoIpPropertyList)? RT_PRTHS
+   ;
 
 // aggregation terms
@@ -474,7 +479,6 @@ valueExpression
     | positionFunction # positionFunctionCall
     | caseFunction # caseExpr
     | timestampFunction # timestampFunctionCall
-    | geoipFunction # geoFunctionCall
     | LT_PRTHS valueExpression RT_PRTHS # parentheticValueExpr
     | LT_SQR_PRTHS subSearch RT_SQR_PRTHS # scalarSubqueryExpr
     | ident ARROW expression # lambda
@@ -572,11 +576,6 @@ dataTypeFunctionCall
     : CAST LT_PRTHS expression AS convertedDataType RT_PRTHS
     ;
 
-// geoip function
-geoipFunction
-    : GEOIP LT_PRTHS (datasource = functionArg COMMA)? ipAddress = functionArg (COMMA properties = stringLiteral)? RT_PRTHS
-    ;
-
 // boolean functions
 booleanFunctionCall
     : conditionFunctionBase LT_PRTHS functionArgs RT_PRTHS
@@ -610,7 +609,6 @@ evalFunctionName
     | cryptographicFunctionName
     | jsonFunctionName
     | collectionFunctionName
-    | geoipFunctionName
     | lambdaFunctionName
     ;
@@ -928,10 +926,6 @@ lambdaFunctionName
     | TRANSFORM
     | REDUCE
     ;
-
-geoipFunctionName
-    : GEOIP
-    ;
 
 positionFunctionName
     : POSITION
@@ -941,6 +935,21 @@ coalesceFunctionName
     : COALESCE
     ;
 
+geoIpPropertyList
+    : geoIpProperty (COMMA geoIpProperty)*
+    ;
+
+geoIpProperty
+    : COUNTRY_ISO_CODE
+    | COUNTRY_NAME
+    | CONTINENT_NAME
+    | REGION_ISO_CODE
+    | REGION_NAME
+    | CITY_NAME
+    | TIME_ZONE
+    | LOCATION
+    ;
+
 // operators
 comparisonOperator
     : EQUAL
diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java
index db191a86c..31841430c 100644
--- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java
+++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/AbstractNodeVisitor.java
@@ -346,10 +346,15 @@ public T visitExistsSubquery(ExistsSubquery node, C context) {
   public T visitWindow(Window node, C context) {
     return visitChildren(node, context);
   }
+
   public T visitCidr(Cidr node, C context) {
     return visitChildren(node, context);
   }
+
+  public T visitGeoIp(GeoIp node, C context) {
+    return visitChildren(node, context);
+  }
+
   public T visitFlatten(Flatten flatten, C context) {
     return visitChildren(flatten, context);
   }
diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Eval.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Eval.java
index 0cc27b6a9..c8482a4ff 100644
--- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Eval.java
+++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/Eval.java
@@ -12,7 +12,7 @@
 import lombok.Setter;
 import lombok.ToString;
 import org.opensearch.sql.ast.AbstractNodeVisitor;
-import org.opensearch.sql.ast.expression.Let;
+import org.opensearch.sql.ast.Node;
 
 import java.util.List;
@@ -23,7 +23,7 @@
 @EqualsAndHashCode(callSuper = false)
 @RequiredArgsConstructor
 public class Eval extends UnresolvedPlan {
-  private final List<Let> expressionList;
+  private final List<Node> expressionList;
   private UnresolvedPlan child;
 
   @Override
diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/GeoIp.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/GeoIp.java
new file mode 100644
index 000000000..feefa6929
--- /dev/null
+++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ast/tree/GeoIp.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.ast.tree;
+
+import com.google.common.collect.ImmutableList;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.RequiredArgsConstructor;
+import lombok.ToString;
+import org.opensearch.sql.ast.AbstractNodeVisitor;
+import org.opensearch.sql.ast.Node;
+import org.opensearch.sql.ast.expression.AttributeList;
+import org.opensearch.sql.ast.expression.Field;
+import org.opensearch.sql.ast.expression.UnresolvedExpression;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+
+@Getter
+@RequiredArgsConstructor
+@EqualsAndHashCode(callSuper = false)
+public class GeoIp extends UnresolvedPlan {
+    private UnresolvedPlan child;
+    private final Field field;
+    private final UnresolvedExpression ipAddress;
+    private final AttributeList properties;
+
+    @Override
+    public List<Node> getChild() {
+        return ImmutableList.of(child);
+    }
+
+    @Override
+    public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
+        return nodeVisitor.visitGeoIp(this, context);
+    }
+
+    @Override
+    public UnresolvedPlan attach(UnresolvedPlan child) {
+        this.child = child;
+        return this;
+    }
+}
\ No newline at end of file
diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/SerializableUdf.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/SerializableUdf.java
index e80a26bc4..e931175ff 100644
--- a/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/SerializableUdf.java
+++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/expression/function/SerializableUdf.java
@@ -11,13 +11,18 @@
 import org.apache.spark.sql.catalyst.expressions.Expression;
 import org.apache.spark.sql.catalyst.expressions.ScalaUDF;
 import org.apache.spark.sql.types.DataTypes;
+import scala.Function1;
 import scala.Function2;
 import scala.Option;
 import scala.Serializable;
+import scala.runtime.AbstractFunction1;
+import scala.runtime.AbstractFunction2;
 import scala.collection.JavaConverters;
 import scala.collection.mutable.WrappedArray;
-import scala.runtime.AbstractFunction2;
+import java.math.BigInteger;
+import java.net.InetAddress;
+import java.net.UnknownHostException;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
@@ -28,7 +33,6 @@
 import static org.opensearch.sql.expression.function.JsonUtils.removeNestedKey;
 import static org.opensearch.sql.ppl.utils.DataTypeTransformer.seq;
-
 public interface SerializableUdf {
@@ -142,11 +146,66 @@ public Boolean apply(String ipAddress, String cidrBlock) {
         }
     };
 
+    class geoIpUtils {
+        /**
+         * Checks whether the provided IP string is IPv4 or IPv6.
+         *
+         * @param ipAddress The input IP string.
+         * @return true if the address is IPv4, false if it is IPv6; throws a RuntimeException for an invalid address.
+         */
+        public static Function1<String, Boolean> isIpv4 = new SerializableAbstractFunction1<>() {
+
+            IPAddressStringParameters valOptions = new IPAddressStringParameters.Builder()
+                    .allowEmpty(false)
+                    .setEmptyAsLoopback(false)
+                    .allow_inet_aton(false)
+                    .allowSingleSegment(false)
+                    .toParams();
+
+            @Override
+            public Boolean apply(String ipAddress) {
+                IPAddressString parsedIpAddress = new IPAddressString(ipAddress, valOptions);
+
+                try {
+                    parsedIpAddress.validate();
+                } catch (AddressStringException e) {
+                    throw new RuntimeException("The given ipAddress '" + ipAddress + "' is invalid. It must be a valid IPv4 or IPv6 address. Error details: " + e.getMessage());
+                }
+
+                return parsedIpAddress.isIPv4();
+            }
+        };
+
+        /**
+         * Converts an IP address string to its integer representation.
+         *
+         * @param ipAddress The input IP string.
+         * @return the address as a BigInteger, or null if it cannot be resolved.
+         */
+        public static Function1<String, BigInteger> ipToInt = new SerializableAbstractFunction1<>() {
+            @Override
+            public BigInteger apply(String ipAddress) {
+                try {
+                    InetAddress inetAddress = InetAddress.getByName(ipAddress);
+                    byte[] addressBytes = inetAddress.getAddress();
+                    return new BigInteger(1, addressBytes);
+                } catch (UnknownHostException e) {
+                    System.err.println("Invalid IP address: " + e.getMessage());
+                }
+                return null;
+            }
+        };
+    }
+
+    abstract class SerializableAbstractFunction1<T1, R> extends AbstractFunction1<T1, R>
+            implements Serializable {
+    }
+
     /**
-     * get the function reference according to its name
+     * Get the function reference according to its name.
      *
-     * @param funcName
-     * @return
+     * @param funcName name of the function to retrieve.
+     * @return the ScalaUDF for the given function name.
      */
     static ScalaUDF visit(String funcName, List<Expression> expressions) {
         switch (funcName) {
@@ -177,6 +236,24 @@
                     Option.apply("json_append"),
                     false,
                     true);
+            case "is_ipv4":
+                return new ScalaUDF(geoIpUtils.isIpv4,
+                        DataTypes.BooleanType,
+                        seq(expressions),
+                        seq(),
+                        Option.empty(),
+                        Option.apply("is_ipv4"),
+                        false,
+                        true);
+            case "ip_to_int":
+                return new ScalaUDF(geoIpUtils.ipToInt,
+                        DataTypes.createDecimalType(38, 0),
+                        seq(expressions),
+                        seq(),
+                        Option.empty(),
+                        Option.apply("ip_to_int"),
+                        false,
+                        true);
             default:
                 return null;
         }
diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java
index 6d50e4298..22beab605 100644
--- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java
+++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/CatalystQueryPlanVisitor.java
@@ -57,6 +57,7 @@
 import org.opensearch.sql.ast.tree.FillNull;
 import org.opensearch.sql.ast.tree.Filter;
 import org.opensearch.sql.ast.tree.Flatten;
+import org.opensearch.sql.ast.tree.GeoIp;
 import org.opensearch.sql.ast.tree.Head;
 import org.opensearch.sql.ast.tree.Join;
 import org.opensearch.sql.ast.tree.Kmeans;
@@ -70,9 +71,11 @@
 import org.opensearch.sql.ast.tree.Sort;
 import org.opensearch.sql.ast.tree.SubqueryAlias;
 import org.opensearch.sql.ast.tree.Trendline;
+import org.opensearch.sql.ast.tree.UnresolvedPlan;
 import org.opensearch.sql.ast.tree.Window;
 import org.opensearch.sql.common.antlr.SyntaxCheckException;
 import org.opensearch.sql.ppl.utils.FieldSummaryTransformer;
+import org.opensearch.sql.ppl.utils.GeoIpCatalystLogicalPlanTranslator;
 import org.opensearch.sql.ppl.utils.ParseTransformer;
 import org.opensearch.sql.ppl.utils.ViewUtils;
 import org.opensearch.sql.ppl.utils.SortUtils;
@@ -570,19 +573,63 @@ public LogicalPlan visitRename(Rename node, CatalystPlanContext context) {
     public LogicalPlan visitEval(Eval node, CatalystPlanContext context) {
         visitFirstChild(node, context);
         List<UnresolvedExpression> aliases = new ArrayList<>();
-        List<Let> letExpressions = node.getExpressionList();
-        for (Let let : letExpressions) {
-            Alias alias = new Alias(let.getVar().getField().toString(), let.getExpression());
-            aliases.add(alias);
+        List<Node> expressions = node.getExpressionList();
+
+        // The geoip function rewrites the logical plan itself, so it is handled by the plan visitor rather than the
+        // expression visitor
+        for (Node expr : expressions) {
+            if (expr instanceof Let) {
+                Let let = (Let) expr;
+                Alias alias = new Alias(let.getVar().getField().toString(), let.getExpression());
+                aliases.add(alias);
+            } else if (expr instanceof UnresolvedPlan) {
+                expr.accept(this, context);
+            } else {
+                throw new SyntaxCheckException("Unexpected node type when visiting EVAL");
+            }
         }
-        if (context.getNamedParseExpressions().isEmpty()) {
-            // Create an UnresolvedStar for all-fields projection
-            context.getNamedParseExpressions().push(UnresolvedStar$.MODULE$.apply(Option.<Seq<UnresolvedExpression>>empty()));
+
+        if (!aliases.isEmpty()) {
+            if (context.getNamedParseExpressions().isEmpty()) {
+                // Create an UnresolvedStar for all-fields projection
+                context.getNamedParseExpressions().push(UnresolvedStar$.MODULE$.apply(Option.<Seq<UnresolvedExpression>>empty()));
+            }
+
+            visitExpressionList(aliases, context);
+            Seq<NamedExpression> projectExpressions = context.retainAllNamedParseExpressions(p -> (NamedExpression) p);
+            // build the plan with the projection step
+            return context.apply(p -> new org.apache.spark.sql.catalyst.plans.logical.Project(projectExpressions, p));
+        } else {
+            return context.getPlan();
         }
-        List<Expression> expressionList = visitExpressionList(aliases, context);
-        Seq<NamedExpression> projectExpressions = context.retainAllNamedParseExpressions(p -> (NamedExpression) p);
-        // build the plan with the projection step
-        return context.apply(p -> new org.apache.spark.sql.catalyst.plans.logical.Project(projectExpressions, p));
+    }
+
+    @Override
+    public LogicalPlan visitGeoIp(GeoIp node, CatalystPlanContext context) {
+        visitExpression(node.getProperties(), context);
+        List<String> attributeList = new ArrayList<>();
+
+        while (!context.getNamedParseExpressions().isEmpty()) {
+            Expression nextExpression = context.getNamedParseExpressions().pop();
+            String attributeName = nextExpression.toString();
+
+            if (attributeList.contains(attributeName)) {
+                throw new IllegalStateException("Duplicate attribute in GEOIP attribute list");
+            }
+
+            attributeList.add(0, attributeName);
+        }
+
+        String fieldExpression = node.getField().getField().toString();
+        Expression ipAddressExpression = visitExpression(node.getIpAddress(), context);
+
+        return GeoIpCatalystLogicalPlanTranslator.getGeoipLogicalPlan(
+                new GeoIpCatalystLogicalPlanTranslator.GeoIpParameters(
+                        fieldExpression,
+                        ipAddressExpression,
+                        attributeList),
+                context);
+    }
 
     @Override
diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
index fd9240622..bfc45f50e 100644
--- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
+++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
@@ -339,10 +339,18 @@ public UnresolvedPlan visitSortCommand(OpenSearchPPLParser.SortCommandContext ctx) {
     public UnresolvedPlan visitEvalCommand(OpenSearchPPLParser.EvalCommandContext ctx) {
         return new Eval(
             ctx.evalClause().stream()
-                .map(ct -> (Let) internalVisitExpression(ct))
+                .map(ct -> (ct.geoipCommand() != null) ? visit(ct.geoipCommand()) : (Let) internalVisitExpression(ct))
                 .collect(Collectors.toList()));
     }
 
+    @Override
+    public UnresolvedPlan visitGeoipCommand(OpenSearchPPLParser.GeoipCommandContext ctx) {
+        Field field = (Field) internalVisitExpression(ctx.fieldExpression());
+        UnresolvedExpression ipAddress = internalVisitExpression(ctx.ipAddress);
+        AttributeList properties = ctx.properties == null ?
new AttributeList(Collections.emptyList()) : (AttributeList) internalVisitExpression(ctx.properties); + return new GeoIp(field, ipAddress, properties); + } + private List getGroupByList(OpenSearchPPLParser.ByClauseContext ctx) { return ctx.fieldList().fieldExpression().stream() .map(this::internalVisitExpression) diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java index 19f7002b6..da1fa40aa 100644 --- a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/parser/AstExpressionBuilder.java @@ -51,6 +51,7 @@ import org.opensearch.sql.common.antlr.SyntaxCheckException; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.utils.ArgumentFactory; +import org.opensearch.sql.ppl.utils.GeoIpCatalystLogicalPlanTranslator; import java.util.Arrays; import java.util.Collections; @@ -454,6 +455,20 @@ public UnresolvedExpression visitLambda(OpenSearchPPLParser.LambdaContext ctx) { return new LambdaFunction(function, arguments); } + @Override + public UnresolvedExpression visitGeoIpPropertyList(OpenSearchPPLParser.GeoIpPropertyListContext ctx) { + ImmutableList.Builder properties = ImmutableList.builder(); + if (ctx != null) { + for (OpenSearchPPLParser.GeoIpPropertyContext property : ctx.geoIpProperty()) { + String propertyName = property.getText().toUpperCase(); + GeoIpCatalystLogicalPlanTranslator.validateGeoIpProperty(propertyName); + properties.add(new Literal(propertyName, DataType.STRING)); + } + } + + return new AttributeList(properties.build()); + } + private List timestampFunctionArguments( OpenSearchPPLParser.TimestampFunctionCallContext ctx) { List args = diff --git a/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/GeoIpCatalystLogicalPlanTranslator.java b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/GeoIpCatalystLogicalPlanTranslator.java new file mode 100644 index 000000000..cedc00846 --- /dev/null +++ b/ppl-spark-integration/src/main/java/org/opensearch/sql/ppl/utils/GeoIpCatalystLogicalPlanTranslator.java @@ -0,0 +1,222 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.utils; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import org.apache.spark.SparkEnv; +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute$; +import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation; +import org.apache.spark.sql.catalyst.analysis.UnresolvedStar$; +import org.apache.spark.sql.catalyst.expressions.Alias$; +import org.apache.spark.sql.catalyst.expressions.And; +import org.apache.spark.sql.catalyst.expressions.CreateStruct; +import org.apache.spark.sql.catalyst.expressions.EqualTo; +import org.apache.spark.sql.catalyst.expressions.Expression; +import org.apache.spark.sql.catalyst.expressions.GreaterThanOrEqual; +import org.apache.spark.sql.catalyst.expressions.LessThan; +import org.apache.spark.sql.catalyst.expressions.NamedExpression; +import org.apache.spark.sql.catalyst.plans.logical.DataFrameDropColumns; +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan; +import org.apache.spark.sql.catalyst.plans.logical.Project; +import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias$; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import 
org.opensearch.sql.ast.tree.Join;
+import org.opensearch.sql.expression.function.SerializableUdf;
+import org.opensearch.sql.ppl.CatalystPlanContext;
+import scala.Option;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+import static java.util.List.of;
+
+import static org.opensearch.sql.ppl.utils.DataTypeTransformer.seq;
+import static org.opensearch.sql.ppl.utils.JoinSpecTransformer.join;
+
+public interface GeoIpCatalystLogicalPlanTranslator {
+    String SPARK_CONF_KEY = "spark.geoip.tablename";
+    String DEFAULT_GEOIP_TABLE_NAME = "geoip";
+    String GEOIP_CIDR_COLUMN_NAME = "cidr";
+    String GEOIP_IP_RANGE_START_COLUMN_NAME = "ip_range_start";
+    String GEOIP_IP_RANGE_END_COLUMN_NAME = "ip_range_end";
+    String GEOIP_IPV4_COLUMN_NAME = "ipv4";
+    String SOURCE_TABLE_ALIAS = "t1";
+    String GEOIP_TABLE_ALIAS = "t2";
+    List<String> GEOIP_TABLE_COLUMNS = Arrays.stream(GeoIpProperty.values())
+            .map(Enum::name)
+            .collect(Collectors.toList());
+
+    /**
+     * Produces a Spark logical plan for the given GeoIp command arguments. Below is a sample logical plan
+     * for the configuration [source=users, field=a, ipAddress=ip, properties=[country_name, city_name]]:
+     * +- 'DataFrameDropColumns ['t2.country_iso_code, 't2.country_name, 't2.continent_name, 't2.region_iso_code, 't2.region_name, 't2.city_name, 't2.time_zone, 't2.location, 't2.cidr, 't2.ip_range_start, 't2.ip_range_end, 't2.ipv4]
+     * -- +- 'Project [*, named_struct(country_name, 't2.country_name, city_name, 't2.city_name) AS a#0]
+     * -- -- +- 'Join LeftOuter, (((ip_to_int('ip) >= 't2.ip_range_start) AND (ip_to_int('ip) < 't2.ip_range_end)) AND (is_ipv4('ip) = 't2.ipv4))
+     * -- -- -- :- 'SubqueryAlias t1
+     * -- -- -- -- : +- 'UnresolvedRelation [users], [], false
+     * -- -- -- +- 'SubqueryAlias t2
+     * -- -- -- -- -- +- 'UnresolvedRelation [geoip], [], false
+     * .
+     * And the corresponding SQL query:
+     * .
+     * SELECT users.*, struct(geoip.country_name, geoip.city_name) AS a
+     * FROM users LEFT OUTER JOIN geoip
+     * ON geoip.ip_range_start <= ip_to_int(users.ip)
+     * AND geoip.ip_range_end > ip_to_int(users.ip)
+     * AND geoip.ipv4 = is_ipv4(users.ip);
+     *
+     * @param parameters GeoIp function parameters.
+     * @param context    Context instance used to resolve expressions.
+     * @return a LogicalPlan that projects a new column holding the geo location data for the given IP addresses.
+     */
+    static LogicalPlan getGeoipLogicalPlan(GeoIpParameters parameters, CatalystPlanContext context) {
+        applyJoin(parameters.getIpAddress(), context);
+        return applyProjection(parameters.getField(), parameters.getProperties(), context);
+    }
+
+    /**
+     * Produces the join part of the GeoIp plan. Below is a sample logical plan
+     * for the configuration [source=users, ipAddress=ip]:
+     * +- 'Join LeftOuter, (((ip_to_int('ip) >= 't2.ip_range_start) AND (ip_to_int('ip) < 't2.ip_range_end)) AND (is_ipv4('ip) = 't2.ipv4))
+     * -- :- 'SubqueryAlias t1
+     * -- -- : +- 'UnresolvedRelation [users], [], false
+     * -- +- 'SubqueryAlias t2
+     * -- -- -- +- 'UnresolvedRelation [geoip], [], false
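+     * .
+     * For example (using the integ-test fixture in this change): packing 66.249.157.0
+     * big-endian gives 66*2^24 + 249*2^16 + 157*2^8 + 0 = 1123654912, so the block
+     * '66.249.157.0/24' covers the integer values 1123654912 through 1123655167, and
+     * 66.249.157.90 encodes to 1123654912 + 90 = 1123655002, which satisfies the join
+     * condition above.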
+     *
+     * @param ipAddress Expression representing the IP addresses to look up.
+     * @param context   Context instance used to resolve expressions.
+     * @return a LogicalPlan that left-joins each source row to the geoip table row whose IP range contains the address.
+     */
+    private static LogicalPlan applyJoin(Expression ipAddress, CatalystPlanContext context) {
+        return context.apply(left -> {
+            LogicalPlan right = new UnresolvedRelation(seq(getGeoipTableName()), CaseInsensitiveStringMap.empty(), false);
+            LogicalPlan leftAlias = SubqueryAlias$.MODULE$.apply(SOURCE_TABLE_ALIAS, left);
+            LogicalPlan rightAlias = SubqueryAlias$.MODULE$.apply(GEOIP_TABLE_ALIAS, right);
+            Optional<Expression> joinCondition = Optional.of(new And(
+                    new And(
+                            new GreaterThanOrEqual(
+                                    SerializableUdf.visit("ip_to_int", of(ipAddress)),
+                                    UnresolvedAttribute$.MODULE$.apply(seq(GEOIP_TABLE_ALIAS, GEOIP_IP_RANGE_START_COLUMN_NAME))),
+                            new LessThan(
+                                    SerializableUdf.visit("ip_to_int", of(ipAddress)),
+                                    UnresolvedAttribute$.MODULE$.apply(seq(GEOIP_TABLE_ALIAS, GEOIP_IP_RANGE_END_COLUMN_NAME)))),
+                    new EqualTo(
+                            SerializableUdf.visit("is_ipv4", of(ipAddress)),
+                            UnresolvedAttribute$.MODULE$.apply(seq(GEOIP_TABLE_ALIAS, GEOIP_IPV4_COLUMN_NAME)))));
+            context.retainAllNamedParseExpressions(p -> p);
+            context.retainAllPlans(p -> p);
+            return join(leftAlias,
+                    rightAlias,
+                    Join.JoinType.LEFT,
+                    joinCondition,
+                    new Join.JoinHint());
+        });
+    }
+
+    /**
+     * Produces the projection part of the GeoIp plan. Below is a sample logical plan
+     * for the configuration [source=users, field=a, properties=[country_name, city_name]]:
+     * +- 'DataFrameDropColumns ['t2.country_iso_code, 't2.country_name, 't2.continent_name, 't2.region_iso_code, 't2.region_name, 't2.city_name, 't2.time_zone, 't2.location, 't2.cidr, 't2.ip_range_start, 't2.ip_range_end, 't2.ipv4]
+     * -- +- 'Project [*, named_struct(country_name, 't2.country_name, city_name, 't2.city_name) AS a#0]
+     *
+     * @param field      Name of the new eval geoip column.
+     * @param properties List of geo properties to be returned.
+     * @param context    Context instance used to resolve expressions.
+     * @return a LogicalPlan that returns the source table plus the new geoip column.
+     */
+    private static LogicalPlan applyProjection(String field, List<String> properties, CatalystPlanContext context) {
+        List<NamedExpression> projectExpressions = new ArrayList<>();
+        projectExpressions.add(UnresolvedStar$.MODULE$.apply(Option.empty()));
+
+        List<Expression> geoIpStructFields = createGeoIpStructFields(properties);
+        Expression columnValue = (geoIpStructFields.size() == 1)
+                ? geoIpStructFields.get(0) : CreateStruct.apply(seq(geoIpStructFields));
+
+        NamedExpression geoCol = Alias$.MODULE$.apply(
+                columnValue,
+                field,
+                NamedExpression.newExprId(),
+                seq(new ArrayList<>()),
+                Option.empty(),
+                seq(new ArrayList<>()));
+
+        projectExpressions.add(geoCol);
+
+        List<Expression> dropList = createGeoIpStructFields(new ArrayList<>());
+        dropList.addAll(List.of(
+                UnresolvedAttribute$.MODULE$.apply(seq(GEOIP_TABLE_ALIAS, GEOIP_CIDR_COLUMN_NAME)),
+                UnresolvedAttribute$.MODULE$.apply(seq(GEOIP_TABLE_ALIAS, GEOIP_IP_RANGE_START_COLUMN_NAME)),
+                UnresolvedAttribute$.MODULE$.apply(seq(GEOIP_TABLE_ALIAS, GEOIP_IP_RANGE_END_COLUMN_NAME)),
+                UnresolvedAttribute$.MODULE$.apply(seq(GEOIP_TABLE_ALIAS, GEOIP_IPV4_COLUMN_NAME))));
+
+        context.apply(p -> new Project(seq(projectExpressions), p));
+        return context.apply(p -> new DataFrameDropColumns(seq(dropList), p));
+    }
+
+    private static List<Expression> createGeoIpStructFields(List<String> attributeList) {
+        List<String> attributeListToUse;
+        if (attributeList == null || attributeList.isEmpty()) {
+            attributeListToUse = GEOIP_TABLE_COLUMNS;
+        } else {
+            attributeListToUse = attributeList;
+        }
+
+        return attributeListToUse.stream()
+                .map(a -> UnresolvedAttribute$.MODULE$.apply(seq(
+                        GEOIP_TABLE_ALIAS,
+                        a.toLowerCase(Locale.ROOT))))
+                .collect(Collectors.toList());
+    }
+
+    private static String getGeoipTableName() {
+        String tableName = DEFAULT_GEOIP_TABLE_NAME;
+
+        if (SparkEnv.get() != null && SparkEnv.get().conf() != null) {
+            tableName = SparkEnv.get().conf().get(SPARK_CONF_KEY, DEFAULT_GEOIP_TABLE_NAME);
+        }
+
+        return tableName;
+    }
+
+    @Getter
+    @AllArgsConstructor
+    class GeoIpParameters {
+        private final String field;
+        private final Expression ipAddress;
+        private final List<String> properties;
+    }
+
+    enum GeoIpProperty {
+        COUNTRY_ISO_CODE,
+        COUNTRY_NAME,
+        CONTINENT_NAME,
+        REGION_ISO_CODE,
+        REGION_NAME,
+        CITY_NAME,
+        TIME_ZONE,
+        LOCATION
+    }
+
+    public static void validateGeoIpProperty(String propertyName) {
+        try {
+            GeoIpProperty.valueOf(propertyName);
+        } catch (NullPointerException | IllegalArgumentException e) {
+            throw new IllegalArgumentException("Invalid GeoIp property: " + propertyName);
+        }
+    }
+}
diff --git a/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanGeoipFunctionTranslatorTestSuite.scala b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanGeoipFunctionTranslatorTestSuite.scala
new file mode 100644
index 000000000..460b9769c
--- /dev/null
+++ b/ppl-spark-integration/src/test/scala/org/opensearch/flint/spark/ppl/PPLLogicalPlanGeoipFunctionTranslatorTestSuite.scala
@@ -0,0 +1,332 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.flint.spark.ppl
+
+import java.util
+
+import org.opensearch.flint.spark.ppl.PlaneUtils.plan
+import org.opensearch.sql.expression.function.SerializableUdf.visit
+import org.opensearch.sql.ppl.{CatalystPlanContext, CatalystQueryPlanVisitor}
+import org.opensearch.sql.ppl.utils.DataTypeTransformer.seq
+import org.scalatest.matchers.should.Matchers
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.analysis.{UnresolvedAttribute, UnresolvedFunction, UnresolvedRelation, UnresolvedStar}
+import org.apache.spark.sql.catalyst.expressions.{Alias, And, CreateNamedStruct, Descending, EqualTo, Expression, ExprId, GreaterThanOrEqual, In, LessThan, Literal, NamedExpression, ScalaUDF, SortOrder}
+import org.apache.spark.sql.catalyst.plans.{LeftOuter, PlanTest}
+import
org.apache.spark.sql.catalyst.plans.logical.{DataFrameDropColumns, Join, JoinHint, LogicalPlan, Project, Sort, SubqueryAlias} +import org.apache.spark.sql.types.DataTypes + +class PPLLogicalPlanGeoipFunctionTranslatorTestSuite + extends SparkFunSuite + with PlanTest + with LogicalPlanTestUtils + with Matchers { + + private val planTransformer = new CatalystQueryPlanVisitor() + private val pplParser = new PPLSyntaxParser() + + private def getGeoIpQueryPlan( + ipAddress: UnresolvedAttribute, + left: LogicalPlan, + right: LogicalPlan, + projectionProperties: Alias): LogicalPlan = { + val joinPlan = getJoinPlan(ipAddress, left, right) + getProjection(joinPlan, projectionProperties) + } + + private def getJoinPlan( + ipAddress: UnresolvedAttribute, + left: LogicalPlan, + right: LogicalPlan): LogicalPlan = { + val is_ipv4 = visit("is_ipv4", util.List.of[Expression](ipAddress)) + val ip_to_int = visit("ip_to_int", util.List.of[Expression](ipAddress)) + + val t1 = SubqueryAlias("t1", left) + val t2 = SubqueryAlias("t2", right) + + val joinCondition = And( + And( + GreaterThanOrEqual(ip_to_int, UnresolvedAttribute("t2.ip_range_start")), + LessThan(ip_to_int, UnresolvedAttribute("t2.ip_range_end"))), + EqualTo(is_ipv4, UnresolvedAttribute("t2.ipv4"))) + Join(t1, t2, LeftOuter, Some(joinCondition), JoinHint.NONE) + } + + private def getProjection(joinPlan: LogicalPlan, projectionProperties: Alias): LogicalPlan = { + val projection = Project(Seq(UnresolvedStar(None), projectionProperties), joinPlan) + val dropList = Seq( + "t2.country_iso_code", + "t2.country_name", + "t2.continent_name", + "t2.region_iso_code", + "t2.region_name", + "t2.city_name", + "t2.time_zone", + "t2.location", + "t2.cidr", + "t2.ip_range_start", + "t2.ip_range_end", + "t2.ipv4").map(UnresolvedAttribute(_)) + DataFrameDropColumns(dropList, projection) + } + + test("test geoip function - only ip_address provided") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan(pplParser, "source = users | eval a = geoip(ip_address)"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("users")) + val geoTable = UnresolvedRelation(seq("geoip")) + + val projectionStruct = CreateNamedStruct( + Seq( + Literal("country_iso_code"), + UnresolvedAttribute("t2.country_iso_code"), + Literal("country_name"), + UnresolvedAttribute("t2.country_name"), + Literal("continent_name"), + UnresolvedAttribute("t2.continent_name"), + Literal("region_iso_code"), + UnresolvedAttribute("t2.region_iso_code"), + Literal("region_name"), + UnresolvedAttribute("t2.region_name"), + Literal("city_name"), + UnresolvedAttribute("t2.city_name"), + Literal("time_zone"), + UnresolvedAttribute("t2.time_zone"), + Literal("location"), + UnresolvedAttribute("t2.location"))) + val structProjection = Alias(projectionStruct, "a")() + + val geoIpPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjection) + val expectedPlan = Project(Seq(UnresolvedStar(None)), geoIpPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip function - source has same name as join alias") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan(pplParser, "source=t1 | eval a = geoip(ip_address, country_name)"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("t1")) + val geoTable = UnresolvedRelation(seq("geoip")) + val structProjection = 
Alias(UnresolvedAttribute("t2.country_name"), "a")() + + val geoIpPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjection) + val expectedPlan = Project(Seq(UnresolvedStar(None)), geoIpPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip function - ipAddress col exist in geoip table") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan(pplParser, "source=t1 | eval a = geoip(cidr, country_name)"), + context) + + val ipAddress = UnresolvedAttribute("cidr") + val sourceTable = UnresolvedRelation(seq("t1")) + val geoTable = UnresolvedRelation(seq("geoip")) + val structProjection = Alias(UnresolvedAttribute("t2.country_name"), "a")() + + val geoIpPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjection) + val expectedPlan = Project(Seq(UnresolvedStar(None)), geoIpPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip function - duplicate parameters") { + val context = new CatalystPlanContext + + val exception = intercept[IllegalStateException] { + planTransformer.visit( + plan(pplParser, "source=t1 | eval a = geoip(cidr, country_name, country_name)"), + context) + } + + assert(exception.getMessage.contains("Duplicate attribute in GEOIP attribute list")) + } + + test("test geoip function - one property provided") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan(pplParser, "source=users | eval a = geoip(ip_address, country_name)"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("users")) + val geoTable = UnresolvedRelation(seq("geoip")) + val structProjection = Alias(UnresolvedAttribute("t2.country_name"), "a")() + + val geoIpPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjection) + val expectedPlan = Project(Seq(UnresolvedStar(None)), geoIpPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip function - multiple properties provided") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan(pplParser, "source=users | eval a = geoip(ip_address,country_name,location)"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("users")) + val geoTable = UnresolvedRelation(seq("geoip")) + val projectionStruct = CreateNamedStruct( + Seq( + Literal("country_name"), + UnresolvedAttribute("t2.country_name"), + Literal("location"), + UnresolvedAttribute("t2.location"))) + val structProjection = Alias(projectionStruct, "a")() + + val geoIpPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjection) + val expectedPlan = Project(Seq(UnresolvedStar(None)), geoIpPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip function - multiple geoip calls") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan( + pplParser, + "source=t | eval a = geoip(ip_address, country_iso_code), b = geoip(ip_address, region_iso_code)"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("t")) + val geoTable = UnresolvedRelation(seq("geoip")) + + val structProjectionA = Alias(UnresolvedAttribute("t2.country_iso_code"), "a")() + val colAPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjectionA) + + val structProjectionB = 
Alias(UnresolvedAttribute("t2.region_iso_code"), "b")() + val colBPlan = getGeoIpQueryPlan(ipAddress, colAPlan, geoTable, structProjectionB) + + val expectedPlan = Project(Seq(UnresolvedStar(None)), colBPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip function - other eval function used between geoip") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan( + pplParser, + "source=t | eval a = geoip(ip_address, time_zone), b = rand(), c = geoip(ip_address, region_name)"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("t")) + val geoTable = UnresolvedRelation(seq("geoip")) + + val structProjectionA = Alias(UnresolvedAttribute("t2.time_zone"), "a")() + val colAPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjectionA) + + val structProjectionC = Alias(UnresolvedAttribute("t2.region_name"), "c")() + val colCPlan = getGeoIpQueryPlan(ipAddress, colAPlan, geoTable, structProjectionC) + + val randProjectList: Seq[NamedExpression] = Seq( + UnresolvedStar(None), + Alias(UnresolvedFunction("rand", Seq.empty, isDistinct = false), "b")()) + val colBPlan = Project(randProjectList, colCPlan) + + val expectedPlan = Project(Seq(UnresolvedStar(None)), colBPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip function - other eval function used before geoip") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan(pplParser, "source=t | eval a = rand(), b = geoip(ip_address, city_name)"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("t")) + val geoTable = UnresolvedRelation(seq("geoip")) + + val structProjectionB = Alias(UnresolvedAttribute("t2.city_name"), "b")() + val colBPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjectionB) + + val randProjectList: Seq[NamedExpression] = Seq( + UnresolvedStar(None), + Alias(UnresolvedFunction("rand", Seq.empty, isDistinct = false), "a")()) + val colAPlan = Project(randProjectList, colBPlan) + + val expectedPlan = Project(Seq(UnresolvedStar(None)), colAPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip function - projection on evaluated field") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan(pplParser, "source=users | eval a = geoip(ip_address, country_name) | fields a"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("users")) + val geoTable = UnresolvedRelation(seq("geoip")) + val structProjection = Alias(UnresolvedAttribute("t2.country_name"), "a")() + + val geoIpPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjection) + val expectedPlan = Project(Seq(UnresolvedAttribute("a")), geoIpPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } + + test("test geoip with partial projection on evaluated fields") { + val context = new CatalystPlanContext + + val logPlan = + planTransformer.visit( + plan( + pplParser, + "source=t | eval a = geoip(ip_address, country_iso_code), b = geoip(ip_address, region_iso_code) | fields b"), + context) + + val ipAddress = UnresolvedAttribute("ip_address") + val sourceTable = UnresolvedRelation(seq("t")) + val geoTable = UnresolvedRelation(seq("geoip")) + + val structProjectionA = Alias(UnresolvedAttribute("t2.country_iso_code"), "a")() + val 
colAPlan = getGeoIpQueryPlan(ipAddress, sourceTable, geoTable, structProjectionA) + + val structProjectionB = Alias(UnresolvedAttribute("t2.region_iso_code"), "b")() + val colBPlan = getGeoIpQueryPlan(ipAddress, colAPlan, geoTable, structProjectionB) + + val expectedPlan = Project(Seq(UnresolvedAttribute("b")), colBPlan) + + comparePlans(expectedPlan, logPlan, checkAnalysis = false) + } +}
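
A note on the integer encoding used throughout this change: `ip_to_int` packs the raw address bytes into an unsigned big-endian BigInteger, which is why the `geoip` table stores its range bounds as DECIMAL(38,0). The following self-contained Scala sketch (not part of the PR) reproduces the computation and checks it against the fixture values above:

    import java.math.BigInteger
    import java.net.InetAddress

    // Same computation as geoIpUtils.ipToInt: the raw address bytes
    // (4 for IPv4, 16 for IPv6) read as an unsigned big-endian integer.
    def ipToInt(ip: String): BigInteger =
      new BigInteger(1, InetAddress.getByName(ip).getAddress)

    // '66.249.157.0/24' from the geoip fixture spans 1123654912..1123655167,
    // and the test address 66.249.157.90 lands inside that range.
    assert(ipToInt("66.249.157.0") == BigInteger.valueOf(1123654912L))
    assert(ipToInt("66.249.157.90") == BigInteger.valueOf(1123655002L))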
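For intuition about what the translator emits, the rewritten query has roughly the shape of the SQL below. This is an illustrative sketch only: in the real plan, `ip_to_int` and `is_ipv4` are injected as ScalaUDF expressions by `SerializableUdf.visit` rather than resolved as registered SQL functions, so running this verbatim would require registering equivalent UDFs first.

    // Approximate SQL shape of `source = users | eval a = geoip(ip, country_name)`,
    // assuming hypothetical SQL-registered equivalents of ip_to_int / is_ipv4
    // and a spark-shell `spark` session.
    val enriched = spark.sql("""
      SELECT t1.*, t2.country_name AS a
      FROM users t1
      LEFT OUTER JOIN geoip t2
        ON ip_to_int(t1.ip) >= t2.ip_range_start
       AND ip_to_int(t1.ip) <  t2.ip_range_end
       AND is_ipv4(t1.ip) = t2.ipv4
    """)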
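Finally, because `getGeoipTableName()` reads `spark.geoip.tablename` from `SparkEnv.get().conf()` (the driver's SparkConf, not the runtime SQL conf), an override has to be supplied when the session is created, for example via `--conf` on spark-submit or at builder time. A minimal sketch, with a hypothetical table name:

    import org.apache.spark.sql.SparkSession

    // The translator falls back to the table name "geoip" when the key is unset.
    val spark = SparkSession.builder()
      .appName("geoip-lookup")
      .config("spark.geoip.tablename", "spark_catalog.default.my_geoip") // hypothetical
      .getOrCreate()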