Skip to content

Commit

Permalink
fix(ingest/powerbi): support comments in m-query grammar (datahub-pro…
Browse files Browse the repository at this point in the history
  • Loading branch information
sid-acryl authored Dec 21, 2024
1 parent 8e9fc20 commit 0b4d96e
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@
// | empty_string
// | empty_string "," argument_list
// - Added sql_string in any_literal
// - Added WS_INLINE? in field expression
// - Added %ignore directives at the end of this grammar so that whitespace and comments are skipped:
//     %ignore WS           -> ignore whitespace
//     %ignore CPP_COMMENT  -> ignore single-line comments
//     %ignore C_COMMENT    -> ignore multi-line comments

lexical_unit: lexical_elements?

Expand Down Expand Up @@ -245,6 +250,8 @@ operator_or_punctuator: ","
| "=>"
| ".."
| "..."
| "{{"
| "}}"

document: section_document
| expression_document
Expand Down Expand Up @@ -275,6 +282,7 @@ expression: logical_or_expression
| if_expression
| error_raising_expression
| error_handling_expression
| outer_expression


logical_or_expression: logical_and_expression
Expand Down Expand Up @@ -376,6 +384,8 @@ sql_content: /(?:[^\"\\]|\\[\"]|\"\"|\#\(lf\))+/

sql_string: "\"" sql_content "\""

outer_expression: "{{" expression "}}"

argument_list: WS_INLINE? expression
| WS_INLINE? expression WS_INLINE? "," WS_INLINE? argument_list
| WS_INLINE? sql_string
Expand Down Expand Up @@ -409,7 +419,7 @@ record_expression: "[" field_list? "]"
field_list: field
| field "," field_list

field: field_name WS_INLINE? "=" WS_INLINE? expression
field: WS_INLINE? field_name WS_INLINE? "=" WS_INLINE? expression

field_name: generalized_identifier
| quoted_identifier
Expand Down Expand Up @@ -621,4 +631,8 @@ any_literal: record_literal
%import common.DIGIT
%import common.LF
%import common.CR
%import common.ESCAPED_STRING
%import common.ESCAPED_STRING

%ignore WS // Ignore whitespace
%ignore CPP_COMMENT // Ignore single-line comments
%ignore C_COMMENT // Ignore multi-line comments
36 changes: 36 additions & 0 deletions metadata-ingestion/tests/integration/powerbi/test_m_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,3 +1171,39 @@ def test_m_query_timeout(mock_get_lark_parser):
assert (
is_entry_present
), 'Warning message "M-Query Parsing Timeout" should be present in reporter'


def test_comments_in_m_query():
    """Check that upstream lineage is resolved for an M-Query containing a comment.

    The expression below has a single-line ``//`` comment between two ``let``
    steps. The test expects exactly one upstream table, identified by the
    Snowflake dataset URN asserted at the end.
    """
    # One literal per M-Query line; adjacent literals are concatenated by the
    # compiler, so the resulting string is a single expression.
    q: str = (
        'let\n'
        ' Source = Snowflake.Databases("xaa48144.snowflakecomputing.com", "COMPUTE_WH", [Role="ACCOUNTADMIN"]),\n'
        ' SNOWFLAKE_SAMPLE_DATA_Database = Source{[Name="SNOWFLAKE_SAMPLE_DATA", Kind="Database"]}[Data],\n'
        ' TPCDS_SF100TCL_Schema = SNOWFLAKE_SAMPLE_DATA_Database{[Name="TPCDS_SF100TCL", Kind="Schema"]}[Data],\n'
        ' ITEM_Table = TPCDS_SF100TCL_Schema{[Name="ITEM", Kind="Table"]}[Data],\n'
        ' \n'
        ' // Group by I_BRAND and calculate the count\n'
        ' BrandCountsTable = Table.Group(ITEM_Table, {"I_BRAND"}, {{"BrandCount", each Table.RowCount(_), Int64.Type}})\n'
        'in\n'
        ' BrandCountsTable'
    )

    pbi_table: powerbi_data_classes.Table = powerbi_data_classes.Table(
        name="pet_price_index",
        full_name="datalake.sandbox_pet.pet_price_index",
        expression=q,
        columns=[],
        measures=[],
    )

    reporter = PowerBiDashboardSourceReport()

    ctx, config, platform_instance_resolver = get_default_instances()

    # NOTE(review): these values look Databricks-oriented although the query
    # reads from Snowflake; presumably they are not referenced by this
    # expression. Confirm before changing them.
    query_parameters = {
        "hostname": "xyz.databricks.com",
        "http_path": "/sql/1.0/warehouses/abc",
        "catalog": "cat",
        "schema": "public",
    }

    lineage_results = parser.get_upstream_tables(
        pbi_table,
        reporter,
        ctx=ctx,
        config=config,
        platform_instance_resolver=platform_instance_resolver,
        parameters=query_parameters,
    )
    # Only the upstreams of the first returned entry are inspected.
    upstream_tables: List[DataPlatformTable] = lineage_results[0].upstreams

    expected_urn = "urn:li:dataset:(urn:li:dataPlatform:snowflake,snowflake_sample_data.tpcds_sf100tcl.item,PROD)"

    assert len(upstream_tables) == 1
    assert upstream_tables[0].urn == expected_urn

0 comments on commit 0b4d96e

Please sign in to comment.