From 5f710cdce4c2921e9cbc79c8e587f7201c36b5c3 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 24 Jun 2024 17:27:27 +0200 Subject: [PATCH 01/76] adql query parser function --- cdci_data_analysis/analysis/ivoa_helper.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 cdci_data_analysis/analysis/ivoa_helper.py diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py new file mode 100644 index 00000000..8dcf18ec --- /dev/null +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -0,0 +1,21 @@ +from queryparser.adql import ADQLQueryTranslator +from queryparser.postgresql import PostgreSQLQueryProcessor + +from ..app_logging import app_logging + + +logger = app_logging.getLogger('ivoa_helper') + + +def parse_adql_query(query): + adt = ADQLQueryTranslator(query) + qp = PostgreSQLQueryProcessor() + qp.set_query(adt.to_postgresql()) + qp.process_query() + + output_obj = dict( + columns = qp.columns, + display_columns = qp.display_columns, + ) + + return output_obj From 5526e612509b22fa7cc137eb52cc758bba922be1 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 25 Jun 2024 10:04:04 +0200 Subject: [PATCH 02/76] queryparser-python3 lib --- requirements.txt | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index cecbd478..d9bed0af 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,7 @@ GitPython nbformat sentry-sdk pytest-sentry +queryparser-python3 -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index de2adfba..b0b1c85f 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "black>=22.10.0", "bs4", "GitPython", + "queryparser-python3", "nbformat", "giturlparse", "sentry-sdk", From 7d099aa1805e8d198fbe205995140bd69a44c0d8 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 25 Jun 2024 10:04:22 +0200 Subject: [PATCH 03/76] more error check --- 
cdci_data_analysis/analysis/ivoa_helper.py | 27 ++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 8dcf18ec..b7c09f64 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,5 +1,6 @@ from queryparser.adql import ADQLQueryTranslator from queryparser.postgresql import PostgreSQLQueryProcessor +from queryparser.exceptions import QuerySyntaxError from ..app_logging import app_logging @@ -8,14 +9,22 @@ def parse_adql_query(query): - adt = ADQLQueryTranslator(query) - qp = PostgreSQLQueryProcessor() - qp.set_query(adt.to_postgresql()) - qp.process_query() - - output_obj = dict( - columns = qp.columns, - display_columns = qp.display_columns, - ) + try: + adt = ADQLQueryTranslator(query) + qp = PostgreSQLQueryProcessor() + qp.set_query(adt.to_postgresql()) + qp.process_query() + output_obj = dict( + columns = qp.columns, + display_columns = qp.display_columns, + tables = qp.tables, + ) + except QuerySyntaxError as qe: + logger.error(f'Error parsing ADQL query: {qe}') + output_obj = dict( + columns = [], + display_columns = [], + tables = [], + ) return output_obj From fb87a08e1ee2ce9f877b08c94e456b586303c482 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 8 Jul 2024 19:26:49 +0200 Subject: [PATCH 04/76] adding info --- cdci_data_analysis/analysis/ivoa_helper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index b7c09f64..f086f4ba 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -19,6 +19,7 @@ def parse_adql_query(query): columns = qp.columns, display_columns = qp.display_columns, tables = qp.tables, + rest = qp ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') From a90cc05646a001a3ec2c2900f523e16c5bbbfeaf Mon 
Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 18 Jul 2024 17:07:07 +0200 Subject: [PATCH 05/76] using sqlparse library --- cdci_data_analysis/analysis/ivoa_helper.py | 69 +++++++++++++++++----- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index f086f4ba..38f445ed 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,31 +1,70 @@ -from queryparser.adql import ADQLQueryTranslator -from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.exceptions import QuerySyntaxError +import sqlparse +import json + from ..app_logging import app_logging +from ..analysis import drupal_helper + logger = app_logging.getLogger('ivoa_helper') def parse_adql_query(query): try: - adt = ADQLQueryTranslator(query) - qp = PostgreSQLQueryProcessor() - qp.set_query(adt.to_postgresql()) - qp.process_query() + output_obj = dict() + parsed_query_obj = sqlparse.parse(query)[0] + from_seen = False + for t in parsed_query_obj.tokens: + if isinstance(t, sqlparse.sql.Where): + output_obj['where_token'] = t + if from_seen: + if isinstance(t, sqlparse.sql.Identifier): + output_obj['tables'] = [t.get_name()] + elif isinstance(t, sqlparse.sql.IdentifierList): + output_obj['tables'] = [x.get_name() for x in t.get_identifiers()] + if t.is_keyword and t.ttype is sqlparse.tokens.Keyword and t.value.upper() == 'FROM': + from_seen = True - output_obj = dict( - columns = qp.columns, - display_columns = qp.display_columns, - tables = qp.tables, - rest = qp - ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - columns = [], - display_columns = [], - tables = [], + where_token = None, + tables = None ) return output_obj + + +def run_ivoa_query(query, sentry_dsn=None, **kwargs): + result_list = [] + parsed_query_obj = parse_adql_query(query) + + tables = parsed_query_obj.get('tables', 
[]) + if len(tables) == 1 and tables[0] == 'product_gallery': + logger.info('Query is a product_gallery query') + product_gallery_url = kwargs.get('product_gallery_url', None) + gallery_jwt_token = kwargs.get('gallery_jwt_token', None) + if product_gallery_url and gallery_jwt_token: + result_list = run_ivoa_query_from_product_gallery( + product_gallery_url, + gallery_jwt_token, + sentry_dsn=sentry_dsn, + **kwargs + ) + return result_list + + +def run_ivoa_query_from_product_gallery(product_gallery_url, + gallery_jwt_token, + sentry_dsn=None, + **kwargs): + output_get = drupal_helper.get_data_product_list_by_source_name_with_conditions( + product_gallery_url=product_gallery_url, + gallery_jwt_token=gallery_jwt_token, + sentry_dsn=sentry_dsn, + **kwargs) + + output_list = json.dumps(output_get) + + return output_list \ No newline at end of file From ba0849680faaddc0835ded94bc25c9e60649d412 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 18 Jul 2024 17:08:32 +0200 Subject: [PATCH 06/76] requirements --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index d9bed0af..0a7c613c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ GitPython nbformat sentry-sdk pytest-sentry -queryparser-python3 +sqlparse -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index b0b1c85f..43f39654 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "black>=22.10.0", "bs4", "GitPython", - "queryparser-python3", + "sqlparse", "nbformat", "giturlparse", "sentry-sdk", From 6dcc1e2653ab36a57b33c89defcc40cd18435923 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 18 Jul 2024 17:23:59 +0200 Subject: [PATCH 07/76] using the two libraries combined --- cdci_data_analysis/analysis/ivoa_helper.py | 36 ++++++++++++++++------ requirements.txt | 1 + setup.py | 1 + 3 files changed, 28 insertions(+), 10 deletions(-) diff --git 
a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 38f445ed..4d444c58 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,3 +1,5 @@ +from queryparser.adql import ADQLQueryTranslator +from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.exceptions import QuerySyntaxError import sqlparse @@ -13,25 +15,39 @@ def parse_adql_query(query): try: - output_obj = dict() + adt = ADQLQueryTranslator(query) + qp = PostgreSQLQueryProcessor() + qp.set_query(adt.to_postgresql()) + qp.process_query() + + output_obj = dict( + columns = qp.columns, + display_columns = qp.display_columns, + tables = qp.tables, + rest = qp + ) + # output_obj = dict() parsed_query_obj = sqlparse.parse(query)[0] - from_seen = False + # from_seen = False for t in parsed_query_obj.tokens: if isinstance(t, sqlparse.sql.Where): output_obj['where_token'] = t - if from_seen: - if isinstance(t, sqlparse.sql.Identifier): - output_obj['tables'] = [t.get_name()] - elif isinstance(t, sqlparse.sql.IdentifierList): - output_obj['tables'] = [x.get_name() for x in t.get_identifiers()] - if t.is_keyword and t.ttype is sqlparse.tokens.Keyword and t.value.upper() == 'FROM': - from_seen = True + # if from_seen: + # if isinstance(t, sqlparse.sql.Identifier): + # output_obj['tables'] = [t.get_name()] + # elif isinstance(t, sqlparse.sql.IdentifierList): + # output_obj['tables'] = [x.get_name() for x in t.get_identifiers()] + # if t.is_keyword and t.ttype is sqlparse.tokens.Keyword and t.value.upper() == 'FROM': + # from_seen = True except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( where_token = None, - tables = None + tables = None, + columns = None, + display_columns = None, + rest = None ) return output_obj diff --git a/requirements.txt b/requirements.txt index 0a7c613c..272c6fff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,6 +30,7 @@ 
nbformat sentry-sdk pytest-sentry sqlparse +queryparser-python3 -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index 43f39654..9ae39326 100644 --- a/setup.py +++ b/setup.py @@ -46,6 +46,7 @@ "bs4", "GitPython", "sqlparse", + "queryparser-python3", "nbformat", "giturlparse", "sentry-sdk", From eec215c9a9a0e36bd6538df152cd035e12dc5889 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 23 Jul 2024 19:00:31 +0200 Subject: [PATCH 08/76] some where clauses extraction --- cdci_data_analysis/analysis/ivoa_helper.py | 63 +++++++++++++++------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 4d444c58..5aa32464 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,7 +1,11 @@ +import antlr4 from queryparser.adql import ADQLQueryTranslator from queryparser.postgresql import PostgreSQLQueryProcessor +from queryparser.postgresql.PostgreSQLParser import PostgreSQLParser from queryparser.exceptions import QuerySyntaxError +from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener + import sqlparse import json @@ -9,45 +13,64 @@ from ..analysis import drupal_helper - logger = app_logging.getLogger('ivoa_helper') +class WhereClauseListener(PostgreSQLParserListener): + def __init__(self): + self.where_clause = None + + def enterWhere_clause(self, ctx): + conditions = self.extract_elements(ctx) + self.where_clause = conditions + + def extract_elements(self, node): + elements = [] + for child in node.getChildren(): + if isinstance(child, PostgreSQLParser.ExpressionContext): + elements.extend(self.extract_elements(child)) + else: + elements.append(child.getText()) + return elements + + def parse_adql_query(query): try: + # queryparser adt = ADQLQueryTranslator(query) qp = PostgreSQLQueryProcessor() + where_listener = 
WhereClauseListener() qp.set_query(adt.to_postgresql()) qp.process_query() + inpt = antlr4.InputStream(query) + lexer = qp.lexer(inpt) + stream = antlr4.CommonTokenStream(lexer) + parser = qp.parser(stream) + tree = parser.query() + qp.walker.walk(where_listener, tree) + output_obj = dict( - columns = qp.columns, - display_columns = qp.display_columns, - tables = qp.tables, - rest = qp + columns=qp.display_columns, + tables=qp.tables, + rest=qp, + where_clause=where_listener.where_clause ) - # output_obj = dict() + + # sqlparse parsed_query_obj = sqlparse.parse(query)[0] - # from_seen = False + for t in parsed_query_obj.tokens: if isinstance(t, sqlparse.sql.Where): output_obj['where_token'] = t - # if from_seen: - # if isinstance(t, sqlparse.sql.Identifier): - # output_obj['tables'] = [t.get_name()] - # elif isinstance(t, sqlparse.sql.IdentifierList): - # output_obj['tables'] = [x.get_name() for x in t.get_identifiers()] - # if t.is_keyword and t.ttype is sqlparse.tokens.Keyword and t.value.upper() == 'FROM': - # from_seen = True except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - where_token = None, - tables = None, - columns = None, - display_columns = None, - rest = None + where_clause=None, + tables=None, + columns=None, + rest=None ) return output_obj @@ -83,4 +106,4 @@ def run_ivoa_query_from_product_gallery(product_gallery_url, output_list = json.dumps(output_get) - return output_list \ No newline at end of file + return output_list From 6759c7aecc13e06bdd83ce13f3869c5e05d6064f Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 26 Jul 2024 19:46:24 +0200 Subject: [PATCH 09/76] testing breadth-first --- cdci_data_analysis/analysis/ivoa_helper.py | 74 ++++++++++++++++++++-- 1 file changed, 67 insertions(+), 7 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 5aa32464..a93d5e65 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ 
b/cdci_data_analysis/analysis/ivoa_helper.py @@ -3,6 +3,7 @@ from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.postgresql.PostgreSQLParser import PostgreSQLParser from queryparser.exceptions import QuerySyntaxError +from collections import deque from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener @@ -21,17 +22,76 @@ def __init__(self): self.where_clause = None def enterWhere_clause(self, ctx): - conditions = self.extract_elements(ctx) + conditions = self.analyze_expressions(ctx) self.where_clause = conditions - def extract_elements(self, node): - elements = [] + def analyze_expressions(self, node): + output_obj = dict() for child in node.getChildren(): if isinstance(child, PostgreSQLParser.ExpressionContext): - elements.extend(self.extract_elements(child)) - else: - elements.append(child.getText()) - return elements + output_obj['conditions'] = self.extract_conditions_from_hierarchy(child) + return output_obj + + # def extract_conditions_from_hierarchy(self, context, level=0, conditions=None): + # bottom_reached = False + # if conditions is None: + # conditions = [] + # if isinstance(context, antlr4.ParserRuleContext): + # print(f"{' ' * level} - {type(context).__name__}, level: {level}") + # if isinstance(context, PostgreSQLParser.Bool_primaryContext): + # print("Bool_primaryContext reached") + # conditions.append({}) + # elif isinstance(context, PostgreSQLParser.Column_nameContext): + # print("Column_nameContext reached") + # # conditions[-1]['column'] = context.getText() + # bottom_reached = True + # elif isinstance(context, PostgreSQLParser.Relational_opContext): + # print("Relational_opContext reached") + # bottom_reached = True + # # conditions[-1]['operator'] = context.getText() + # elif isinstance(context, PostgreSQLParser.Number_literalContext): + # print("Number_literalContext reached") + # # conditions[-1]['value'] = context.getText() + # bottom_reached = True + # if not bottom_reached: + # for 
child in context.children: + # print(f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") + # conditions.extend(self.extract_conditions_from_hierarchy(child, level + 1, conditions=conditions)) + # return conditions + + from collections import deque + + def extract_conditions_from_hierarchy(self, context, conditions=None): + if conditions is None: + conditions = [] + + queue = deque([(context, 0)]) + + while queue: + context, level = queue.popleft() + + if isinstance(context, antlr4.ParserRuleContext): + print(f"{' ' * level} - {type(context).__name__}, level: {level}") + if isinstance(context, PostgreSQLParser.Bool_primaryContext): + print("Bool_primaryContext reached") + conditions.append({}) + elif isinstance(context, PostgreSQLParser.Column_nameContext): + print("Column_nameContext reached") + conditions[-1]['column'] = context.getText() + elif isinstance(context, PostgreSQLParser.Relational_opContext): + print("Relational_opContext reached") + conditions[-1]['operator'] = context.getText() + elif isinstance(context, PostgreSQLParser.Number_literalContext): + print("Number_literalContext reached") + conditions[-1]['value'] = context.getText() + # else: + # Enqueue all children of the current node, with their level increased by 1 + for child in context.children: + print( + f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") + queue.append((child, level + 1)) + + return conditions def parse_adql_query(query): From 90c8e4f9b884ad1ac90e1d5cc36bbd015487e4df Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 29 Jul 2024 18:02:48 +0200 Subject: [PATCH 10/76] using breadth-first --- cdci_data_analysis/analysis/ivoa_helper.py | 42 ++++------------------ 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 
a93d5e65..647c5916 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -32,41 +32,12 @@ def analyze_expressions(self, node): output_obj['conditions'] = self.extract_conditions_from_hierarchy(child) return output_obj - # def extract_conditions_from_hierarchy(self, context, level=0, conditions=None): - # bottom_reached = False - # if conditions is None: - # conditions = [] - # if isinstance(context, antlr4.ParserRuleContext): - # print(f"{' ' * level} - {type(context).__name__}, level: {level}") - # if isinstance(context, PostgreSQLParser.Bool_primaryContext): - # print("Bool_primaryContext reached") - # conditions.append({}) - # elif isinstance(context, PostgreSQLParser.Column_nameContext): - # print("Column_nameContext reached") - # # conditions[-1]['column'] = context.getText() - # bottom_reached = True - # elif isinstance(context, PostgreSQLParser.Relational_opContext): - # print("Relational_opContext reached") - # bottom_reached = True - # # conditions[-1]['operator'] = context.getText() - # elif isinstance(context, PostgreSQLParser.Number_literalContext): - # print("Number_literalContext reached") - # # conditions[-1]['value'] = context.getText() - # bottom_reached = True - # if not bottom_reached: - # for child in context.children: - # print(f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") - # conditions.extend(self.extract_conditions_from_hierarchy(child, level + 1, conditions=conditions)) - # return conditions - - from collections import deque - def extract_conditions_from_hierarchy(self, context, conditions=None): if conditions is None: conditions = [] queue = deque([(context, 0)]) - + column_level = relation_level = number_literal_level = 0 while queue: context, level = queue.popleft() @@ -77,15 +48,16 @@ def extract_conditions_from_hierarchy(self, context, conditions=None): conditions.append({}) elif isinstance(context, 
PostgreSQLParser.Column_nameContext): print("Column_nameContext reached") - conditions[-1]['column'] = context.getText() + conditions[column_level]['column'] = context.getText() + column_level += 1 elif isinstance(context, PostgreSQLParser.Relational_opContext): print("Relational_opContext reached") - conditions[-1]['operator'] = context.getText() + conditions[relation_level]['operator'] = context.getText() + relation_level += 1 elif isinstance(context, PostgreSQLParser.Number_literalContext): print("Number_literalContext reached") - conditions[-1]['value'] = context.getText() - # else: - # Enqueue all children of the current node, with their level increased by 1 + conditions[number_literal_level]['value'] = context.getText() + number_literal_level += 1 for child in context.children: print( f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") From 2c0030bae61a2979c9d97dd4fb4b3bb79fa80c01 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 30 Jul 2024 18:30:48 +0200 Subject: [PATCH 11/76] no sqlparse --- cdci_data_analysis/analysis/ivoa_helper.py | 45 +++++++++------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 647c5916..0a76596f 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -2,14 +2,12 @@ from queryparser.adql import ADQLQueryTranslator from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.postgresql.PostgreSQLParser import PostgreSQLParser +from queryparser.mysql import MySQLQueryProcessor from queryparser.exceptions import QuerySyntaxError from collections import deque from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener -import sqlparse -import json - from ..app_logging import app_logging from ..analysis import drupal_helper @@ -70,32 +68,25 @@ def 
parse_adql_query(query): try: # queryparser adt = ADQLQueryTranslator(query) - qp = PostgreSQLQueryProcessor() - where_listener = WhereClauseListener() - qp.set_query(adt.to_postgresql()) + qp = MySQLQueryProcessor() + qp.set_query(adt.to_mysql()) qp.process_query() - inpt = antlr4.InputStream(query) - lexer = qp.lexer(inpt) - stream = antlr4.CommonTokenStream(lexer) - parser = qp.parser(stream) - tree = parser.query() - qp.walker.walk(where_listener, tree) + # where_listener = WhereClauseListener() + # inpt = antlr4.InputStream(query) + # lexer = qp.lexer(inpt) + # stream = antlr4.CommonTokenStream(lexer) + # parser = qp.parser(stream) + # tree = parser.query() + # qp.walker.walk(where_listener, tree) output_obj = dict( columns=qp.display_columns, tables=qp.tables, rest=qp, - where_clause=where_listener.where_clause + # where_clause=where_listener.where_clause ) - # sqlparse - parsed_query_obj = sqlparse.parse(query)[0] - - for t in parsed_query_obj.tokens: - if isinstance(t, sqlparse.sql.Where): - output_obj['where_token'] = t - except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( @@ -130,12 +121,12 @@ def run_ivoa_query_from_product_gallery(product_gallery_url, gallery_jwt_token, sentry_dsn=None, **kwargs): - output_get = drupal_helper.get_data_product_list_by_source_name_with_conditions( - product_gallery_url=product_gallery_url, - gallery_jwt_token=gallery_jwt_token, - sentry_dsn=sentry_dsn, - **kwargs) - - output_list = json.dumps(output_get) + # output_get = drupal_helper.get_data_product_list_by_source_name_with_conditions( + # product_gallery_url=product_gallery_url, + # gallery_jwt_token=gallery_jwt_token, + # sentry_dsn=sentry_dsn, + # **kwargs) + # + # output_list = json.dumps(output_get) return output_list From 16580c26c11b34a24cc163c1aa73a091a1683adb Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 30 Jul 2024 19:11:51 +0200 Subject: [PATCH 12/76] querying mysql gallery database --- 
cdci_data_analysis/analysis/ivoa_helper.py | 69 +++++++++++----------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 0a76596f..d78b0dc3 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -6,6 +6,8 @@ from queryparser.exceptions import QuerySyntaxError from collections import deque +from mysql.connector import connect, Error + from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener from ..app_logging import app_logging @@ -72,28 +74,22 @@ def parse_adql_query(query): qp.set_query(adt.to_mysql()) qp.process_query() - # where_listener = WhereClauseListener() - # inpt = antlr4.InputStream(query) - # lexer = qp.lexer(inpt) - # stream = antlr4.CommonTokenStream(lexer) - # parser = qp.parser(stream) - # tree = parser.query() - # qp.walker.walk(where_listener, tree) - output_obj = dict( columns=qp.display_columns, tables=qp.tables, rest=qp, + mysql_query=qp.query # where_clause=where_listener.where_clause ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - where_clause=None, + # where_clause=None, tables=None, columns=None, - rest=None + rest=None, + mysql_query=None ) return output_obj @@ -102,31 +98,36 @@ def run_ivoa_query(query, sentry_dsn=None, **kwargs): result_list = [] parsed_query_obj = parse_adql_query(query) - tables = parsed_query_obj.get('tables', []) - if len(tables) == 1 and tables[0] == 'product_gallery': - logger.info('Query is a product_gallery query') - product_gallery_url = kwargs.get('product_gallery_url', None) - gallery_jwt_token = kwargs.get('gallery_jwt_token', None) - if product_gallery_url and gallery_jwt_token: - result_list = run_ivoa_query_from_product_gallery( - product_gallery_url, - gallery_jwt_token, - sentry_dsn=sentry_dsn, - **kwargs - ) + # tables = parsed_query_obj.get('tables', []) + # if 
len(tables) == 1 and tables[0] == 'product_gallery': + logger.info('Performing query on the product_gallery') + # product_gallery_url = kwargs.get('product_gallery_url', None) + # gallery_jwt_token = kwargs.get('gallery_jwt_token', None) + # if product_gallery_url and gallery_jwt_token: + result_list = run_ivoa_query_from_product_gallery(parsed_query_obj) return result_list -def run_ivoa_query_from_product_gallery(product_gallery_url, - gallery_jwt_token, - sentry_dsn=None, - **kwargs): - # output_get = drupal_helper.get_data_product_list_by_source_name_with_conditions( - # product_gallery_url=product_gallery_url, - # gallery_jwt_token=gallery_jwt_token, - # sentry_dsn=sentry_dsn, - # **kwargs) - # - # output_list = json.dumps(output_get) +def run_ivoa_query_from_product_gallery(parsed_query_obj): + result_list = [] + + try: + with connect( + # TODO: Add the connection details reading from the config file + host="", + user="", + password="", + database="" + ) as connection: + print(connection) + + create_db_query = parsed_query_obj.get('mysql_query') + with connection.cursor() as cursor: + cursor.execute(create_db_query) + for db in cursor: + print(db) + + except Error as e: + print(e) - return output_list + return result_list From 30d1ed8ce395de9fe827e3c7c40b1785767fb7a5 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 10:45:28 +0200 Subject: [PATCH 13/76] todo and removed commented lines --- cdci_data_analysis/analysis/ivoa_helper.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index d78b0dc3..e65fe0d8 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -79,13 +79,11 @@ def parse_adql_query(query): tables=qp.tables, rest=qp, mysql_query=qp.query - # where_clause=where_listener.where_clause ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = 
dict( - # where_clause=None, tables=None, columns=None, rest=None, @@ -95,9 +93,9 @@ def parse_adql_query(query): def run_ivoa_query(query, sentry_dsn=None, **kwargs): - result_list = [] parsed_query_obj = parse_adql_query(query) + # TODO use a specific dedicated table and schema to refer to the product_gallery DB ? # tables = parsed_query_obj.get('tables', []) # if len(tables) == 1 and tables[0] == 'product_gallery': logger.info('Performing query on the product_gallery') From 27708b20b739d318da64a31b289a2067fc984bb6 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 12:05:48 +0200 Subject: [PATCH 14/76] vo options in the dispatcher config --- cdci_data_analysis/config_dir/conf_env.yml.example | 8 +++++++- cdci_data_analysis/configurer.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/cdci_data_analysis/config_dir/conf_env.yml.example b/cdci_data_analysis/config_dir/conf_env.yml.example index 2dd65868..82f2cd0f 100644 --- a/cdci_data_analysis/config_dir/conf_env.yml.example +++ b/cdci_data_analysis/config_dir/conf_env.yml.example @@ -122,4 +122,10 @@ dispatcher: # url for the conversion of a given time, in UTC format, to the correspondent REVNUM converttime_revnum_service_url: COVERTTIME_REVNUM_SERVICE_URL - + # virtual observatory related configurations (eg mysql credentials) + vo_options: + # mysql credentials + vo_mysql_pg_host: MYSQL_PG_HOST + vo_mysql_pg_user: MYSQL_PG_USER + vo_mysql_pg_password: MYSQL_PG_PASSWORD + vo_mysql_pg_db: MYSQL_PG_DB diff --git a/cdci_data_analysis/configurer.py b/cdci_data_analysis/configurer.py index d35e1489..a84f3b86 100644 --- a/cdci_data_analysis/configurer.py +++ b/cdci_data_analysis/configurer.py @@ -266,6 +266,10 @@ def __init__(self, cfg_dict, origin=None): disp_dict.get('renku_options', {}).get('renku_gitlab_repository_url', None), disp_dict.get('renku_options', {}).get('renku_base_project_url', None), disp_dict.get('renku_options', {}).get('ssh_key_path', None), + 
disp_dict.get('vo_options', {}).get('vo_mysql_pg_host', None), + disp_dict.get('vo_options', {}).get('vo_mysql_pg_user', None), + disp_dict.get('vo_options', {}).get('vo_mysql_pg_password', None), + disp_dict.get('vo_options', {}).get('vo_mysql_pg_db', None), ) # not used? @@ -344,6 +348,10 @@ def set_conf_dispatcher(self, renku_gitlab_repository_url, renku_base_project_url, renku_gitlab_ssh_key_path, + vo_mysql_pg_host, + vo_mysql_pg_user, + vo_mysql_pg_password, + vo_mysql_pg_db ): # Generic to dispatcher #print(dispatcher_url, dispatcher_port) @@ -395,6 +403,10 @@ def set_conf_dispatcher(self, self.renku_gitlab_repository_url = renku_gitlab_repository_url self.renku_gitlab_ssh_key_path = renku_gitlab_ssh_key_path self.renku_base_project_url = renku_base_project_url + self.vo_mysql_pg_host = vo_mysql_pg_host + self.vo_mysql_pg_user = vo_mysql_pg_user + self.vo_mysql_pg_password = vo_mysql_pg_password + self.vo_mysql_pg_db = vo_mysql_pg_db def get_data_serve_conf(self, instr_name): if instr_name in self.data_server_conf_dict.keys(): From 83d119aacfc6c9e5207eb2d73c30820ba35a2f78 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 12:06:06 +0200 Subject: [PATCH 15/76] extracting mysql parameters from config --- cdci_data_analysis/analysis/ivoa_helper.py | 31 +++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index e65fe0d8..0cead573 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -99,26 +99,33 @@ def run_ivoa_query(query, sentry_dsn=None, **kwargs): # tables = parsed_query_obj.get('tables', []) # if len(tables) == 1 and tables[0] == 'product_gallery': logger.info('Performing query on the product_gallery') - # product_gallery_url = kwargs.get('product_gallery_url', None) - # gallery_jwt_token = kwargs.get('gallery_jwt_token', None) - # if product_gallery_url and 
gallery_jwt_token: - result_list = run_ivoa_query_from_product_gallery(parsed_query_obj) + vo_mysql_pg_host = kwargs.get('vo_mysql_pg_host', None) + vo_mysql_pg_user = kwargs.get('vo_mysql_pg_user', None) + vo_mysql_pg_password = kwargs.get('vo_mysql_pg_password', None) + vo_mysql_pg_db = kwargs.get('vo_mysql_pg_db', None) + result_list = run_ivoa_query_from_product_gallery(parsed_query_obj, + vo_mysql_pg_host=vo_mysql_pg_host, + vo_mysql_pg_user=vo_mysql_pg_user, + vo_mysql_pg_password=vo_mysql_pg_password, + vo_mysql_pg_db=vo_mysql_pg_db) return result_list -def run_ivoa_query_from_product_gallery(parsed_query_obj): +def run_ivoa_query_from_product_gallery(parsed_query_obj, + vo_mysql_pg_host, + vo_mysql_pg_user, + vo_mysql_pg_password, + vo_mysql_pg_db + ): result_list = [] try: with connect( - # TODO: Add the connection details reading from the config file - host="", - user="", - password="", - database="" + host=vo_mysql_pg_host, + user=vo_mysql_pg_user, + password=vo_mysql_pg_password, + database=vo_mysql_pg_db ) as connection: - print(connection) - create_db_query = parsed_query_obj.get('mysql_query') with connection.cursor() as cursor: cursor.execute(create_db_query) From 89f46d6f00a5d0d9da032cc05e5dd155ccd21920 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 12:09:24 +0200 Subject: [PATCH 16/76] no need for breadth first search --- cdci_data_analysis/analysis/ivoa_helper.py | 51 ---------------------- 1 file changed, 51 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 0cead573..ddc8c159 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -12,60 +12,9 @@ from ..app_logging import app_logging -from ..analysis import drupal_helper - logger = app_logging.getLogger('ivoa_helper') -class WhereClauseListener(PostgreSQLParserListener): - def __init__(self): - self.where_clause = None - - def enterWhere_clause(self, ctx): - 
conditions = self.analyze_expressions(ctx) - self.where_clause = conditions - - def analyze_expressions(self, node): - output_obj = dict() - for child in node.getChildren(): - if isinstance(child, PostgreSQLParser.ExpressionContext): - output_obj['conditions'] = self.extract_conditions_from_hierarchy(child) - return output_obj - - def extract_conditions_from_hierarchy(self, context, conditions=None): - if conditions is None: - conditions = [] - - queue = deque([(context, 0)]) - column_level = relation_level = number_literal_level = 0 - while queue: - context, level = queue.popleft() - - if isinstance(context, antlr4.ParserRuleContext): - print(f"{' ' * level} - {type(context).__name__}, level: {level}") - if isinstance(context, PostgreSQLParser.Bool_primaryContext): - print("Bool_primaryContext reached") - conditions.append({}) - elif isinstance(context, PostgreSQLParser.Column_nameContext): - print("Column_nameContext reached") - conditions[column_level]['column'] = context.getText() - column_level += 1 - elif isinstance(context, PostgreSQLParser.Relational_opContext): - print("Relational_opContext reached") - conditions[relation_level]['operator'] = context.getText() - relation_level += 1 - elif isinstance(context, PostgreSQLParser.Number_literalContext): - print("Number_literalContext reached") - conditions[number_literal_level]['value'] = context.getText() - number_literal_level += 1 - for child in context.children: - print( - f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") - queue.append((child, level + 1)) - - return conditions - - def parse_adql_query(query): try: # queryparser From b8289103d440b0f1254c01551d8f45aadaf7c54e Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 12:39:17 +0200 Subject: [PATCH 17/76] sentry in case of error --- cdci_data_analysis/analysis/ivoa_helper.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git 
a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index ddc8c159..ae12777e 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,15 +1,11 @@ import antlr4 from queryparser.adql import ADQLQueryTranslator -from queryparser.postgresql import PostgreSQLQueryProcessor -from queryparser.postgresql.PostgreSQLParser import PostgreSQLParser from queryparser.mysql import MySQLQueryProcessor from queryparser.exceptions import QuerySyntaxError -from collections import deque from mysql.connector import connect, Error -from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener - +from ..flask_app.sentry import sentry from ..app_logging import app_logging logger = app_logging.getLogger('ivoa_helper') @@ -41,7 +37,7 @@ def parse_adql_query(query): return output_obj -def run_ivoa_query(query, sentry_dsn=None, **kwargs): +def run_ivoa_query(query, **kwargs): parsed_query_obj = parse_adql_query(query) # TODO use a specific dedicated table and schema to refer to the product_gallery DB ? 
@@ -79,9 +75,10 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, with connection.cursor() as cursor: cursor.execute(create_db_query) for db in cursor: - print(db) + logger.info(db) except Error as e: - print(e) + sentry.capture_message(f"Error when connecting to MySQL or performing the query: {str(e)}") + logger.error(f"Error when connecting to MySQL or performing the query: {str(e)}") return result_list From 34bdea841a4f02c24f3bce242e1b987d61e39477 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 13:00:50 +0200 Subject: [PATCH 18/76] capturing general exception --- cdci_data_analysis/analysis/ivoa_helper.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index ae12777e..e73da1b3 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -76,9 +76,14 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, cursor.execute(create_db_query) for db in cursor: logger.info(db) + result_list.append(db) except Error as e: - sentry.capture_message(f"Error when connecting to MySQL or performing the query: {str(e)}") - logger.error(f"Error when connecting to MySQL or performing the query: {str(e)}") + sentry.capture_message(f"Error when connecting to MySQL: {str(e)}") + logger.error(f"Error when connecting to MySQL: {str(e)}") + + except Exception as e: + sentry.capture_message(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") + logger.error(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") return result_list From 1ee31a11e8378e7bc6d4062378a7fa8059c54519 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 17:30:19 +0200 Subject: [PATCH 19/76] dispatcher endpoint --- cdci_data_analysis/flask_app/app.py | 33 ++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git 
a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 3188949f..37b9550f 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -27,7 +27,7 @@ import time as _time from urllib.parse import urlencode, urlparse -from cdci_data_analysis.analysis import drupal_helper, tokenHelper, renku_helper, email_helper, matrix_helper +from cdci_data_analysis.analysis import drupal_helper, tokenHelper, renku_helper, email_helper, matrix_helper, ivoa_helper from .logstash import logstash_message from .schemas import QueryOutJSON, dispatcher_strict_validate from marshmallow.exceptions import ValidationError @@ -415,6 +415,37 @@ def push_renku_branch(): "Our team is notified and is working on it.") +@app.route('/run_adql_query') +def run_adql_query(): + logger.info("request.args: %s ", request.args) + + token = request.args.get('token', None) + app_config = app.config.get('conf') + secret_key = app_config.secret_key + + output, output_code = tokenHelper.validate_token_from_request(token=token, secret_key=secret_key, + required_roles=['ivoa_user'], + action="run an ADQL query") + + if output_code is not None: + return make_response(output, output_code) + + adql_query = request.args.get('adql_query', None) + vo_mysql_pg_host = app_config.vo_mysql_pg_host + vo_mysql_pg_user = app_config.vo_mysql_pg_user + vo_mysql_pg_password = app_config.vo_mysql_pg_password + vo_mysql_pg_db = app_config.vo_mysql_pg_db + + result_query = ivoa_helper.run_ivoa_query(adql_query, + vo_mysql_pg_host=vo_mysql_pg_host, + vo_mysql_pg_user=vo_mysql_pg_user, + vo_mysql_pg_password=vo_mysql_pg_password, + vo_mysql_pg_db=vo_mysql_pg_db) + + output_request = json.dumps(result_query) + + return output_request + @app.route('/run_analysis', methods=['POST', 'GET']) def run_analysis(): From 40dc17e6ef4268b71315ecf405950f1d87030ebb Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 18:20:38 +0200 Subject: [PATCH 20/76] build product gallery 
path and jsonify the response --- cdci_data_analysis/analysis/ivoa_helper.py | 37 ++++++++++++++++++---- cdci_data_analysis/flask_app/app.py | 8 ++--- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index e73da1b3..b865fcf2 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,4 +1,7 @@ -import antlr4 +import json +import os.path + +from black.lines import append_leaves from queryparser.adql import ADQLQueryTranslator from queryparser.mysql import MySQLQueryProcessor from queryparser.exceptions import QuerySyntaxError @@ -48,11 +51,13 @@ def run_ivoa_query(query, **kwargs): vo_mysql_pg_user = kwargs.get('vo_mysql_pg_user', None) vo_mysql_pg_password = kwargs.get('vo_mysql_pg_password', None) vo_mysql_pg_db = kwargs.get('vo_mysql_pg_db', None) + product_gallery_url = kwargs.get('product_gallery_url', None) result_list = run_ivoa_query_from_product_gallery(parsed_query_obj, vo_mysql_pg_host=vo_mysql_pg_host, vo_mysql_pg_user=vo_mysql_pg_user, vo_mysql_pg_password=vo_mysql_pg_password, - vo_mysql_pg_db=vo_mysql_pg_db) + vo_mysql_pg_db=vo_mysql_pg_db, + product_gallery_url=product_gallery_url) return result_list @@ -60,7 +65,8 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, vo_mysql_pg_host, vo_mysql_pg_user, vo_mysql_pg_password, - vo_mysql_pg_db + vo_mysql_pg_db, + product_gallery_url=None ): result_list = [] @@ -72,11 +78,23 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, database=vo_mysql_pg_db ) as connection: create_db_query = parsed_query_obj.get('mysql_query') - with connection.cursor() as cursor: + with connection.cursor(dictionary=True) as cursor: cursor.execute(create_db_query) - for db in cursor: - logger.info(db) - result_list.append(db) + for row in cursor: + if product_gallery_url is not None: + path = row.get('path', None) + if path is not None: + if 
path.startswith('/'): + path = path[1:] + row['path'] = os.path.join(product_gallery_url, path) + path_alias = row.get('path_alias', None) + if path_alias is not None: + if path_alias.startswith('/'): + path_alias = path_alias[1:] + row['path_alias'] = os.path.join(product_gallery_url, path_alias) + result_list.append(row) + # result_obj = cursor.fetchall() + except Error as e: sentry.capture_message(f"Error when connecting to MySQL: {str(e)}") @@ -86,4 +104,9 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, sentry.capture_message(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") logger.error(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") + finally: + if connection is not None and connection.is_connected(): + connection.close() + logger.info('MySQL connection closed') + return result_list diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 37b9550f..b89ccd42 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -435,16 +435,16 @@ def run_adql_query(): vo_mysql_pg_user = app_config.vo_mysql_pg_user vo_mysql_pg_password = app_config.vo_mysql_pg_password vo_mysql_pg_db = app_config.vo_mysql_pg_db + product_gallery_url = app_config.product_gallery_url result_query = ivoa_helper.run_ivoa_query(adql_query, vo_mysql_pg_host=vo_mysql_pg_host, vo_mysql_pg_user=vo_mysql_pg_user, vo_mysql_pg_password=vo_mysql_pg_password, - vo_mysql_pg_db=vo_mysql_pg_db) - - output_request = json.dumps(result_query) + vo_mysql_pg_db=vo_mysql_pg_db, + product_gallery_url=product_gallery_url) - return output_request + return jsonify(result_query) @app.route('/run_analysis', methods=['POST', 'GET']) From 60c9bff2698493e379df206777e8b8a1e9c85ed8 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 12 Aug 2024 14:11:41 +0200 Subject: [PATCH 21/76] var renaming --- cdci_data_analysis/analysis/ivoa_helper.py | 5 ++--- 1 file changed, 2 insertions(+), 
3 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index b865fcf2..be642171 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -77,9 +77,9 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, password=vo_mysql_pg_password, database=vo_mysql_pg_db ) as connection: - create_db_query = parsed_query_obj.get('mysql_query') + db_query = parsed_query_obj.get('mysql_query') with connection.cursor(dictionary=True) as cursor: - cursor.execute(create_db_query) + cursor.execute(db_query) for row in cursor: if product_gallery_url is not None: path = row.get('path', None) @@ -93,7 +93,6 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, path_alias = path_alias[1:] row['path_alias'] = os.path.join(product_gallery_url, path_alias) result_list.append(row) - # result_obj = cursor.fetchall() except Error as e: From bdc61b44321804e7c0e7ced73501e494568c022e Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 16 Aug 2024 09:13:23 +0200 Subject: [PATCH 22/76] removed unused imports --- cdci_data_analysis/analysis/ivoa_helper.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index be642171..461bb978 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,7 +1,5 @@ -import json import os.path -from black.lines import append_leaves from queryparser.adql import ADQLQueryTranslator from queryparser.mysql import MySQLQueryProcessor from queryparser.exceptions import QuerySyntaxError @@ -94,7 +92,6 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, row['path_alias'] = os.path.join(product_gallery_url, path_alias) result_list.append(row) - except Error as e: sentry.capture_message(f"Error when connecting to MySQL: {str(e)}") logger.error(f"Error when connecting to MySQL: {str(e)}") From 
5342a7a23126ed98d32f16be239093f3eac5e1cd Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 26 Sep 2024 16:29:25 +0200 Subject: [PATCH 23/76] using postgresql --- cdci_data_analysis/analysis/ivoa_helper.py | 88 +++++++++++----------- cdci_data_analysis/configurer.py | 14 +++- cdci_data_analysis/flask_app/app.py | 16 ++-- 3 files changed, 63 insertions(+), 55 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 461bb978..32eda88e 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,10 +1,10 @@ import os.path from queryparser.adql import ADQLQueryTranslator -from queryparser.mysql import MySQLQueryProcessor +from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.exceptions import QuerySyntaxError -from mysql.connector import connect, Error +from psycopg2 import connect, DatabaseError from ..flask_app.sentry import sentry from ..app_logging import app_logging @@ -16,15 +16,16 @@ def parse_adql_query(query): try: # queryparser adt = ADQLQueryTranslator(query) - qp = MySQLQueryProcessor() - qp.set_query(adt.to_mysql()) - qp.process_query() + qp = PostgreSQLQueryProcessor() + qp.set_query(adt.to_postgresql()) + qp.process_query(replace_schema_name={'mmoda_pg_dev': 'public'}) output_obj = dict( columns=qp.display_columns, tables=qp.tables, rest=qp, - mysql_query=qp.query + mysql_query=None, + psql_query=qp.query ) except QuerySyntaxError as qe: @@ -33,7 +34,8 @@ def parse_adql_query(query): tables=None, columns=None, rest=None, - mysql_query=None + mysql_query=None, + psql_query=None ) return output_obj @@ -45,64 +47,58 @@ def run_ivoa_query(query, **kwargs): # tables = parsed_query_obj.get('tables', []) # if len(tables) == 1 and tables[0] == 'product_gallery': logger.info('Performing query on the product_gallery') - vo_mysql_pg_host = kwargs.get('vo_mysql_pg_host', None) - vo_mysql_pg_user = kwargs.get('vo_mysql_pg_user', 
None) - vo_mysql_pg_password = kwargs.get('vo_mysql_pg_password', None) - vo_mysql_pg_db = kwargs.get('vo_mysql_pg_db', None) + vo_psql_pg_host = kwargs.get('vo_psql_pg_host', None) + vo_psql_pg_user = kwargs.get('vo_psql_pg_user', None) + vo_psql_pg_password = kwargs.get('vo_psql_pg_password', None) + vo_psql_pg_db = kwargs.get('vo_psql_pg_db', None) product_gallery_url = kwargs.get('product_gallery_url', None) result_list = run_ivoa_query_from_product_gallery(parsed_query_obj, - vo_mysql_pg_host=vo_mysql_pg_host, - vo_mysql_pg_user=vo_mysql_pg_user, - vo_mysql_pg_password=vo_mysql_pg_password, - vo_mysql_pg_db=vo_mysql_pg_db, + vo_psql_pg_host=vo_psql_pg_host, + vo_psql_pg_user=vo_psql_pg_user, + vo_psql_pg_password=vo_psql_pg_password, + vo_psql_pg_db=vo_psql_pg_db, product_gallery_url=product_gallery_url) return result_list def run_ivoa_query_from_product_gallery(parsed_query_obj, - vo_mysql_pg_host, - vo_mysql_pg_user, - vo_mysql_pg_password, - vo_mysql_pg_db, + vo_psql_pg_host, + vo_psql_pg_user, + vo_psql_pg_password, + vo_psql_pg_db, product_gallery_url=None ): result_list = [] try: with connect( - host=vo_mysql_pg_host, - user=vo_mysql_pg_user, - password=vo_mysql_pg_password, - database=vo_mysql_pg_db + host=vo_psql_pg_host, + database=vo_psql_pg_db, + user=vo_psql_pg_user, + password=vo_psql_pg_password ) as connection: - db_query = parsed_query_obj.get('mysql_query') - with connection.cursor(dictionary=True) as cursor: + db_query = parsed_query_obj.get('psql_query') + with connection.cursor() as cursor: cursor.execute(db_query) for row in cursor: + list_row = list(row) if product_gallery_url is not None: - path = row.get('path', None) - if path is not None: - if path.startswith('/'): - path = path[1:] - row['path'] = os.path.join(product_gallery_url, path) - path_alias = row.get('path_alias', None) - if path_alias is not None: - if path_alias.startswith('/'): - path_alias = path_alias[1:] - row['path_alias'] = os.path.join(product_gallery_url, 
path_alias) - result_list.append(row) - - except Error as e: - sentry.capture_message(f"Error when connecting to MySQL: {str(e)}") - logger.error(f"Error when connecting to MySQL: {str(e)}") - - except Exception as e: - sentry.capture_message(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") - logger.error(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") + for index, value in enumerate(list_row): + description = cursor.description[index] + if description.name in {'path', 'path_alias'}: + if list_row[index].startswith('/'): + list_row[index] = row[index][1:] + list_row[index] = os.path.join(product_gallery_url, list_row[index]) + result_list.append(list_row) + + except (Exception, DatabaseError) as e: + sentry.capture_message(f"Error when querying to the Postgresql server: {str(e)}") + logger.error(f"Error when querying to the Postgresql server: {str(e)}") finally: - if connection is not None and connection.is_connected(): + if connection is not None: + cursor.close() connection.close() - logger.info('MySQL connection closed') + logger.info('Database connection closed') return result_list diff --git a/cdci_data_analysis/configurer.py b/cdci_data_analysis/configurer.py index a84f3b86..2e939c8b 100644 --- a/cdci_data_analysis/configurer.py +++ b/cdci_data_analysis/configurer.py @@ -270,6 +270,10 @@ def __init__(self, cfg_dict, origin=None): disp_dict.get('vo_options', {}).get('vo_mysql_pg_user', None), disp_dict.get('vo_options', {}).get('vo_mysql_pg_password', None), disp_dict.get('vo_options', {}).get('vo_mysql_pg_db', None), + disp_dict.get('vo_options', {}).get('vo_psql_pg_host', None), + disp_dict.get('vo_options', {}).get('vo_psql_pg_user', None), + disp_dict.get('vo_options', {}).get('vo_psql_pg_password', None), + disp_dict.get('vo_options', {}).get('vo_psql_pg_db', None) ) # not used? 
@@ -351,7 +355,11 @@ def set_conf_dispatcher(self, vo_mysql_pg_host, vo_mysql_pg_user, vo_mysql_pg_password, - vo_mysql_pg_db + vo_mysql_pg_db, + vo_psql_pg_host, + vo_psql_pg_user, + vo_psql_pg_password, + vo_psql_pg_db ): # Generic to dispatcher #print(dispatcher_url, dispatcher_port) @@ -407,6 +415,10 @@ def set_conf_dispatcher(self, self.vo_mysql_pg_user = vo_mysql_pg_user self.vo_mysql_pg_password = vo_mysql_pg_password self.vo_mysql_pg_db = vo_mysql_pg_db + self.vo_psql_pg_host = vo_psql_pg_host + self.vo_psql_pg_user = vo_psql_pg_user + self.vo_psql_pg_password = vo_psql_pg_password + self.vo_psql_pg_db = vo_psql_pg_db def get_data_serve_conf(self, instr_name): if instr_name in self.data_server_conf_dict.keys(): diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 3ef840f9..1df2aeb9 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -446,17 +446,17 @@ def run_adql_query(): return make_response(output, output_code) adql_query = request.args.get('adql_query', None) - vo_mysql_pg_host = app_config.vo_mysql_pg_host - vo_mysql_pg_user = app_config.vo_mysql_pg_user - vo_mysql_pg_password = app_config.vo_mysql_pg_password - vo_mysql_pg_db = app_config.vo_mysql_pg_db + vo_psql_pg_host = app_config.vo_psql_pg_host + vo_psql_pg_user = app_config.vo_psql_pg_user + vo_psql_pg_password = app_config.vo_psql_pg_password + vo_psql_pg_db = app_config.vo_psql_pg_db product_gallery_url = app_config.product_gallery_url result_query = ivoa_helper.run_ivoa_query(adql_query, - vo_mysql_pg_host=vo_mysql_pg_host, - vo_mysql_pg_user=vo_mysql_pg_user, - vo_mysql_pg_password=vo_mysql_pg_password, - vo_mysql_pg_db=vo_mysql_pg_db, + vo_psql_pg_host=vo_psql_pg_host, + vo_psql_pg_user=vo_psql_pg_user, + vo_psql_pg_password=vo_psql_pg_password, + vo_psql_pg_db=vo_psql_pg_db, product_gallery_url=product_gallery_url) return jsonify(result_query) From bf1cd4977f675431ecc4b8875b6ddc23f22c1ab2 Mon Sep 17 00:00:00 
2001 From: burnout87 Date: Thu, 26 Sep 2024 16:50:39 +0200 Subject: [PATCH 24/76] sanitize request values --- cdci_data_analysis/flask_app/app.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 1df2aeb9..5179aa14 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -432,9 +432,13 @@ def push_renku_branch(): @app.route('/run_adql_query') def run_adql_query(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_request_values = sanitize_dict_before_log(par_dic) + logger.info('\033[32m===========================> run_adql_query\033[0m') - token = request.args.get('token', None) + logger.info('\033[33m raw request values: %s \033[0m', dict(sanitized_request_values)) + + token = par_dic.get('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -445,7 +449,7 @@ def run_adql_query(): if output_code is not None: return make_response(output, output_code) - adql_query = request.args.get('adql_query', None) + adql_query = par_dic.get('adql_query', None) vo_psql_pg_host = app_config.vo_psql_pg_host vo_psql_pg_user = app_config.vo_psql_pg_user vo_psql_pg_password = app_config.vo_psql_pg_password From 5d53425640763c06445fd84f7f102e9ded1eb546 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 14:36:02 +0200 Subject: [PATCH 25/76] using value var --- cdci_data_analysis/analysis/ivoa_helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 32eda88e..2192f310 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -85,10 +85,10 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, if product_gallery_url is not None: for index, value in enumerate(list_row): 
description = cursor.description[index] - if description.name in {'path', 'path_alias'}: - if list_row[index].startswith('/'): - list_row[index] = row[index][1:] - list_row[index] = os.path.join(product_gallery_url, list_row[index]) + if description.name in {'path', 'path_alias'} and value is not None and isinstance(value, str): + if value.startswith('/'): + value = value[1:] + list_row[index] = os.path.join(product_gallery_url, value) result_list.append(list_row) except (Exception, DatabaseError) as e: From 309e2adb4601bc07d9d19ba52a6f83821028d73e Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 14:36:13 +0200 Subject: [PATCH 26/76] postgresql connector library --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9ae39326..7bed627e 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,8 @@ "giturlparse", "sentry-sdk", "validators==0.28.3", - "jsonschema<=4.17.3" + "jsonschema<=4.17.3", + 'psycopg2' ] test_req = [ From 08ad2ab3d8cbd01c894f1c412f804961d6c73b8c Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 14:55:48 +0200 Subject: [PATCH 27/76] no query parsing --- cdci_data_analysis/analysis/ivoa_helper.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 2192f310..96913bd2 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -14,26 +14,16 @@ def parse_adql_query(query): try: - # queryparser adt = ADQLQueryTranslator(query) - qp = PostgreSQLQueryProcessor() - qp.set_query(adt.to_postgresql()) - qp.process_query(replace_schema_name={'mmoda_pg_dev': 'public'}) output_obj = dict( - columns=qp.display_columns, - tables=qp.tables, - rest=qp, mysql_query=None, - psql_query=qp.query + psql_query=adt.to_postgresql() ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - 
tables=None, - columns=None, - rest=None, mysql_query=None, psql_query=None ) From ec8d01857524ce30caba05b2a74401a68d2dac0b Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 15:01:17 +0200 Subject: [PATCH 28/76] not needed import --- cdci_data_analysis/analysis/ivoa_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 96913bd2..db01cce1 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,7 +1,6 @@ import os.path from queryparser.adql import ADQLQueryTranslator -from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.exceptions import QuerySyntaxError from psycopg2 import connect, DatabaseError From f5df2839511be5fe17ab7aacf16d795c52b08817 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 15:25:03 +0200 Subject: [PATCH 29/76] adapted conf example --- cdci_data_analysis/config_dir/conf_env.yml.example | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cdci_data_analysis/config_dir/conf_env.yml.example b/cdci_data_analysis/config_dir/conf_env.yml.example index 82f2cd0f..dd8771cc 100644 --- a/cdci_data_analysis/config_dir/conf_env.yml.example +++ b/cdci_data_analysis/config_dir/conf_env.yml.example @@ -129,3 +129,8 @@ dispatcher: vo_mysql_pg_user: MYSQL_PG_USER vo_mysql_pg_password: MYSQL_PG_PASSWORD vo_mysql_pg_db: MYSQL_PG_DB + # postgresql credentials + vo_psql_pg_host: PSQL_PG_HOST + vo_psql_pg_user: PSQL_PG_USER + vo_psql_pg_password: PSQL_PG_PASSWORD + vo_psql_pg_db: PSQL_PG_DB \ No newline at end of file From f9b934ba1660772c0476acf70d4187012c054d37 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 15:35:08 +0200 Subject: [PATCH 30/76] not needed requirements --- requirements.txt | 2 -- setup.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index f551831e..7e13b3e3 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -29,8 +29,6 @@ GitPython nbformat sentry-sdk pytest-sentry -sqlparse -queryparser-python3 -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index 7bed627e..7d44ff71 100644 --- a/setup.py +++ b/setup.py @@ -45,8 +45,6 @@ "black>=22.10.0", "bs4", "GitPython", - "sqlparse", - "queryparser-python3", "nbformat", "giturlparse", "sentry-sdk", From 87256a999f85ba106fb16c9aa7a587b5f9f4d666 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 15:36:38 +0200 Subject: [PATCH 31/76] not needed requirements --- requirements.txt | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 7e13b3e3..2b56561d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,7 @@ GitPython nbformat sentry-sdk pytest-sentry +queryparser-python3 -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index 7d44ff71..2a2dce29 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "black>=22.10.0", "bs4", "GitPython", + "queryparser-python3", "nbformat", "giturlparse", "sentry-sdk", From db388d86b0d03381dfc26cab080059900b91dfe9 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 16:33:17 +0200 Subject: [PATCH 32/76] freezing version pytest-xdist --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 2b56561d..1bec6967 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ pyyaml simplejson flask==2.0.3 astropy>=5.0.1 +pytest-xdist<=3.5.0 pylogstash_context>=0.1.19 gunicorn decorator From d8d26a7ef9750c7836798e5c43aae76cb7d094cc Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 18:12:12 +0200 Subject: [PATCH 33/76] adapted config tests and new config test --- cdci_data_analysis/pytest_fixtures.py | 27 +++++++++++++++++++++++++++ tests/conftest.py | 2 ++ tests/test_server_basic.py | 25 
+++++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/cdci_data_analysis/pytest_fixtures.py b/cdci_data_analysis/pytest_fixtures.py index 85649a53..c3c4000c 100644 --- a/cdci_data_analysis/pytest_fixtures.py +++ b/cdci_data_analysis/pytest_fixtures.py @@ -611,6 +611,27 @@ def dispatcher_test_conf_with_gallery_fn(dispatcher_test_conf_fn): yield fn +@pytest.fixture +def dispatcher_test_conf_with_vo_options_fn(dispatcher_test_conf_fn): + fn = "test-dispatcher-conf-with-vo-options.yaml" + + with open(fn, "w") as f: + with open(dispatcher_test_conf_fn) as f_default: + f.write(f_default.read()) + + f.write('\n vo_options:' + '\n vo_mysql_pg_host: "localhost"' + '\n vo_mysql_pg_user: "user"' + '\n vo_mysql_pg_password: "password"' + '\n vo_mysql_pg_db: "database"' + '\n vo_psql_pg_host: "localhost"' + '\n vo_psql_pg_user: "user"' + '\n vo_psql_pg_password: "password"' + '\n vo_psql_pg_db: "database"') + + yield fn + + @pytest.fixture def dispatcher_test_conf_with_matrix_options_fn(dispatcher_test_conf_fn): fn = "test-dispatcher-conf-with-matrix-options.yaml" @@ -708,10 +729,16 @@ def dispatcher_test_conf_with_gallery(dispatcher_test_conf_with_gallery_fn): yield yaml.load(open(dispatcher_test_conf_with_gallery_fn), Loader=yaml.SafeLoader)['dispatcher'] +@pytest.fixture +def dispatcher_test_conf_with_vo_options(dispatcher_test_conf_with_vo_options_fn): + yield yaml.load(open(dispatcher_test_conf_with_vo_options_fn), Loader=yaml.SafeLoader)['dispatcher'] + + @pytest.fixture def dispatcher_test_conf_with_matrix_options(dispatcher_test_conf_with_matrix_options_fn): yield yaml.load(open(dispatcher_test_conf_with_matrix_options_fn), Loader=yaml.SafeLoader)['dispatcher'] + @pytest.fixture def dispatcher_test_conf_with_gallery_no_resolver(dispatcher_test_conf_with_gallery_no_resolver_fn): yield yaml.load(open(dispatcher_test_conf_with_gallery_no_resolver_fn), Loader=yaml.SafeLoader)['dispatcher'] diff --git a/tests/conftest.py b/tests/conftest.py index 
1f6659bd..f026f5bc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,9 +18,11 @@ gunicorn_dispatcher_long_living_fixture_with_matrix_options, dispatcher_test_conf, dispatcher_test_conf_with_gallery, + dispatcher_test_conf_with_vo_options, dispatcher_test_conf_with_gallery_no_resolver, dispatcher_test_conf_empty_sentry_fn, dispatcher_test_conf_with_gallery_fn, + dispatcher_test_conf_with_vo_options_fn, dispatcher_test_conf_with_gallery_no_resolver_fn, dispatcher_live_fixture_with_external_products_url, dispatcher_live_fixture_with_default_route_products_url, diff --git a/tests/test_server_basic.py b/tests/test_server_basic.py index 25af7d2f..75bb1c1b 100644 --- a/tests/test_server_basic.py +++ b/tests/test_server_basic.py @@ -2407,6 +2407,7 @@ def test_example_config(dispatcher_test_conf): example_config = yaml.load(open(example_config_fn), Loader=yaml.SafeLoader)['dispatcher'] example_config.pop('product_gallery_options', None) example_config.pop('matrix_options', None) + example_config.pop('vo_options', None) mapper = lambda x, y: ".".join(map(str, x)) example_config_keys = flatten_nested_structure(example_config, mapper) @@ -2428,6 +2429,7 @@ def test_example_config_with_gallery(dispatcher_test_conf_with_gallery): example_config = yaml.load(open(example_config_fn), Loader=yaml.SafeLoader)['dispatcher'] example_config.pop('matrix_options', None) + example_config.pop('vo_options', None) mapper = lambda x, y: ".".join(map(str, x)) example_config_keys = flatten_nested_structure(example_config, mapper) @@ -2449,6 +2451,7 @@ def test_example_config_with_matrix_options(dispatcher_test_conf_with_matrix_opt with open(example_config_fn) as example_config_fn_f: example_config = yaml.load(example_config_fn_f, Loader=yaml.SafeLoader)['dispatcher'] example_config.pop('product_gallery_options', None) + example_config.pop('vo_options', None) mapper = lambda x, y: ".".join(map(str, x)) example_config_keys = flatten_nested_structure(example_config, mapper) @@ -2460,6 
+2463,28 @@ def test_example_config_with_matrix_options(dispatcher_test_conf_with_matrix_opt assert set(example_config_keys) == set(test_config_keys) +def test_example_config_with_vo_options(dispatcher_test_conf_with_vo_options): + import cdci_data_analysis.config_dir + + example_config_fn = os.path.join( + os.path.dirname(cdci_data_analysis.__file__), + "config_dir/conf_env.yml.example" + ) + with open(example_config_fn) as example_config_fn_f: + example_config = yaml.load(example_config_fn_f, Loader=yaml.SafeLoader)['dispatcher'] + example_config.pop('product_gallery_options', None) + example_config.pop('matrix_options', None) + + mapper = lambda x, y: ".".join(map(str, x)) + example_config_keys = flatten_nested_structure(example_config, mapper) + test_config_keys = flatten_nested_structure(dispatcher_test_conf_with_vo_options, mapper) + + print("\n\n\nexample_config_keys", example_config_keys) + print("\n\n\ntest_config_keys", test_config_keys) + + assert set(example_config_keys) == set(test_config_keys) + + def test_image(dispatcher_live_fixture): server = dispatcher_live_fixture From 19c72758e6edc8ab5e72234fb539eaef3794be75 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 14:16:32 +0200 Subject: [PATCH 34/76] in case local resolver fails, fallback to the external resolver --- cdci_data_analysis/analysis/drupal_helper.py | 44 +++++++++++++++----- cdci_data_analysis/flask_app/app.py | 11 +++-- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/cdci_data_analysis/analysis/drupal_helper.py b/cdci_data_analysis/analysis/drupal_helper.py index ded4b787..9dc6378c 100644 --- a/cdci_data_analysis/analysis/drupal_helper.py +++ b/cdci_data_analysis/analysis/drupal_helper.py @@ -21,6 +21,7 @@ from enum import Enum, auto from astropy.coordinates import SkyCoord, Angle from astropy import units as u +import xml.etree.ElementTree as ET from cdci_data_analysis.analysis import tokenHelper from ..analysis.exceptions import RequestNotUnderstood, 
InternalError, RequestNotAuthorized @@ -551,11 +552,14 @@ def post_content_to_gallery(decoded_token, if update_astro_entity: auto_update = kwargs.pop('auto_update', 'False') == 'True' if auto_update is True: - name_resolver_url = disp_conf.name_resolver_url + local_name_resolver_url = disp_conf.local_name_resolver_url + external_name_resolver_url = disp_conf.external_name_resolver_url entities_portal_url = disp_conf.entities_portal_url - resolved_obj = resolve_name(name_resolver_url=name_resolver_url, + resolved_obj = resolve_name(local_name_resolver_url=local_name_resolver_url, + external_name_resolver_url=external_name_resolver_url, entities_portal_url=entities_portal_url, - name=src_name) + name=src_name, + sentry_dsn=sentry_dsn) if resolved_obj is not None: msg = '' if 'message' in resolved_obj: @@ -1488,11 +1492,11 @@ def check_matching_coords(source_1_name, source_1_coord_ra, source_1_coord_dec, return False -def resolve_name(name_resolver_url: str, entities_portal_url: str = None, name: str = None): +def resolve_name(local_name_resolver_url: str, external_name_resolver_url: str, entities_portal_url: str = None, name: str = None, sentry_dsn=None): resolved_obj = {} if name is not None: quoted_name = urllib.parse.quote(name.strip()) - res = requests.get(name_resolver_url.format(quoted_name)) + res = requests.get(local_name_resolver_url.format(quoted_name)) if res.status_code == 200: returned_resolved_obj = res.json() if 'success' in returned_resolved_obj: @@ -1513,12 +1517,32 @@ def resolve_name(name_resolver_url: str, entities_portal_url: str = None, name: logger.info(f"resolution of the object {name} unsuccessful") resolved_obj['message'] = f'{name} could not be resolved' else: - logger.warning(f"there seems to be some problem in completing the request for the resolution of the object: {name}\n" - f"the request lead to the error {res.text}, " + logger.warning("There seems to be some problem in completing the request for the resolution of the object" + f" 
\"{name}\" using the local resolver.\n" + f"The request lead to the error {res.text}, " "this might be due to an error in the url or the service " - "requested is currently not available, " - "please check your request and try to issue it again") - raise InternalError('issue when performing a request to the local resolver', + "requested is currently not available. The external resolver will be used.") + if sentry_dsn is not None: + sentry.capture_message(f'Issue in resolving the object {name} using the local resolver\n{res.text}') + res = requests.get(external_name_resolver_url.format(quoted_name)) + if res.status_code == 200: + root = ET.fromstring(res.text) + resolver_tag = root.find('.//Resolver') + if resolver_tag is not None: + ra_tag = resolver_tag.find('.//jradeg') + resolved_obj['RA'] = float(ra_tag.text) + + dec_tag = resolver_tag.find('.//jdedeg') + resolved_obj['DEC'] = float(dec_tag.text) + else: + logger.warning("There seems to be some problem in completing the request for the resolution of the object" + f" \"{name}\" using the external resolver.\n" + f"The request lead to the error {res.text}, " + "this might be due to an error in the url or the service " + "requested is currently not available. 
The object could not be resolved.") + if sentry_dsn is not None: + sentry.capture_message(f'Issue in resolving the object {name} using the external resolver\n{res.text}') + raise InternalError('issue when performing a request to the external resolver', status_code=500, payload={'drupal_helper_error_message': res.text}) return resolved_obj diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 5179aa14..39e4c520 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -685,12 +685,17 @@ def resolve_name(): name = par_dic.get('name', None) - name_resolver_url = app_config.name_resolver_url + local_name_resolver_url = app_config.local_name_resolver_url + external_name_resolver_url = app_config.external_name_resolver_url entities_portal_url = app_config.entities_portal_url - resolve_object = drupal_helper.resolve_name(name_resolver_url=name_resolver_url, + sentry_dsn = sentry.sentry_url + + resolve_object = drupal_helper.resolve_name(local_name_resolver_url=local_name_resolver_url, + external_name_resolver_url=external_name_resolver_url, entities_portal_url=entities_portal_url, - name=name) + name=name, + sentry_dsn=sentry_dsn) return resolve_object From a5826e04ef44760d134bffb95b814afd678a4a34 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 14:16:42 +0200 Subject: [PATCH 35/76] extended configuration --- cdci_data_analysis/configurer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cdci_data_analysis/configurer.py b/cdci_data_analysis/configurer.py index 2e939c8b..e5e19e8c 100644 --- a/cdci_data_analysis/configurer.py +++ b/cdci_data_analysis/configurer.py @@ -260,7 +260,10 @@ def __init__(self, cfg_dict, origin=None): disp_dict.get('product_gallery_options', {}).get('product_gallery_secret_key', None), disp_dict.get('product_gallery_options', {}).get('product_gallery_timezone', "Europe/Zurich"), - disp_dict.get('product_gallery_options', 
{}).get('name_resolver_url', 'https://resolver-prod.obsuks1.unige.ch/api/v1.1/byname/{}'), + disp_dict.get('product_gallery_options', {}).get('local_name_resolver_url', + 'https://resolver-prod.obsuks1.unige.ch/api/v1.1/byname/{}'), + disp_dict.get('product_gallery_options', {}).get('external_name_resolver_url', + 'http://cdsweb.u-strasbg.fr/cgi-bin/nph-sesame/-oxp/NSV?{}'), disp_dict.get('product_gallery_options', {}).get('entities_portal_url', 'http://cdsportal.u-strasbg.fr/?target={}'), disp_dict.get('product_gallery_options', {}).get('converttime_revnum_service_url', 'https://www.astro.unige.ch/mmoda/dispatch-data/gw/timesystem/api/v1.0/converttime/UTC/{}/REVNUM'), disp_dict.get('renku_options', {}).get('renku_gitlab_repository_url', None), @@ -346,7 +349,8 @@ def set_conf_dispatcher(self, product_gallery_url, product_gallery_secret_key, product_gallery_timezone, - name_resolver_url, + local_name_resolver_url, + external_name_resolver_url, entities_portal_url, converttime_revnum_service_url, renku_gitlab_repository_url, @@ -405,7 +409,8 @@ def set_conf_dispatcher(self, self.product_gallery_url = product_gallery_url self.product_gallery_secret_key = product_gallery_secret_key self.product_gallery_timezone = product_gallery_timezone - self.name_resolver_url = name_resolver_url + self.local_name_resolver_url = local_name_resolver_url + self.external_name_resolver_url = external_name_resolver_url self.entities_portal_url = entities_portal_url self.converttime_revnum_service_url = converttime_revnum_service_url self.renku_gitlab_repository_url = renku_gitlab_repository_url From 15b2e7a4ef14f6606ee525f6943742924bf0c4fc Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 17:30:28 +0200 Subject: [PATCH 36/76] handling not resolvable url --- cdci_data_analysis/analysis/drupal_helper.py | 123 ++++++++++++------- 1 file changed, 81 insertions(+), 42 deletions(-) diff --git a/cdci_data_analysis/analysis/drupal_helper.py 
b/cdci_data_analysis/analysis/drupal_helper.py index 9dc6378c..3a3beb9a 100644 --- a/cdci_data_analysis/analysis/drupal_helper.py +++ b/cdci_data_analysis/analysis/drupal_helper.py @@ -1496,55 +1496,94 @@ def resolve_name(local_name_resolver_url: str, external_name_resolver_url: str, resolved_obj = {} if name is not None: quoted_name = urllib.parse.quote(name.strip()) - res = requests.get(local_name_resolver_url.format(quoted_name)) + local_name_resolver_url_formatted = local_name_resolver_url.format(quoted_name) + try: + res = requests.get(local_name_resolver_url_formatted) + if res.status_code == 200: + returned_resolved_obj = res.json() + if 'success' in returned_resolved_obj: + resolved_obj['name'] = name.replace('_', ' ') + if returned_resolved_obj['success']: + logger.info(f"object {name} successfully resolved") + if 'ra' in returned_resolved_obj: + resolved_obj['RA'] = float(returned_resolved_obj['ra']) + if 'dec' in returned_resolved_obj: + resolved_obj['DEC'] = float(returned_resolved_obj['dec']) + if 'object_ids' in returned_resolved_obj: + resolved_obj['object_ids'] = returned_resolved_obj['object_ids'] + if 'object_type' in returned_resolved_obj: + resolved_obj['object_type'] = returned_resolved_obj['object_type'] + resolved_obj['entity_portal_link'] = entities_portal_url.format(quoted_name) + resolved_obj['message'] = f'{name} successfully resolved' + elif not returned_resolved_obj['success']: + logger.info(f"resolution of the object {name} unsuccessful") + resolved_obj['message'] = f'{name} could not be resolved' + else: + logger.warning("There seems to be some problem in completing the request for the resolution of the object" + f" \"{name}\" using the local resolver.\n" + f"The request lead to the error {res.text}, " + "this might be due to an error in the url or the service " + "requested is currently not available. 
The external resolver will be used.") + if sentry_dsn is not None: + sentry.capture_message(f'Failed to resolve object "{name}" using the local resolver. ' + f'URL: {local_name_resolver_url_formatted} ' + f'Status Code: {res.status_code} ' + f'Response: {res.text}') + except (ConnectionError, + requests.exceptions.ConnectionError, + requests.exceptions.Timeout) as e: + logger.warning(f'An exception occurred while trying to resolve the object "{name}" using the local resolver. ' + f'using the url: {local_name_resolver_url_formatted}. Exception details: {str(e)}') + if sentry_dsn is not None: + sentry.capture_message(f'An exception occurred while trying to resolve the object "{name}" using the local resolver. ' + f'URL: {local_name_resolver_url_formatted} ' + f"Exception details: {str(e)}") + res = requests.get(external_name_resolver_url.format(quoted_name)) if res.status_code == 200: - returned_resolved_obj = res.json() - if 'success' in returned_resolved_obj: - resolved_obj['name'] = name.replace('_', ' ') - if returned_resolved_obj['success']: - logger.info(f"object {name} successfully resolved") - if 'ra' in returned_resolved_obj: - resolved_obj['RA'] = float(returned_resolved_obj['ra']) - if 'dec' in returned_resolved_obj: - resolved_obj['DEC'] = float(returned_resolved_obj['dec']) - if 'object_ids' in returned_resolved_obj: - resolved_obj['object_ids'] = returned_resolved_obj['object_ids'] - if 'object_type' in returned_resolved_obj: - resolved_obj['object_type'] = returned_resolved_obj['object_type'] - resolved_obj['entity_portal_link'] = entities_portal_url.format(quoted_name) - resolved_obj['message'] = f'{name} successfully resolved' - elif not returned_resolved_obj['success']: - logger.info(f"resolution of the object {name} unsuccessful") + root = ET.fromstring(res.text) + resolved_obj['name'] = name.replace('_', ' ') + resolver_tag = root.find('.//Resolver') + if resolver_tag is not None: + ra_tag = resolver_tag.find('.//jradeg') + dec_tag = 
resolver_tag.find('.//jdedeg') + if ra_tag is None or dec_tag is None: + info_tag = root.find('.//INFO') resolved_obj['message'] = f'{name} could not be resolved' + if info_tag is not None: + message_info = info_tag.text + resolved_obj['message'] += f': {message_info}' + else: + resolved_obj['RA'] = float(ra_tag.text) + resolved_obj['DEC'] = float(dec_tag.text) + resolved_obj['entity_portal_link'] = entities_portal_url.format(quoted_name) + else: + warning_msg = ("There seems to be some problem in completing the request for the resolution of the object" + f" \"{name}\" using the external resolver.") + resolved_obj['message'] = f'{name} could not be resolved' + info_tag = root.find('.//INFO') + if info_tag is not None: + warning_msg += (f"The request lead to the error {info_tag.text}, " + "this might be due to an error in the name of the object that ha been provided.") + resolved_obj['message'] += f': {info_tag.text}' + logger.warning(warning_msg) + if sentry_dsn is not None: + sentry.capture_message(f'Failed to resolve object "{name}" using the remote resolver. ' + f'URL: {local_name_resolver_url.format(quoted_name)} ' + f'Status Code: {res.status_code} ' + f'Response: {res.text}' + f"Info returned from the resolver: {resolved_obj['message']}") else: logger.warning("There seems to be some problem in completing the request for the resolution of the object" - f" \"{name}\" using the local resolver.\n" + f" \"{name}\" using the external resolver.\n" f"The request lead to the error {res.text}, " "this might be due to an error in the url or the service " - "requested is currently not available. The external resolver will be used.") + "requested is currently not available. 
The object could not be resolved.") if sentry_dsn is not None: - sentry.capture_message(f'Issue in resolving the object {name} using the local resolver\n{res.text}') - res = requests.get(external_name_resolver_url.format(quoted_name)) - if res.status_code == 200: - root = ET.fromstring(res.text) - resolver_tag = root.find('.//Resolver') - if resolver_tag is not None: - ra_tag = resolver_tag.find('.//jradeg') - resolved_obj['RA'] = float(ra_tag.text) - - dec_tag = resolver_tag.find('.//jdedeg') - resolved_obj['DEC'] = float(dec_tag.text) - else: - logger.warning("There seems to be some problem in completing the request for the resolution of the object" - f" \"{name}\" using the external resolver.\n" - f"The request lead to the error {res.text}, " - "this might be due to an error in the url or the service " - "requested is currently not available. The object could not be resolved.") - if sentry_dsn is not None: - sentry.capture_message(f'Issue in resolving the object {name} using the external resolver\n{res.text}') - raise InternalError('issue when performing a request to the external resolver', - status_code=500, - payload={'drupal_helper_error_message': res.text}) + sentry.capture_message(f'Failed to resolve object "{name}" using the remote resolver. 
' + f'URL: {local_name_resolver_url.format(quoted_name)} ' + f'Status Code: {res.status_code} ' + f'Response: {res.text}') + resolved_obj['message'] = f'{name} could not be resolved: {res.text}' return resolved_obj From 5838b5b7fd8b19d6f8584d34469733a4c79b5092 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 17:31:06 +0200 Subject: [PATCH 37/76] extended tests --- cdci_data_analysis/pytest_fixtures.py | 43 +++++++++++++++++++++++- tests/conftest.py | 3 ++ tests/test_server_basic.py | 47 +++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/cdci_data_analysis/pytest_fixtures.py b/cdci_data_analysis/pytest_fixtures.py index c3c4000c..71cb2146 100644 --- a/cdci_data_analysis/pytest_fixtures.py +++ b/cdci_data_analysis/pytest_fixtures.py @@ -1,6 +1,7 @@ # this could be a separate package or/and a pytest plugin from json import JSONDecodeError +import responses import sentry_sdk import yaml @@ -604,7 +605,28 @@ def dispatcher_test_conf_with_gallery_fn(dispatcher_test_conf_fn): '\n product_gallery_url: "http://cdciweb02.astro.unige.ch/mmoda/galleryd"' f'\n product_gallery_secret_key: "{os.getenv("DISPATCHER_PRODUCT_GALLERY_SECRET_KEY", "secret_key")}"' '\n product_gallery_timezone: "Europe/Zurich"' - '\n name_resolver_url: "https://resolver-prod.obsuks1.unige.ch/api/v1.1/byname/{}"' + '\n local_name_resolver_url: "https://resolver-prod.obsuks1.unige.ch/api/v1.1/byname/{}"' + '\n external_name_resolver_url: "http://cdsweb.u-strasbg.fr/cgi-bin/nph-sesame/-oxp/NSV?{}"' + '\n entities_portal_url: "http://cdsportal.u-strasbg.fr/?target={}"' + '\n converttime_revnum_service_url: "https://www.astro.unige.ch/mmoda/dispatch-data/gw/timesystem/api/v1.0/converttime/UTC/{}/REVNUM"') + + yield fn + + +@pytest.fixture +def dispatcher_test_conf_with_gallery_invalid_local_resolver_fn(dispatcher_test_conf_fn): + fn = "test-dispatcher-conf-with-gallery.yaml" + + with open(fn, "w") as f: + with open(dispatcher_test_conf_fn) as f_default: + 
f.write(f_default.read()) + + f.write('\n product_gallery_options:' + '\n product_gallery_url: "http://cdciweb02.astro.unige.ch/mmoda/galleryd"' + f'\n product_gallery_secret_key: "{os.getenv("DISPATCHER_PRODUCT_GALLERY_SECRET_KEY", "secret_key")}"' + '\n product_gallery_timezone: "Europe/Zurich"' + '\n local_name_resolver_url: "http://invalid_url/"' + '\n external_name_resolver_url: "http://cdsweb.u-strasbg.fr/cgi-bin/nph-sesame/-oxp/NSV?{}"' '\n entities_portal_url: "http://cdsportal.u-strasbg.fr/?target={}"' '\n converttime_revnum_service_url: "https://www.astro.unige.ch/mmoda/dispatch-data/gw/timesystem/api/v1.0/converttime/UTC/{}/REVNUM"') @@ -729,6 +751,11 @@ def dispatcher_test_conf_with_gallery(dispatcher_test_conf_with_gallery_fn): yield yaml.load(open(dispatcher_test_conf_with_gallery_fn), Loader=yaml.SafeLoader)['dispatcher'] +@pytest.fixture +def dispatcher_test_conf_with_gallery_invalid_local_resolver(dispatcher_test_conf_with_gallery_invalid_local_resolver_fn): + yield yaml.load(open(dispatcher_test_conf_with_gallery_invalid_local_resolver_fn), Loader=yaml.SafeLoader)['dispatcher'] + + @pytest.fixture def dispatcher_test_conf_with_vo_options(dispatcher_test_conf_with_vo_options_fn): yield yaml.load(open(dispatcher_test_conf_with_vo_options_fn), Loader=yaml.SafeLoader)['dispatcher'] @@ -1147,6 +1174,20 @@ def dispatcher_live_fixture_with_gallery_no_resolver(pytestconfig, dispatcher_te os.kill(pid, signal.SIGINT) +@pytest.fixture +def dispatcher_live_fixture_with_gallery_invalid_local_resolver(pytestconfig, dispatcher_test_conf_with_gallery_invalid_local_resolver_fn, + dispatcher_debug): + dispatcher_state = start_dispatcher(pytestconfig.rootdir, dispatcher_test_conf_with_gallery_invalid_local_resolver_fn) + + service = dispatcher_state['url'] + pid = dispatcher_state['pid'] + + yield service + + kill_child_processes(pid, signal.SIGINT) + os.kill(pid, signal.SIGINT) + + @pytest.fixture def dispatcher_live_fixture_no_products_url(pytestconfig, 
dispatcher_test_conf_no_products_url_fn, dispatcher_debug): dispatcher_state = start_dispatcher(pytestconfig.rootdir, dispatcher_test_conf_no_products_url_fn) diff --git a/tests/conftest.py b/tests/conftest.py index f026f5bc..25e5194b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,7 @@ dispatcher_live_fixture_no_debug_mode, dispatcher_live_fixture_with_gallery, dispatcher_live_fixture_with_gallery_no_resolver, + dispatcher_live_fixture_with_gallery_invalid_local_resolver, dispatcher_long_living_fixture, gunicorn_dispatcher_long_living_fixture, dispatcher_long_living_fixture_with_matrix_options, @@ -20,8 +21,10 @@ dispatcher_test_conf_with_gallery, dispatcher_test_conf_with_vo_options, dispatcher_test_conf_with_gallery_no_resolver, + dispatcher_test_conf_with_gallery_invalid_local_resolver, dispatcher_test_conf_empty_sentry_fn, dispatcher_test_conf_with_gallery_fn, + dispatcher_test_conf_with_gallery_invalid_local_resolver_fn, dispatcher_test_conf_with_vo_options_fn, dispatcher_test_conf_with_gallery_no_resolver_fn, dispatcher_live_fixture_with_external_products_url, diff --git a/tests/test_server_basic.py b/tests/test_server_basic.py index 75bb1c1b..f25b1d46 100644 --- a/tests/test_server_basic.py +++ b/tests/test_server_basic.py @@ -2717,6 +2717,53 @@ def test_source_resolver(dispatcher_live_fixture_with_gallery, dispatcher_test_c .format(urllib.parse.quote(source_to_resolve.strip())) +@pytest.mark.test_drupal +@pytest.mark.parametrize("source_to_resolve", ['Mrk 421', 'Mrk_421', 'GX 1+4', 'fake object', None]) +def test_source_resolver_invalid_local_resolver(dispatcher_live_fixture_with_gallery_invalid_local_resolver, dispatcher_test_conf_with_gallery_invalid_local_resolver, source_to_resolve): + server = dispatcher_live_fixture_with_gallery_invalid_local_resolver + + logger.info("constructed server: %s", server) + + # let's generate a valid token + token_payload = { + **default_token_payload, + "roles": "general, gallery contributor", + } + 
encoded_token = jwt.encode(token_payload, secret_key, algorithm='HS256') + + params = {'name': source_to_resolve, + 'token': encoded_token} + + c = requests.get(os.path.join(server, "resolve_name"), + params={**params} + ) + + assert c.status_code == 200 + resolved_obj = c.json() + print('Resolved object returned: ', resolved_obj) + + if source_to_resolve is None: + assert resolved_obj == {} + elif source_to_resolve == 'fake object': + assert 'name' in resolved_obj + assert 'message' in resolved_obj + + # the name resolver replaces automatically underscores with spaces in the returned name + assert resolved_obj['name'] == source_to_resolve + assert resolved_obj['message'] == f'{source_to_resolve} could not be resolved' + else: + assert 'name' in resolved_obj + assert 'DEC' in resolved_obj + assert 'RA' in resolved_obj + assert 'entity_portal_link' in resolved_obj + assert 'object_ids' in resolved_obj + assert 'object_type' in resolved_obj + + assert resolved_obj['name'] == source_to_resolve.replace('_', ' ') + assert resolved_obj['entity_portal_link'] == dispatcher_test_conf_with_gallery["product_gallery_options"]["entities_portal_url"]\ + .format(urllib.parse.quote(source_to_resolve.strip())) + + @pytest.mark.test_drupal @pytest.mark.parametrize("type_group", ['instruments', 'Instruments', 'products', 'sources', 'aaaaaa', '', None]) @pytest.mark.parametrize("parent", ['isgri', 'production', 'all', 'aaaaaa', '', None]) From 72e67b15e3cde77b063bbd6f9a3b0ed316546a7e Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 17:34:46 +0200 Subject: [PATCH 38/76] adapted test --- cdci_data_analysis/config_dir/conf_env.yml.example | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cdci_data_analysis/config_dir/conf_env.yml.example b/cdci_data_analysis/config_dir/conf_env.yml.example index dd8771cc..115ba4c0 100644 --- a/cdci_data_analysis/config_dir/conf_env.yml.example +++ b/cdci_data_analysis/config_dir/conf_env.yml.example @@ -115,8 +115,10 
@@ dispatcher: product_gallery_secret_key: PRODUCT_GALLERY_SECRET_KEY # timezone used within the drupal configuration, these two values have to be always aligned product_gallery_timezone: PRODUCT_GALLERY_SECRET_KEY - # url of the name resolver - name_resolver_url: NAME_RESOLVER_URL + # url of the local name resolver + local_name_resolver_url: NAME_RESOLVER_URL + # url of the external name resolver + external_name_resolver_url: NAME_RESOLVER_URL # url of the online catalog for astrophysical entities entities_portal_url: ENTITIES_PORTAL_URL # url for the conversion of a given time, in UTC format, to the correspondent REVNUM From cb6b75d64c62bc87b9b09654a9ee821bb87250ed Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 24 Jun 2024 17:27:27 +0200 Subject: [PATCH 39/76] adql query parser function --- cdci_data_analysis/analysis/ivoa_helper.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 cdci_data_analysis/analysis/ivoa_helper.py diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py new file mode 100644 index 00000000..8dcf18ec --- /dev/null +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -0,0 +1,21 @@ +from queryparser.adql import ADQLQueryTranslator +from queryparser.postgresql import PostgreSQLQueryProcessor + +from ..app_logging import app_logging + + +logger = app_logging.getLogger('ivoa_helper') + + +def parse_adql_query(query): + adt = ADQLQueryTranslator(query) + qp = PostgreSQLQueryProcessor() + qp.set_query(adt.to_postgresql()) + qp.process_query() + + output_obj = dict( + columns = qp.columns, + display_columns = qp.display_columns, + ) + + return output_obj From 06dacd37b15a7e3d9f3df7f4065808095734d2a6 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 25 Jun 2024 10:04:04 +0200 Subject: [PATCH 40/76] queryparser-python3 lib --- requirements.txt | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 7e13b3e3..2b56561d 
100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,7 @@ GitPython nbformat sentry-sdk pytest-sentry +queryparser-python3 -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index de2adfba..b0b1c85f 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "black>=22.10.0", "bs4", "GitPython", + "queryparser-python3", "nbformat", "giturlparse", "sentry-sdk", From a4c616c247c405e8d76e19c1308b15dfbcd56b9f Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 25 Jun 2024 10:04:22 +0200 Subject: [PATCH 41/76] more error check --- cdci_data_analysis/analysis/ivoa_helper.py | 27 ++++++++++++++-------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 8dcf18ec..b7c09f64 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,5 +1,6 @@ from queryparser.adql import ADQLQueryTranslator from queryparser.postgresql import PostgreSQLQueryProcessor +from queryparser.exceptions import QuerySyntaxError from ..app_logging import app_logging @@ -8,14 +9,22 @@ def parse_adql_query(query): - adt = ADQLQueryTranslator(query) - qp = PostgreSQLQueryProcessor() - qp.set_query(adt.to_postgresql()) - qp.process_query() - - output_obj = dict( - columns = qp.columns, - display_columns = qp.display_columns, - ) + try: + adt = ADQLQueryTranslator(query) + qp = PostgreSQLQueryProcessor() + qp.set_query(adt.to_postgresql()) + qp.process_query() + output_obj = dict( + columns = qp.columns, + display_columns = qp.display_columns, + tables = qp.tables, + ) + except QuerySyntaxError as qe: + logger.error(f'Error parsing ADQL query: {qe}') + output_obj = dict( + columns = [], + display_columns = [], + tables = [], + ) return output_obj From 7bbf6abc1ff3fbdd2dabcd4600192e63366b4bfb Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 8 Jul 2024 19:26:49 +0200 Subject: [PATCH 
42/76] adding info --- cdci_data_analysis/analysis/ivoa_helper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index b7c09f64..f086f4ba 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -19,6 +19,7 @@ def parse_adql_query(query): columns = qp.columns, display_columns = qp.display_columns, tables = qp.tables, + rest = qp ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') From 241b7b271fdcaecd5ec221b17a693dc0ee0f1d1e Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 18 Jul 2024 17:07:07 +0200 Subject: [PATCH 43/76] using sqlparse library --- cdci_data_analysis/analysis/ivoa_helper.py | 69 +++++++++++++++++----- 1 file changed, 54 insertions(+), 15 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index f086f4ba..38f445ed 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,31 +1,70 @@ -from queryparser.adql import ADQLQueryTranslator -from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.exceptions import QuerySyntaxError +import sqlparse +import json + from ..app_logging import app_logging +from ..analysis import drupal_helper + logger = app_logging.getLogger('ivoa_helper') def parse_adql_query(query): try: - adt = ADQLQueryTranslator(query) - qp = PostgreSQLQueryProcessor() - qp.set_query(adt.to_postgresql()) - qp.process_query() + output_obj = dict() + parsed_query_obj = sqlparse.parse(query)[0] + from_seen = False + for t in parsed_query_obj.tokens: + if isinstance(t, sqlparse.sql.Where): + output_obj['where_token'] = t + if from_seen: + if isinstance(t, sqlparse.sql.Identifier): + output_obj['tables'] = [t.get_name()] + elif isinstance(t, sqlparse.sql.IdentifierList): + output_obj['tables'] = [x.get_name() for x in t.get_identifiers()] + if 
t.is_keyword and t.ttype is sqlparse.tokens.Keyword and t.value.upper() == 'FROM': + from_seen = True - output_obj = dict( - columns = qp.columns, - display_columns = qp.display_columns, - tables = qp.tables, - rest = qp - ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - columns = [], - display_columns = [], - tables = [], + where_token = None, + tables = None ) return output_obj + + +def run_ivoa_query(query, sentry_dsn=None, **kwargs): + result_list = [] + parsed_query_obj = parse_adql_query(query) + + tables = parsed_query_obj.get('tables', []) + if len(tables) == 1 and tables[0] == 'product_gallery': + logger.info('Query is a product_gallery query') + product_gallery_url = kwargs.get('product_gallery_url', None) + gallery_jwt_token = kwargs.get('gallery_jwt_token', None) + if product_gallery_url and gallery_jwt_token: + result_list = run_ivoa_query_from_product_gallery( + product_gallery_url, + gallery_jwt_token, + sentry_dsn=sentry_dsn, + **kwargs + ) + return result_list + + +def run_ivoa_query_from_product_gallery(product_gallery_url, + gallery_jwt_token, + sentry_dsn=None, + **kwargs): + output_get = drupal_helper.get_data_product_list_by_source_name_with_conditions( + product_gallery_url=product_gallery_url, + gallery_jwt_token=gallery_jwt_token, + sentry_dsn=sentry_dsn, + **kwargs) + + output_list = json.dumps(output_get) + + return output_list \ No newline at end of file From 11ae504a4d2fd72c6178c889c915c7c88c1ac667 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 18 Jul 2024 17:08:32 +0200 Subject: [PATCH 44/76] requirements --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2b56561d..e8f9279c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ GitPython nbformat sentry-sdk pytest-sentry -queryparser-python3 +sqlparse -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api 
MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index b0b1c85f..43f39654 100644 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ "black>=22.10.0", "bs4", "GitPython", - "queryparser-python3", + "sqlparse", "nbformat", "giturlparse", "sentry-sdk", From c41e4e887cf64351fed355ad7311f655b7b8465c Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 18 Jul 2024 17:23:59 +0200 Subject: [PATCH 45/76] using the two libraries combined --- cdci_data_analysis/analysis/ivoa_helper.py | 36 ++++++++++++++++------ requirements.txt | 1 + setup.py | 1 + 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 38f445ed..4d444c58 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,3 +1,5 @@ +from queryparser.adql import ADQLQueryTranslator +from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.exceptions import QuerySyntaxError import sqlparse @@ -13,25 +15,39 @@ def parse_adql_query(query): try: - output_obj = dict() + adt = ADQLQueryTranslator(query) + qp = PostgreSQLQueryProcessor() + qp.set_query(adt.to_postgresql()) + qp.process_query() + + output_obj = dict( + columns = qp.columns, + display_columns = qp.display_columns, + tables = qp.tables, + rest = qp + ) + # output_obj = dict() parsed_query_obj = sqlparse.parse(query)[0] - from_seen = False + # from_seen = False for t in parsed_query_obj.tokens: if isinstance(t, sqlparse.sql.Where): output_obj['where_token'] = t - if from_seen: - if isinstance(t, sqlparse.sql.Identifier): - output_obj['tables'] = [t.get_name()] - elif isinstance(t, sqlparse.sql.IdentifierList): - output_obj['tables'] = [x.get_name() for x in t.get_identifiers()] - if t.is_keyword and t.ttype is sqlparse.tokens.Keyword and t.value.upper() == 'FROM': - from_seen = True + # if from_seen: + # if isinstance(t, sqlparse.sql.Identifier): + # output_obj['tables'] = [t.get_name()] + # 
elif isinstance(t, sqlparse.sql.IdentifierList): + # output_obj['tables'] = [x.get_name() for x in t.get_identifiers()] + # if t.is_keyword and t.ttype is sqlparse.tokens.Keyword and t.value.upper() == 'FROM': + # from_seen = True except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( where_token = None, - tables = None + tables = None, + columns = None, + display_columns = None, + rest = None ) return output_obj diff --git a/requirements.txt b/requirements.txt index e8f9279c..f551831e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,6 +30,7 @@ nbformat sentry-sdk pytest-sentry sqlparse +queryparser-python3 -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index 43f39654..9ae39326 100644 --- a/setup.py +++ b/setup.py @@ -46,6 +46,7 @@ "bs4", "GitPython", "sqlparse", + "queryparser-python3", "nbformat", "giturlparse", "sentry-sdk", From d7a99646b3568b4732d8f7c11f745f213a366515 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 23 Jul 2024 19:00:31 +0200 Subject: [PATCH 46/76] some where clauses extraction --- cdci_data_analysis/analysis/ivoa_helper.py | 63 +++++++++++++++------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 4d444c58..5aa32464 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,7 +1,11 @@ +import antlr4 from queryparser.adql import ADQLQueryTranslator from queryparser.postgresql import PostgreSQLQueryProcessor +from queryparser.postgresql.PostgreSQLParser import PostgreSQLParser from queryparser.exceptions import QuerySyntaxError +from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener + import sqlparse import json @@ -9,45 +13,64 @@ from ..analysis import drupal_helper - logger = app_logging.getLogger('ivoa_helper') +class 
WhereClauseListener(PostgreSQLParserListener): + def __init__(self): + self.where_clause = None + + def enterWhere_clause(self, ctx): + conditions = self.extract_elements(ctx) + self.where_clause = conditions + + def extract_elements(self, node): + elements = [] + for child in node.getChildren(): + if isinstance(child, PostgreSQLParser.ExpressionContext): + elements.extend(self.extract_elements(child)) + else: + elements.append(child.getText()) + return elements + + def parse_adql_query(query): try: + # queryparser adt = ADQLQueryTranslator(query) qp = PostgreSQLQueryProcessor() + where_listener = WhereClauseListener() qp.set_query(adt.to_postgresql()) qp.process_query() + inpt = antlr4.InputStream(query) + lexer = qp.lexer(inpt) + stream = antlr4.CommonTokenStream(lexer) + parser = qp.parser(stream) + tree = parser.query() + qp.walker.walk(where_listener, tree) + output_obj = dict( - columns = qp.columns, - display_columns = qp.display_columns, - tables = qp.tables, - rest = qp + columns=qp.display_columns, + tables=qp.tables, + rest=qp, + where_clause=where_listener.where_clause ) - # output_obj = dict() + + # sqlparse parsed_query_obj = sqlparse.parse(query)[0] - # from_seen = False + for t in parsed_query_obj.tokens: if isinstance(t, sqlparse.sql.Where): output_obj['where_token'] = t - # if from_seen: - # if isinstance(t, sqlparse.sql.Identifier): - # output_obj['tables'] = [t.get_name()] - # elif isinstance(t, sqlparse.sql.IdentifierList): - # output_obj['tables'] = [x.get_name() for x in t.get_identifiers()] - # if t.is_keyword and t.ttype is sqlparse.tokens.Keyword and t.value.upper() == 'FROM': - # from_seen = True except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - where_token = None, - tables = None, - columns = None, - display_columns = None, - rest = None + where_clause=None, + tables=None, + columns=None, + rest=None ) return output_obj @@ -83,4 +106,4 @@ def 
run_ivoa_query_from_product_gallery(product_gallery_url, output_list = json.dumps(output_get) - return output_list \ No newline at end of file + return output_list From 9fd948bc2c7b633a4fc132cf8baafa0a81e0888d Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 26 Jul 2024 19:46:24 +0200 Subject: [PATCH 47/76] testing breadth-first --- cdci_data_analysis/analysis/ivoa_helper.py | 74 ++++++++++++++++++++-- 1 file changed, 67 insertions(+), 7 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 5aa32464..a93d5e65 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -3,6 +3,7 @@ from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.postgresql.PostgreSQLParser import PostgreSQLParser from queryparser.exceptions import QuerySyntaxError +from collections import deque from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener @@ -21,17 +22,76 @@ def __init__(self): self.where_clause = None def enterWhere_clause(self, ctx): - conditions = self.extract_elements(ctx) + conditions = self.analyze_expressions(ctx) self.where_clause = conditions - def extract_elements(self, node): - elements = [] + def analyze_expressions(self, node): + output_obj = dict() for child in node.getChildren(): if isinstance(child, PostgreSQLParser.ExpressionContext): - elements.extend(self.extract_elements(child)) - else: - elements.append(child.getText()) - return elements + output_obj['conditions'] = self.extract_conditions_from_hierarchy(child) + return output_obj + + # def extract_conditions_from_hierarchy(self, context, level=0, conditions=None): + # bottom_reached = False + # if conditions is None: + # conditions = [] + # if isinstance(context, antlr4.ParserRuleContext): + # print(f"{' ' * level} - {type(context).__name__}, level: {level}") + # if isinstance(context, PostgreSQLParser.Bool_primaryContext): + # 
print("Bool_primaryContext reached") + # conditions.append({}) + # elif isinstance(context, PostgreSQLParser.Column_nameContext): + # print("Column_nameContext reached") + # # conditions[-1]['column'] = context.getText() + # bottom_reached = True + # elif isinstance(context, PostgreSQLParser.Relational_opContext): + # print("Relational_opContext reached") + # bottom_reached = True + # # conditions[-1]['operator'] = context.getText() + # elif isinstance(context, PostgreSQLParser.Number_literalContext): + # print("Number_literalContext reached") + # # conditions[-1]['value'] = context.getText() + # bottom_reached = True + # if not bottom_reached: + # for child in context.children: + # print(f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") + # conditions.extend(self.extract_conditions_from_hierarchy(child, level + 1, conditions=conditions)) + # return conditions + + from collections import deque + + def extract_conditions_from_hierarchy(self, context, conditions=None): + if conditions is None: + conditions = [] + + queue = deque([(context, 0)]) + + while queue: + context, level = queue.popleft() + + if isinstance(context, antlr4.ParserRuleContext): + print(f"{' ' * level} - {type(context).__name__}, level: {level}") + if isinstance(context, PostgreSQLParser.Bool_primaryContext): + print("Bool_primaryContext reached") + conditions.append({}) + elif isinstance(context, PostgreSQLParser.Column_nameContext): + print("Column_nameContext reached") + conditions[-1]['column'] = context.getText() + elif isinstance(context, PostgreSQLParser.Relational_opContext): + print("Relational_opContext reached") + conditions[-1]['operator'] = context.getText() + elif isinstance(context, PostgreSQLParser.Number_literalContext): + print("Number_literalContext reached") + conditions[-1]['value'] = context.getText() + # else: + # Enqueue all children of the current node, with their level increased by 1 + for child 
in context.children: + print( + f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") + queue.append((child, level + 1)) + + return conditions def parse_adql_query(query): From 201058b1557837ae16bb791a64bcc3a33dcaced3 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 29 Jul 2024 18:02:48 +0200 Subject: [PATCH 48/76] using breadth-first --- cdci_data_analysis/analysis/ivoa_helper.py | 42 ++++------------------ 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index a93d5e65..647c5916 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -32,41 +32,12 @@ def analyze_expressions(self, node): output_obj['conditions'] = self.extract_conditions_from_hierarchy(child) return output_obj - # def extract_conditions_from_hierarchy(self, context, level=0, conditions=None): - # bottom_reached = False - # if conditions is None: - # conditions = [] - # if isinstance(context, antlr4.ParserRuleContext): - # print(f"{' ' * level} - {type(context).__name__}, level: {level}") - # if isinstance(context, PostgreSQLParser.Bool_primaryContext): - # print("Bool_primaryContext reached") - # conditions.append({}) - # elif isinstance(context, PostgreSQLParser.Column_nameContext): - # print("Column_nameContext reached") - # # conditions[-1]['column'] = context.getText() - # bottom_reached = True - # elif isinstance(context, PostgreSQLParser.Relational_opContext): - # print("Relational_opContext reached") - # bottom_reached = True - # # conditions[-1]['operator'] = context.getText() - # elif isinstance(context, PostgreSQLParser.Number_literalContext): - # print("Number_literalContext reached") - # # conditions[-1]['value'] = context.getText() - # bottom_reached = True - # if not bottom_reached: - # for child in context.children: - # print(f"{' ' * level} - 
{type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") - # conditions.extend(self.extract_conditions_from_hierarchy(child, level + 1, conditions=conditions)) - # return conditions - - from collections import deque - def extract_conditions_from_hierarchy(self, context, conditions=None): if conditions is None: conditions = [] queue = deque([(context, 0)]) - + column_level = relation_level = number_literal_level = 0 while queue: context, level = queue.popleft() @@ -77,15 +48,16 @@ def extract_conditions_from_hierarchy(self, context, conditions=None): conditions.append({}) elif isinstance(context, PostgreSQLParser.Column_nameContext): print("Column_nameContext reached") - conditions[-1]['column'] = context.getText() + conditions[column_level]['column'] = context.getText() + column_level += 1 elif isinstance(context, PostgreSQLParser.Relational_opContext): print("Relational_opContext reached") - conditions[-1]['operator'] = context.getText() + conditions[relation_level]['operator'] = context.getText() + relation_level += 1 elif isinstance(context, PostgreSQLParser.Number_literalContext): print("Number_literalContext reached") - conditions[-1]['value'] = context.getText() - # else: - # Enqueue all children of the current node, with their level increased by 1 + conditions[number_literal_level]['value'] = context.getText() + number_literal_level += 1 for child in context.children: print( f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") From 37ed4fd4d75795d500254bebb4eba103b67c9a0c Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 30 Jul 2024 18:30:48 +0200 Subject: [PATCH 49/76] no sqlparse --- cdci_data_analysis/analysis/ivoa_helper.py | 45 +++++++++------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 647c5916..0a76596f 
100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -2,14 +2,12 @@ from queryparser.adql import ADQLQueryTranslator from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.postgresql.PostgreSQLParser import PostgreSQLParser +from queryparser.mysql import MySQLQueryProcessor from queryparser.exceptions import QuerySyntaxError from collections import deque from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener -import sqlparse -import json - from ..app_logging import app_logging from ..analysis import drupal_helper @@ -70,32 +68,25 @@ def parse_adql_query(query): try: # queryparser adt = ADQLQueryTranslator(query) - qp = PostgreSQLQueryProcessor() - where_listener = WhereClauseListener() - qp.set_query(adt.to_postgresql()) + qp = MySQLQueryProcessor() + qp.set_query(adt.to_mysql()) qp.process_query() - inpt = antlr4.InputStream(query) - lexer = qp.lexer(inpt) - stream = antlr4.CommonTokenStream(lexer) - parser = qp.parser(stream) - tree = parser.query() - qp.walker.walk(where_listener, tree) + # where_listener = WhereClauseListener() + # inpt = antlr4.InputStream(query) + # lexer = qp.lexer(inpt) + # stream = antlr4.CommonTokenStream(lexer) + # parser = qp.parser(stream) + # tree = parser.query() + # qp.walker.walk(where_listener, tree) output_obj = dict( columns=qp.display_columns, tables=qp.tables, rest=qp, - where_clause=where_listener.where_clause + # where_clause=where_listener.where_clause ) - # sqlparse - parsed_query_obj = sqlparse.parse(query)[0] - - for t in parsed_query_obj.tokens: - if isinstance(t, sqlparse.sql.Where): - output_obj['where_token'] = t - except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( @@ -130,12 +121,12 @@ def run_ivoa_query_from_product_gallery(product_gallery_url, gallery_jwt_token, sentry_dsn=None, **kwargs): - output_get = 
drupal_helper.get_data_product_list_by_source_name_with_conditions( - product_gallery_url=product_gallery_url, - gallery_jwt_token=gallery_jwt_token, - sentry_dsn=sentry_dsn, - **kwargs) - - output_list = json.dumps(output_get) + # output_get = drupal_helper.get_data_product_list_by_source_name_with_conditions( + # product_gallery_url=product_gallery_url, + # gallery_jwt_token=gallery_jwt_token, + # sentry_dsn=sentry_dsn, + # **kwargs) + # + # output_list = json.dumps(output_get) return output_list From 4084298edfe0feb9deff021ebbd1371dbdc639ed Mon Sep 17 00:00:00 2001 From: burnout87 Date: Tue, 30 Jul 2024 19:11:51 +0200 Subject: [PATCH 50/76] querying mysql gallery database --- cdci_data_analysis/analysis/ivoa_helper.py | 69 +++++++++++----------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 0a76596f..d78b0dc3 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -6,6 +6,8 @@ from queryparser.exceptions import QuerySyntaxError from collections import deque +from mysql.connector import connect, Error + from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener from ..app_logging import app_logging @@ -72,28 +74,22 @@ def parse_adql_query(query): qp.set_query(adt.to_mysql()) qp.process_query() - # where_listener = WhereClauseListener() - # inpt = antlr4.InputStream(query) - # lexer = qp.lexer(inpt) - # stream = antlr4.CommonTokenStream(lexer) - # parser = qp.parser(stream) - # tree = parser.query() - # qp.walker.walk(where_listener, tree) - output_obj = dict( columns=qp.display_columns, tables=qp.tables, rest=qp, + mysql_query=qp.query # where_clause=where_listener.where_clause ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - where_clause=None, + # where_clause=None, tables=None, columns=None, - rest=None + rest=None, + 
mysql_query=None ) return output_obj @@ -102,31 +98,36 @@ def run_ivoa_query(query, sentry_dsn=None, **kwargs): result_list = [] parsed_query_obj = parse_adql_query(query) - tables = parsed_query_obj.get('tables', []) - if len(tables) == 1 and tables[0] == 'product_gallery': - logger.info('Query is a product_gallery query') - product_gallery_url = kwargs.get('product_gallery_url', None) - gallery_jwt_token = kwargs.get('gallery_jwt_token', None) - if product_gallery_url and gallery_jwt_token: - result_list = run_ivoa_query_from_product_gallery( - product_gallery_url, - gallery_jwt_token, - sentry_dsn=sentry_dsn, - **kwargs - ) + # tables = parsed_query_obj.get('tables', []) + # if len(tables) == 1 and tables[0] == 'product_gallery': + logger.info('Performing query on the product_gallery') + # product_gallery_url = kwargs.get('product_gallery_url', None) + # gallery_jwt_token = kwargs.get('gallery_jwt_token', None) + # if product_gallery_url and gallery_jwt_token: + result_list = run_ivoa_query_from_product_gallery(parsed_query_obj) return result_list -def run_ivoa_query_from_product_gallery(product_gallery_url, - gallery_jwt_token, - sentry_dsn=None, - **kwargs): - # output_get = drupal_helper.get_data_product_list_by_source_name_with_conditions( - # product_gallery_url=product_gallery_url, - # gallery_jwt_token=gallery_jwt_token, - # sentry_dsn=sentry_dsn, - # **kwargs) - # - # output_list = json.dumps(output_get) +def run_ivoa_query_from_product_gallery(parsed_query_obj): + result_list = [] + + try: + with connect( + # TODO: Add the connection details reading from the config file + host="", + user="", + password="", + database="" + ) as connection: + print(connection) + + create_db_query = parsed_query_obj.get('mysql_query') + with connection.cursor() as cursor: + cursor.execute(create_db_query) + for db in cursor: + print(db) + + except Error as e: + print(e) - return output_list + return result_list From 61df4c99bc0f4c816abc35800436421305ecc145 Mon Sep 17 
00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 10:45:28 +0200 Subject: [PATCH 51/76] todo and removed commented lines --- cdci_data_analysis/analysis/ivoa_helper.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index d78b0dc3..e65fe0d8 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -79,13 +79,11 @@ def parse_adql_query(query): tables=qp.tables, rest=qp, mysql_query=qp.query - # where_clause=where_listener.where_clause ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - # where_clause=None, tables=None, columns=None, rest=None, @@ -95,9 +93,9 @@ def parse_adql_query(query): def run_ivoa_query(query, sentry_dsn=None, **kwargs): - result_list = [] parsed_query_obj = parse_adql_query(query) + # TODO use a specific dedicated table and schema to refer to the product_gallery DB ? 
# tables = parsed_query_obj.get('tables', []) # if len(tables) == 1 and tables[0] == 'product_gallery': logger.info('Performing query on the product_gallery') From d4614d6545477a5fe3df147c19a60dd718b63cfb Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 12:05:48 +0200 Subject: [PATCH 52/76] vo options in the dispatcher config --- cdci_data_analysis/config_dir/conf_env.yml.example | 8 +++++++- cdci_data_analysis/configurer.py | 12 ++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/cdci_data_analysis/config_dir/conf_env.yml.example b/cdci_data_analysis/config_dir/conf_env.yml.example index 2dd65868..82f2cd0f 100644 --- a/cdci_data_analysis/config_dir/conf_env.yml.example +++ b/cdci_data_analysis/config_dir/conf_env.yml.example @@ -122,4 +122,10 @@ dispatcher: # url for the conversion of a given time, in UTC format, to the correspondent REVNUM converttime_revnum_service_url: COVERTTIME_REVNUM_SERVICE_URL - + # virtual observatory related configurations (eg mysql credentials) + vo_options: + # mysql credentials + vo_mysql_pg_host: MYSQL_PG_HOST + vo_mysql_pg_user: MYSQL_PG_USER + vo_mysql_pg_password: MYSQL_PG_PASSWORD + vo_mysql_pg_db: MYSQL_PG_DB diff --git a/cdci_data_analysis/configurer.py b/cdci_data_analysis/configurer.py index d35e1489..a84f3b86 100644 --- a/cdci_data_analysis/configurer.py +++ b/cdci_data_analysis/configurer.py @@ -266,6 +266,10 @@ def __init__(self, cfg_dict, origin=None): disp_dict.get('renku_options', {}).get('renku_gitlab_repository_url', None), disp_dict.get('renku_options', {}).get('renku_base_project_url', None), disp_dict.get('renku_options', {}).get('ssh_key_path', None), + disp_dict.get('vo_options', {}).get('vo_mysql_pg_host', None), + disp_dict.get('vo_options', {}).get('vo_mysql_pg_user', None), + disp_dict.get('vo_options', {}).get('vo_mysql_pg_password', None), + disp_dict.get('vo_options', {}).get('vo_mysql_pg_db', None), ) # not used? 
@@ -344,6 +348,10 @@ def set_conf_dispatcher(self, renku_gitlab_repository_url, renku_base_project_url, renku_gitlab_ssh_key_path, + vo_mysql_pg_host, + vo_mysql_pg_user, + vo_mysql_pg_password, + vo_mysql_pg_db ): # Generic to dispatcher #print(dispatcher_url, dispatcher_port) @@ -395,6 +403,10 @@ def set_conf_dispatcher(self, self.renku_gitlab_repository_url = renku_gitlab_repository_url self.renku_gitlab_ssh_key_path = renku_gitlab_ssh_key_path self.renku_base_project_url = renku_base_project_url + self.vo_mysql_pg_host = vo_mysql_pg_host + self.vo_mysql_pg_user = vo_mysql_pg_user + self.vo_mysql_pg_password = vo_mysql_pg_password + self.vo_mysql_pg_db = vo_mysql_pg_db def get_data_serve_conf(self, instr_name): if instr_name in self.data_server_conf_dict.keys(): From a488f4cc9abd30b987563ead2e828a5532619174 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 12:06:06 +0200 Subject: [PATCH 53/76] extracting mysql parameters from config --- cdci_data_analysis/analysis/ivoa_helper.py | 31 +++++++++++++--------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index e65fe0d8..0cead573 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -99,26 +99,33 @@ def run_ivoa_query(query, sentry_dsn=None, **kwargs): # tables = parsed_query_obj.get('tables', []) # if len(tables) == 1 and tables[0] == 'product_gallery': logger.info('Performing query on the product_gallery') - # product_gallery_url = kwargs.get('product_gallery_url', None) - # gallery_jwt_token = kwargs.get('gallery_jwt_token', None) - # if product_gallery_url and gallery_jwt_token: - result_list = run_ivoa_query_from_product_gallery(parsed_query_obj) + vo_mysql_pg_host = kwargs.get('vo_mysql_pg_host', None) + vo_mysql_pg_user = kwargs.get('vo_mysql_pg_user', None) + vo_mysql_pg_password = kwargs.get('vo_mysql_pg_password', None) + vo_mysql_pg_db 
= kwargs.get('vo_mysql_pg_db', None) + result_list = run_ivoa_query_from_product_gallery(parsed_query_obj, + vo_mysql_pg_host=vo_mysql_pg_host, + vo_mysql_pg_user=vo_mysql_pg_user, + vo_mysql_pg_password=vo_mysql_pg_password, + vo_mysql_pg_db=vo_mysql_pg_db) return result_list -def run_ivoa_query_from_product_gallery(parsed_query_obj): +def run_ivoa_query_from_product_gallery(parsed_query_obj, + vo_mysql_pg_host, + vo_mysql_pg_user, + vo_mysql_pg_password, + vo_mysql_pg_db + ): result_list = [] try: with connect( - # TODO: Add the connection details reading from the config file - host="", - user="", - password="", - database="" + host=vo_mysql_pg_host, + user=vo_mysql_pg_user, + password=vo_mysql_pg_password, + database=vo_mysql_pg_db ) as connection: - print(connection) - create_db_query = parsed_query_obj.get('mysql_query') with connection.cursor() as cursor: cursor.execute(create_db_query) From 0ba6618707dfc99766a16f0594864302e27518c5 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 12:09:24 +0200 Subject: [PATCH 54/76] no need for breadth first search --- cdci_data_analysis/analysis/ivoa_helper.py | 51 ---------------------- 1 file changed, 51 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 0cead573..ddc8c159 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -12,60 +12,9 @@ from ..app_logging import app_logging -from ..analysis import drupal_helper - logger = app_logging.getLogger('ivoa_helper') -class WhereClauseListener(PostgreSQLParserListener): - def __init__(self): - self.where_clause = None - - def enterWhere_clause(self, ctx): - conditions = self.analyze_expressions(ctx) - self.where_clause = conditions - - def analyze_expressions(self, node): - output_obj = dict() - for child in node.getChildren(): - if isinstance(child, PostgreSQLParser.ExpressionContext): - output_obj['conditions'] = 
self.extract_conditions_from_hierarchy(child) - return output_obj - - def extract_conditions_from_hierarchy(self, context, conditions=None): - if conditions is None: - conditions = [] - - queue = deque([(context, 0)]) - column_level = relation_level = number_literal_level = 0 - while queue: - context, level = queue.popleft() - - if isinstance(context, antlr4.ParserRuleContext): - print(f"{' ' * level} - {type(context).__name__}, level: {level}") - if isinstance(context, PostgreSQLParser.Bool_primaryContext): - print("Bool_primaryContext reached") - conditions.append({}) - elif isinstance(context, PostgreSQLParser.Column_nameContext): - print("Column_nameContext reached") - conditions[column_level]['column'] = context.getText() - column_level += 1 - elif isinstance(context, PostgreSQLParser.Relational_opContext): - print("Relational_opContext reached") - conditions[relation_level]['operator'] = context.getText() - relation_level += 1 - elif isinstance(context, PostgreSQLParser.Number_literalContext): - print("Number_literalContext reached") - conditions[number_literal_level]['value'] = context.getText() - number_literal_level += 1 - for child in context.children: - print( - f"{' ' * level} - {type(child).__name__}, level: {level}, childGetText: {child.getText()}, conditions size: {len(conditions)}") - queue.append((child, level + 1)) - - return conditions - - def parse_adql_query(query): try: # queryparser From 6b3fb193694e2eb929de54714440be5565ccdd85 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 12:39:17 +0200 Subject: [PATCH 55/76] sentry in case of error --- cdci_data_analysis/analysis/ivoa_helper.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index ddc8c159..ae12777e 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,15 +1,11 @@ import antlr4 from queryparser.adql 
import ADQLQueryTranslator -from queryparser.postgresql import PostgreSQLQueryProcessor -from queryparser.postgresql.PostgreSQLParser import PostgreSQLParser from queryparser.mysql import MySQLQueryProcessor from queryparser.exceptions import QuerySyntaxError -from collections import deque from mysql.connector import connect, Error -from queryparser.postgresql.PostgreSQLParserListener import PostgreSQLParserListener - +from ..flask_app.sentry import sentry from ..app_logging import app_logging logger = app_logging.getLogger('ivoa_helper') @@ -41,7 +37,7 @@ def parse_adql_query(query): return output_obj -def run_ivoa_query(query, sentry_dsn=None, **kwargs): +def run_ivoa_query(query, **kwargs): parsed_query_obj = parse_adql_query(query) # TODO use a specific dedicated table and schema to refer to the product_gallery DB ? @@ -79,9 +75,10 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, with connection.cursor() as cursor: cursor.execute(create_db_query) for db in cursor: - print(db) + logger.info(db) except Error as e: - print(e) + sentry.capture_message(f"Error when connecting to MySQL or performing the query: {str(e)}") + logger.error(f"Error when connecting to MySQL or performing the query: {str(e)}") return result_list From ed64d1c0862bd517a93efafda53d8131d534da9b Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 13:00:50 +0200 Subject: [PATCH 56/76] capturing general exception --- cdci_data_analysis/analysis/ivoa_helper.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index ae12777e..e73da1b3 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -76,9 +76,14 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, cursor.execute(create_db_query) for db in cursor: logger.info(db) + result_list.append(db) except Error as e: - sentry.capture_message(f"Error when 
connecting to MySQL or performing the query: {str(e)}") - logger.error(f"Error when connecting to MySQL or performing the query: {str(e)}") + sentry.capture_message(f"Error when connecting to MySQL: {str(e)}") + logger.error(f"Error when connecting to MySQL: {str(e)}") + + except Exception as e: + sentry.capture_message(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") + logger.error(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") return result_list From 4ff74d3978eda979bd822fec2104c7eabfeb2fc7 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 17:30:19 +0200 Subject: [PATCH 57/76] dispatcher endpoint --- cdci_data_analysis/flask_app/app.py | 33 ++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 8627ac8b..648f9c51 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -29,7 +29,7 @@ import time as _time from urllib.parse import urlencode, urlparse -from cdci_data_analysis.analysis import drupal_helper, tokenHelper, renku_helper, email_helper, matrix_helper +from cdci_data_analysis.analysis import drupal_helper, tokenHelper, renku_helper, email_helper, matrix_helper, ivoa_helper from .logstash import logstash_message from .schemas import QueryOutJSON, dispatcher_strict_validate from marshmallow.exceptions import ValidationError @@ -430,6 +430,37 @@ def push_renku_branch(): "Our team is notified and is working on it.") +@app.route('/run_adql_query') +def run_adql_query(): + logger.info("request.args: %s ", request.args) + + token = request.args.get('token', None) + app_config = app.config.get('conf') + secret_key = app_config.secret_key + + output, output_code = tokenHelper.validate_token_from_request(token=token, secret_key=secret_key, + required_roles=['ivoa_user'], + action="run an ADQL query") + + if output_code is not None: + return 
make_response(output, output_code) + + adql_query = request.args.get('adql_query', None) + vo_mysql_pg_host = app_config.vo_mysql_pg_host + vo_mysql_pg_user = app_config.vo_mysql_pg_user + vo_mysql_pg_password = app_config.vo_mysql_pg_password + vo_mysql_pg_db = app_config.vo_mysql_pg_db + + result_query = ivoa_helper.run_ivoa_query(adql_query, + vo_mysql_pg_host=vo_mysql_pg_host, + vo_mysql_pg_user=vo_mysql_pg_user, + vo_mysql_pg_password=vo_mysql_pg_password, + vo_mysql_pg_db=vo_mysql_pg_db) + + output_request = json.dumps(result_query) + + return output_request + @app.route('/run_analysis', methods=['POST', 'GET']) def run_analysis(): From 60409db907243ccac0054d2484031fbb6174c69b Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 31 Jul 2024 18:20:38 +0200 Subject: [PATCH 58/76] build product gallery path and jsonify the response --- cdci_data_analysis/analysis/ivoa_helper.py | 37 ++++++++++++++++++---- cdci_data_analysis/flask_app/app.py | 8 ++--- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index e73da1b3..b865fcf2 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,4 +1,7 @@ -import antlr4 +import json +import os.path + +from black.lines import append_leaves from queryparser.adql import ADQLQueryTranslator from queryparser.mysql import MySQLQueryProcessor from queryparser.exceptions import QuerySyntaxError @@ -48,11 +51,13 @@ def run_ivoa_query(query, **kwargs): vo_mysql_pg_user = kwargs.get('vo_mysql_pg_user', None) vo_mysql_pg_password = kwargs.get('vo_mysql_pg_password', None) vo_mysql_pg_db = kwargs.get('vo_mysql_pg_db', None) + product_gallery_url = kwargs.get('product_gallery_url', None) result_list = run_ivoa_query_from_product_gallery(parsed_query_obj, vo_mysql_pg_host=vo_mysql_pg_host, vo_mysql_pg_user=vo_mysql_pg_user, vo_mysql_pg_password=vo_mysql_pg_password, - 
vo_mysql_pg_db=vo_mysql_pg_db) + vo_mysql_pg_db=vo_mysql_pg_db, + product_gallery_url=product_gallery_url) return result_list @@ -60,7 +65,8 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, vo_mysql_pg_host, vo_mysql_pg_user, vo_mysql_pg_password, - vo_mysql_pg_db + vo_mysql_pg_db, + product_gallery_url=None ): result_list = [] @@ -72,11 +78,23 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, database=vo_mysql_pg_db ) as connection: create_db_query = parsed_query_obj.get('mysql_query') - with connection.cursor() as cursor: + with connection.cursor(dictionary=True) as cursor: cursor.execute(create_db_query) - for db in cursor: - logger.info(db) - result_list.append(db) + for row in cursor: + if product_gallery_url is not None: + path = row.get('path', None) + if path is not None: + if path.startswith('/'): + path = path[1:] + row['path'] = os.path.join(product_gallery_url, path) + path_alias = row.get('path_alias', None) + if path_alias is not None: + if path_alias.startswith('/'): + path_alias = path_alias[1:] + row['path_alias'] = os.path.join(product_gallery_url, path_alias) + result_list.append(row) + # result_obj = cursor.fetchall() + except Error as e: sentry.capture_message(f"Error when connecting to MySQL: {str(e)}") @@ -86,4 +104,9 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, sentry.capture_message(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") logger.error(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") + finally: + if connection is not None and connection.is_connected(): + connection.close() + logger.info('MySQL connection closed') + return result_list diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 648f9c51..3ef840f9 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -450,16 +450,16 @@ def run_adql_query(): vo_mysql_pg_user = app_config.vo_mysql_pg_user 
vo_mysql_pg_password = app_config.vo_mysql_pg_password vo_mysql_pg_db = app_config.vo_mysql_pg_db + product_gallery_url = app_config.product_gallery_url result_query = ivoa_helper.run_ivoa_query(adql_query, vo_mysql_pg_host=vo_mysql_pg_host, vo_mysql_pg_user=vo_mysql_pg_user, vo_mysql_pg_password=vo_mysql_pg_password, - vo_mysql_pg_db=vo_mysql_pg_db) - - output_request = json.dumps(result_query) + vo_mysql_pg_db=vo_mysql_pg_db, + product_gallery_url=product_gallery_url) - return output_request + return jsonify(result_query) @app.route('/run_analysis', methods=['POST', 'GET']) From b35e2aa0e0fac99f314414fd398f31d45504dc39 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Mon, 12 Aug 2024 14:11:41 +0200 Subject: [PATCH 59/76] var renaming --- cdci_data_analysis/analysis/ivoa_helper.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index b865fcf2..be642171 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -77,9 +77,9 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, password=vo_mysql_pg_password, database=vo_mysql_pg_db ) as connection: - create_db_query = parsed_query_obj.get('mysql_query') + db_query = parsed_query_obj.get('mysql_query') with connection.cursor(dictionary=True) as cursor: - cursor.execute(create_db_query) + cursor.execute(db_query) for row in cursor: if product_gallery_url is not None: path = row.get('path', None) @@ -93,7 +93,6 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, path_alias = path_alias[1:] row['path_alias'] = os.path.join(product_gallery_url, path_alias) result_list.append(row) - # result_obj = cursor.fetchall() except Error as e: From b8fb5883c31fd1fdf5c33acfb8cb90e90c070020 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 16 Aug 2024 09:13:23 +0200 Subject: [PATCH 60/76] removed unused imports --- cdci_data_analysis/analysis/ivoa_helper.py | 3 
--- 1 file changed, 3 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index be642171..461bb978 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,7 +1,5 @@ -import json import os.path -from black.lines import append_leaves from queryparser.adql import ADQLQueryTranslator from queryparser.mysql import MySQLQueryProcessor from queryparser.exceptions import QuerySyntaxError @@ -94,7 +92,6 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, row['path_alias'] = os.path.join(product_gallery_url, path_alias) result_list.append(row) - except Error as e: sentry.capture_message(f"Error when connecting to MySQL: {str(e)}") logger.error(f"Error when connecting to MySQL: {str(e)}") From 0c4214256a481c1e8640ca52bf508c71f85d41fd Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 26 Sep 2024 16:29:25 +0200 Subject: [PATCH 61/76] using postgresql --- cdci_data_analysis/analysis/ivoa_helper.py | 88 +++++++++++----------- cdci_data_analysis/configurer.py | 14 +++- cdci_data_analysis/flask_app/app.py | 16 ++-- 3 files changed, 63 insertions(+), 55 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 461bb978..32eda88e 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,10 +1,10 @@ import os.path from queryparser.adql import ADQLQueryTranslator -from queryparser.mysql import MySQLQueryProcessor +from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.exceptions import QuerySyntaxError -from mysql.connector import connect, Error +from psycopg2 import connect, DatabaseError from ..flask_app.sentry import sentry from ..app_logging import app_logging @@ -16,15 +16,16 @@ def parse_adql_query(query): try: # queryparser adt = ADQLQueryTranslator(query) - qp = MySQLQueryProcessor() - qp.set_query(adt.to_mysql()) - 
qp.process_query() + qp = PostgreSQLQueryProcessor() + qp.set_query(adt.to_postgresql()) + qp.process_query(replace_schema_name={'mmoda_pg_dev': 'public'}) output_obj = dict( columns=qp.display_columns, tables=qp.tables, rest=qp, - mysql_query=qp.query + mysql_query=None, + psql_query=qp.query ) except QuerySyntaxError as qe: @@ -33,7 +34,8 @@ def parse_adql_query(query): tables=None, columns=None, rest=None, - mysql_query=None + mysql_query=None, + psql_query=None ) return output_obj @@ -45,64 +47,58 @@ def run_ivoa_query(query, **kwargs): # tables = parsed_query_obj.get('tables', []) # if len(tables) == 1 and tables[0] == 'product_gallery': logger.info('Performing query on the product_gallery') - vo_mysql_pg_host = kwargs.get('vo_mysql_pg_host', None) - vo_mysql_pg_user = kwargs.get('vo_mysql_pg_user', None) - vo_mysql_pg_password = kwargs.get('vo_mysql_pg_password', None) - vo_mysql_pg_db = kwargs.get('vo_mysql_pg_db', None) + vo_psql_pg_host = kwargs.get('vo_psql_pg_host', None) + vo_psql_pg_user = kwargs.get('vo_psql_pg_user', None) + vo_psql_pg_password = kwargs.get('vo_psql_pg_password', None) + vo_psql_pg_db = kwargs.get('vo_psql_pg_db', None) product_gallery_url = kwargs.get('product_gallery_url', None) result_list = run_ivoa_query_from_product_gallery(parsed_query_obj, - vo_mysql_pg_host=vo_mysql_pg_host, - vo_mysql_pg_user=vo_mysql_pg_user, - vo_mysql_pg_password=vo_mysql_pg_password, - vo_mysql_pg_db=vo_mysql_pg_db, + vo_psql_pg_host=vo_psql_pg_host, + vo_psql_pg_user=vo_psql_pg_user, + vo_psql_pg_password=vo_psql_pg_password, + vo_psql_pg_db=vo_psql_pg_db, product_gallery_url=product_gallery_url) return result_list def run_ivoa_query_from_product_gallery(parsed_query_obj, - vo_mysql_pg_host, - vo_mysql_pg_user, - vo_mysql_pg_password, - vo_mysql_pg_db, + vo_psql_pg_host, + vo_psql_pg_user, + vo_psql_pg_password, + vo_psql_pg_db, product_gallery_url=None ): result_list = [] try: with connect( - host=vo_mysql_pg_host, - user=vo_mysql_pg_user, - 
password=vo_mysql_pg_password, - database=vo_mysql_pg_db + host=vo_psql_pg_host, + database=vo_psql_pg_db, + user=vo_psql_pg_user, + password=vo_psql_pg_password ) as connection: - db_query = parsed_query_obj.get('mysql_query') - with connection.cursor(dictionary=True) as cursor: + db_query = parsed_query_obj.get('psql_query') + with connection.cursor() as cursor: cursor.execute(db_query) for row in cursor: + list_row = list(row) if product_gallery_url is not None: - path = row.get('path', None) - if path is not None: - if path.startswith('/'): - path = path[1:] - row['path'] = os.path.join(product_gallery_url, path) - path_alias = row.get('path_alias', None) - if path_alias is not None: - if path_alias.startswith('/'): - path_alias = path_alias[1:] - row['path_alias'] = os.path.join(product_gallery_url, path_alias) - result_list.append(row) - - except Error as e: - sentry.capture_message(f"Error when connecting to MySQL: {str(e)}") - logger.error(f"Error when connecting to MySQL: {str(e)}") - - except Exception as e: - sentry.capture_message(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") - logger.error(f"Error when performing the mysql query to the product_gallery DB: {str(e)}") + for index, value in enumerate(list_row): + description = cursor.description[index] + if description.name in {'path', 'path_alias'}: + if list_row[index].startswith('/'): + list_row[index] = row[index][1:] + list_row[index] = os.path.join(product_gallery_url, list_row[index]) + result_list.append(list_row) + + except (Exception, DatabaseError) as e: + sentry.capture_message(f"Error when querying to the Postgresql server: {str(e)}") + logger.error(f"Error when querying to the Postgresql server: {str(e)}") finally: - if connection is not None and connection.is_connected(): + if connection is not None: + cursor.close() connection.close() - logger.info('MySQL connection closed') + logger.info('Database connection closed') return result_list diff --git 
a/cdci_data_analysis/configurer.py b/cdci_data_analysis/configurer.py index a84f3b86..2e939c8b 100644 --- a/cdci_data_analysis/configurer.py +++ b/cdci_data_analysis/configurer.py @@ -270,6 +270,10 @@ def __init__(self, cfg_dict, origin=None): disp_dict.get('vo_options', {}).get('vo_mysql_pg_user', None), disp_dict.get('vo_options', {}).get('vo_mysql_pg_password', None), disp_dict.get('vo_options', {}).get('vo_mysql_pg_db', None), + disp_dict.get('vo_options', {}).get('vo_psql_pg_host', None), + disp_dict.get('vo_options', {}).get('vo_psql_pg_user', None), + disp_dict.get('vo_options', {}).get('vo_psql_pg_password', None), + disp_dict.get('vo_options', {}).get('vo_psql_pg_db', None) ) # not used? @@ -351,7 +355,11 @@ def set_conf_dispatcher(self, vo_mysql_pg_host, vo_mysql_pg_user, vo_mysql_pg_password, - vo_mysql_pg_db + vo_mysql_pg_db, + vo_psql_pg_host, + vo_psql_pg_user, + vo_psql_pg_password, + vo_psql_pg_db ): # Generic to dispatcher #print(dispatcher_url, dispatcher_port) @@ -407,6 +415,10 @@ def set_conf_dispatcher(self, self.vo_mysql_pg_user = vo_mysql_pg_user self.vo_mysql_pg_password = vo_mysql_pg_password self.vo_mysql_pg_db = vo_mysql_pg_db + self.vo_psql_pg_host = vo_psql_pg_host + self.vo_psql_pg_user = vo_psql_pg_user + self.vo_psql_pg_password = vo_psql_pg_password + self.vo_psql_pg_db = vo_psql_pg_db def get_data_serve_conf(self, instr_name): if instr_name in self.data_server_conf_dict.keys(): diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 3ef840f9..1df2aeb9 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -446,17 +446,17 @@ def run_adql_query(): return make_response(output, output_code) adql_query = request.args.get('adql_query', None) - vo_mysql_pg_host = app_config.vo_mysql_pg_host - vo_mysql_pg_user = app_config.vo_mysql_pg_user - vo_mysql_pg_password = app_config.vo_mysql_pg_password - vo_mysql_pg_db = app_config.vo_mysql_pg_db + vo_psql_pg_host = 
app_config.vo_psql_pg_host + vo_psql_pg_user = app_config.vo_psql_pg_user + vo_psql_pg_password = app_config.vo_psql_pg_password + vo_psql_pg_db = app_config.vo_psql_pg_db product_gallery_url = app_config.product_gallery_url result_query = ivoa_helper.run_ivoa_query(adql_query, - vo_mysql_pg_host=vo_mysql_pg_host, - vo_mysql_pg_user=vo_mysql_pg_user, - vo_mysql_pg_password=vo_mysql_pg_password, - vo_mysql_pg_db=vo_mysql_pg_db, + vo_psql_pg_host=vo_psql_pg_host, + vo_psql_pg_user=vo_psql_pg_user, + vo_psql_pg_password=vo_psql_pg_password, + vo_psql_pg_db=vo_psql_pg_db, product_gallery_url=product_gallery_url) return jsonify(result_query) From f2fbc59263a729dc8b6a164887fea290b744c34a Mon Sep 17 00:00:00 2001 From: burnout87 Date: Thu, 26 Sep 2024 16:50:39 +0200 Subject: [PATCH 62/76] sanitize request values --- cdci_data_analysis/flask_app/app.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 1df2aeb9..5179aa14 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -432,9 +432,13 @@ def push_renku_branch(): @app.route('/run_adql_query') def run_adql_query(): - logger.info("request.args: %s ", request.args) + par_dic = request.values.to_dict() + sanitized_request_values = sanitize_dict_before_log(par_dic) + logger.info('\033[32m===========================> run_adql_query\033[0m') - token = request.args.get('token', None) + logger.info('\033[33m raw request values: %s \033[0m', dict(sanitized_request_values)) + + token = par_dic.get('token', None) app_config = app.config.get('conf') secret_key = app_config.secret_key @@ -445,7 +449,7 @@ def run_adql_query(): if output_code is not None: return make_response(output, output_code) - adql_query = request.args.get('adql_query', None) + adql_query = par_dic.get('adql_query', None) vo_psql_pg_host = app_config.vo_psql_pg_host vo_psql_pg_user = app_config.vo_psql_pg_user 
vo_psql_pg_password = app_config.vo_psql_pg_password From a660af07b3746561466723ba5deff2ea3d8eb7d2 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 14:36:02 +0200 Subject: [PATCH 63/76] using value var --- cdci_data_analysis/analysis/ivoa_helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 32eda88e..2192f310 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -85,10 +85,10 @@ def run_ivoa_query_from_product_gallery(parsed_query_obj, if product_gallery_url is not None: for index, value in enumerate(list_row): description = cursor.description[index] - if description.name in {'path', 'path_alias'}: - if list_row[index].startswith('/'): - list_row[index] = row[index][1:] - list_row[index] = os.path.join(product_gallery_url, list_row[index]) + if description.name in {'path', 'path_alias'} and value is not None and isinstance(value, str): + if value.startswith('/'): + value = value[1:] + list_row[index] = os.path.join(product_gallery_url, value) result_list.append(list_row) except (Exception, DatabaseError) as e: From e995247001d64765928b25f38187c3c32dede2c0 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 14:36:13 +0200 Subject: [PATCH 64/76] postgresql connector library --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9ae39326..7bed627e 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,8 @@ "giturlparse", "sentry-sdk", "validators==0.28.3", - "jsonschema<=4.17.3" + "jsonschema<=4.17.3", + 'psycopg2' ] test_req = [ From d2c5acd99f00d41c39a425e47bf72308d284b5a6 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 14:55:48 +0200 Subject: [PATCH 65/76] no query parsing --- cdci_data_analysis/analysis/ivoa_helper.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git 
a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 2192f310..96913bd2 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -14,26 +14,16 @@ def parse_adql_query(query): try: - # queryparser adt = ADQLQueryTranslator(query) - qp = PostgreSQLQueryProcessor() - qp.set_query(adt.to_postgresql()) - qp.process_query(replace_schema_name={'mmoda_pg_dev': 'public'}) output_obj = dict( - columns=qp.display_columns, - tables=qp.tables, - rest=qp, mysql_query=None, - psql_query=qp.query + psql_query=adt.to_postgresql() ) except QuerySyntaxError as qe: logger.error(f'Error parsing ADQL query: {qe}') output_obj = dict( - tables=None, - columns=None, - rest=None, mysql_query=None, psql_query=None ) From 081f08f76ca6d33f85e247c36cd25894f7dae669 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 15:01:17 +0200 Subject: [PATCH 66/76] not needed import --- cdci_data_analysis/analysis/ivoa_helper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cdci_data_analysis/analysis/ivoa_helper.py b/cdci_data_analysis/analysis/ivoa_helper.py index 96913bd2..db01cce1 100644 --- a/cdci_data_analysis/analysis/ivoa_helper.py +++ b/cdci_data_analysis/analysis/ivoa_helper.py @@ -1,7 +1,6 @@ import os.path from queryparser.adql import ADQLQueryTranslator -from queryparser.postgresql import PostgreSQLQueryProcessor from queryparser.exceptions import QuerySyntaxError from psycopg2 import connect, DatabaseError From fb20a94bb63b1b06b7eb535446ec0196a946ef16 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 15:25:03 +0200 Subject: [PATCH 67/76] adapted conf example --- cdci_data_analysis/config_dir/conf_env.yml.example | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cdci_data_analysis/config_dir/conf_env.yml.example b/cdci_data_analysis/config_dir/conf_env.yml.example index 82f2cd0f..dd8771cc 100644 --- a/cdci_data_analysis/config_dir/conf_env.yml.example +++ 
b/cdci_data_analysis/config_dir/conf_env.yml.example @@ -129,3 +129,8 @@ dispatcher: vo_mysql_pg_user: MYSQL_PG_USER vo_mysql_pg_password: MYSQL_PG_PASSWORD vo_mysql_pg_db: MYSQL_PG_DB + # postgresql credentials + vo_psql_pg_host: PSQL_PG_HOST + vo_psql_pg_user: PSQL_PG_USER + vo_psql_pg_password: PSQL_PG_PASSWORD + vo_psql_pg_db: PSQL_PG_DB \ No newline at end of file From d1438aa4cbeb31efd085daa61a455a50d3a78820 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 15:35:08 +0200 Subject: [PATCH 68/76] not needed requirements --- requirements.txt | 2 -- setup.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index f551831e..7e13b3e3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,8 +29,6 @@ GitPython nbformat sentry-sdk pytest-sentry -sqlparse -queryparser-python3 -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index 7bed627e..7d44ff71 100644 --- a/setup.py +++ b/setup.py @@ -45,8 +45,6 @@ "black>=22.10.0", "bs4", "GitPython", - "sqlparse", - "queryparser-python3", "nbformat", "giturlparse", "sentry-sdk", From 6b20e0d423628665e52928dbaea3ccdd1b1b1553 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 15:36:38 +0200 Subject: [PATCH 69/76] not needed requirements --- requirements.txt | 1 + setup.py | 1 + 2 files changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 7e13b3e3..2b56561d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,6 +29,7 @@ GitPython nbformat sentry-sdk pytest-sentry +queryparser-python3 -e git+https://github.com/oda-hub/oda_api.git#egg=oda_api MarkupSafe==2.0.1 diff --git a/setup.py b/setup.py index 7d44ff71..2a2dce29 100644 --- a/setup.py +++ b/setup.py @@ -45,6 +45,7 @@ "black>=22.10.0", "bs4", "GitPython", + "queryparser-python3", "nbformat", "giturlparse", "sentry-sdk", From 900f223e469f5baaf0685ca6c10cf159c084ecd7 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 
Sep 2024 16:33:17 +0200 Subject: [PATCH 70/76] freezing version pytest-xdist --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 2b56561d..1bec6967 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,6 +4,7 @@ pyyaml simplejson flask==2.0.3 astropy>=5.0.1 +pytest-xdist<=3.5.0 pylogstash_context>=0.1.19 gunicorn decorator From f7a1055b4826679ce7a45844921f7cfb1ccd88ff Mon Sep 17 00:00:00 2001 From: burnout87 Date: Fri, 27 Sep 2024 18:12:12 +0200 Subject: [PATCH 71/76] adapted config tests and new config test --- cdci_data_analysis/pytest_fixtures.py | 27 +++++++++++++++++++++++++++ tests/conftest.py | 2 ++ tests/test_server_basic.py | 25 +++++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/cdci_data_analysis/pytest_fixtures.py b/cdci_data_analysis/pytest_fixtures.py index 85649a53..c3c4000c 100644 --- a/cdci_data_analysis/pytest_fixtures.py +++ b/cdci_data_analysis/pytest_fixtures.py @@ -611,6 +611,27 @@ def dispatcher_test_conf_with_gallery_fn(dispatcher_test_conf_fn): yield fn +@pytest.fixture +def dispatcher_test_conf_with_vo_options_fn(dispatcher_test_conf_fn): + fn = "test-dispatcher-conf-with-vo-options.yaml" + + with open(fn, "w") as f: + with open(dispatcher_test_conf_fn) as f_default: + f.write(f_default.read()) + + f.write('\n vo_options:' + '\n vo_mysql_pg_host: "localhost"' + '\n vo_mysql_pg_user: "user"' + '\n vo_mysql_pg_password: "password"' + '\n vo_mysql_pg_db: "database"' + '\n vo_psql_pg_host: "localhost"' + '\n vo_psql_pg_user: "user"' + '\n vo_psql_pg_password: "password"' + '\n vo_psql_pg_db: "database"') + + yield fn + + @pytest.fixture def dispatcher_test_conf_with_matrix_options_fn(dispatcher_test_conf_fn): fn = "test-dispatcher-conf-with-matrix-options.yaml" @@ -708,10 +729,16 @@ def dispatcher_test_conf_with_gallery(dispatcher_test_conf_with_gallery_fn): yield yaml.load(open(dispatcher_test_conf_with_gallery_fn), Loader=yaml.SafeLoader)['dispatcher'] 
+@pytest.fixture +def dispatcher_test_conf_with_vo_options(dispatcher_test_conf_with_vo_options_fn): + yield yaml.load(open(dispatcher_test_conf_with_vo_options_fn), Loader=yaml.SafeLoader)['dispatcher'] + + @pytest.fixture def dispatcher_test_conf_with_matrix_options(dispatcher_test_conf_with_matrix_options_fn): yield yaml.load(open(dispatcher_test_conf_with_matrix_options_fn), Loader=yaml.SafeLoader)['dispatcher'] + @pytest.fixture def dispatcher_test_conf_with_gallery_no_resolver(dispatcher_test_conf_with_gallery_no_resolver_fn): yield yaml.load(open(dispatcher_test_conf_with_gallery_no_resolver_fn), Loader=yaml.SafeLoader)['dispatcher'] diff --git a/tests/conftest.py b/tests/conftest.py index 1f6659bd..f026f5bc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,9 +18,11 @@ gunicorn_dispatcher_long_living_fixture_with_matrix_options, dispatcher_test_conf, dispatcher_test_conf_with_gallery, + dispatcher_test_conf_with_vo_options, dispatcher_test_conf_with_gallery_no_resolver, dispatcher_test_conf_empty_sentry_fn, dispatcher_test_conf_with_gallery_fn, + dispatcher_test_conf_with_vo_options_fn, dispatcher_test_conf_with_gallery_no_resolver_fn, dispatcher_live_fixture_with_external_products_url, dispatcher_live_fixture_with_default_route_products_url, diff --git a/tests/test_server_basic.py b/tests/test_server_basic.py index 25af7d2f..75bb1c1b 100644 --- a/tests/test_server_basic.py +++ b/tests/test_server_basic.py @@ -2407,6 +2407,7 @@ def test_example_config(dispatcher_test_conf): example_config = yaml.load(open(example_config_fn), Loader=yaml.SafeLoader)['dispatcher'] example_config.pop('product_gallery_options', None) example_config.pop('matrix_options', None) + example_config.pop('vo_options', None) mapper = lambda x, y: ".".join(map(str, x)) example_config_keys = flatten_nested_structure(example_config, mapper) @@ -2428,6 +2429,7 @@ def test_example_config_with_gallery(dispatcher_test_conf_with_gallery): example_config = 
yaml.load(open(example_config_fn), Loader=yaml.SafeLoader)['dispatcher'] example_config.pop('matrix_options', None) + example_config.pop('vo_options', None) mapper = lambda x, y: ".".join(map(str, x)) example_config_keys = flatten_nested_structure(example_config, mapper) @@ -2449,6 +2451,7 @@ def test_example_config_with_matrix_options(dispatcher_test_conf_with_matrix_opt with open(example_config_fn) as example_config_fn_f: example_config = yaml.load(example_config_fn_f, Loader=yaml.SafeLoader)['dispatcher'] example_config.pop('product_gallery_options', None) + example_config.pop('vo_options', None) mapper = lambda x, y: ".".join(map(str, x)) example_config_keys = flatten_nested_structure(example_config, mapper) @@ -2460,6 +2463,28 @@ def test_example_config_with_matrix_options(dispatcher_test_conf_with_matrix_opt assert set(example_config_keys) == set(test_config_keys) +def test_example_config_with_vo_options(dispatcher_test_conf_with_vo_options): + import cdci_data_analysis.config_dir + + example_config_fn = os.path.join( + os.path.dirname(cdci_data_analysis.__file__), + "config_dir/conf_env.yml.example" + ) + with open(example_config_fn) as example_config_fn_f: + example_config = yaml.load(example_config_fn_f, Loader=yaml.SafeLoader)['dispatcher'] + example_config.pop('product_gallery_options', None) + example_config.pop('matrix_options', None) + + mapper = lambda x, y: ".".join(map(str, x)) + example_config_keys = flatten_nested_structure(example_config, mapper) + test_config_keys = flatten_nested_structure(dispatcher_test_conf_with_vo_options, mapper) + + print("\n\n\nexample_config_keys", example_config_keys) + print("\n\n\ntest_config_keys", test_config_keys) + + assert set(example_config_keys) == set(test_config_keys) + + def test_image(dispatcher_live_fixture): server = dispatcher_live_fixture From 32f02fb522cbcfe2acc53e6363f00784ff1dcd0e Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 14:16:32 +0200 Subject: [PATCH 72/76] in case local 
resolver fails, fallback to the external resolver --- cdci_data_analysis/analysis/drupal_helper.py | 44 +++++++++++++++----- cdci_data_analysis/flask_app/app.py | 11 +++-- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/cdci_data_analysis/analysis/drupal_helper.py b/cdci_data_analysis/analysis/drupal_helper.py index ded4b787..9dc6378c 100644 --- a/cdci_data_analysis/analysis/drupal_helper.py +++ b/cdci_data_analysis/analysis/drupal_helper.py @@ -21,6 +21,7 @@ from enum import Enum, auto from astropy.coordinates import SkyCoord, Angle from astropy import units as u +import xml.etree.ElementTree as ET from cdci_data_analysis.analysis import tokenHelper from ..analysis.exceptions import RequestNotUnderstood, InternalError, RequestNotAuthorized @@ -551,11 +552,14 @@ def post_content_to_gallery(decoded_token, if update_astro_entity: auto_update = kwargs.pop('auto_update', 'False') == 'True' if auto_update is True: - name_resolver_url = disp_conf.name_resolver_url + local_name_resolver_url = disp_conf.local_name_resolver_url + external_name_resolver_url = disp_conf.external_name_resolver_url entities_portal_url = disp_conf.entities_portal_url - resolved_obj = resolve_name(name_resolver_url=name_resolver_url, + resolved_obj = resolve_name(local_name_resolver_url=local_name_resolver_url, + external_name_resolver_url=external_name_resolver_url, entities_portal_url=entities_portal_url, - name=src_name) + name=src_name, + sentry_dsn=sentry_dsn) if resolved_obj is not None: msg = '' if 'message' in resolved_obj: @@ -1488,11 +1492,11 @@ def check_matching_coords(source_1_name, source_1_coord_ra, source_1_coord_dec, return False -def resolve_name(name_resolver_url: str, entities_portal_url: str = None, name: str = None): +def resolve_name(local_name_resolver_url: str, external_name_resolver_url: str, entities_portal_url: str = None, name: str = None, sentry_dsn=None): resolved_obj = {} if name is not None: quoted_name = urllib.parse.quote(name.strip()) - res = 
requests.get(name_resolver_url.format(quoted_name)) + res = requests.get(local_name_resolver_url.format(quoted_name)) if res.status_code == 200: returned_resolved_obj = res.json() if 'success' in returned_resolved_obj: @@ -1513,12 +1517,32 @@ def resolve_name(name_resolver_url: str, entities_portal_url: str = None, name: logger.info(f"resolution of the object {name} unsuccessful") resolved_obj['message'] = f'{name} could not be resolved' else: - logger.warning(f"there seems to be some problem in completing the request for the resolution of the object: {name}\n" - f"the request lead to the error {res.text}, " + logger.warning("There seems to be some problem in completing the request for the resolution of the object" + f" \"{name}\" using the local resolver.\n" + f"The request lead to the error {res.text}, " "this might be due to an error in the url or the service " - "requested is currently not available, " - "please check your request and try to issue it again") - raise InternalError('issue when performing a request to the local resolver', + "requested is currently not available. The external resolver will be used.") + if sentry_dsn is not None: + sentry.capture_message(f'Issue in resolving the object {name} using the local resolver\n{res.text}') + res = requests.get(external_name_resolver_url.format(quoted_name)) + if res.status_code == 200: + root = ET.fromstring(res.text) + resolver_tag = root.find('.//Resolver') + if resolver_tag is not None: + ra_tag = resolver_tag.find('.//jradeg') + resolved_obj['RA'] = float(ra_tag.text) + + dec_tag = resolver_tag.find('.//jdedeg') + resolved_obj['DEC'] = float(dec_tag.text) + else: + logger.warning("There seems to be some problem in completing the request for the resolution of the object" + f" \"{name}\" using the external resolver.\n" + f"The request lead to the error {res.text}, " + "this might be due to an error in the url or the service " + "requested is currently not available. 
The object could not be resolved.") + if sentry_dsn is not None: + sentry.capture_message(f'Issue in resolving the object {name} using the external resolver\n{res.text}') + raise InternalError('issue when performing a request to the external resolver', status_code=500, payload={'drupal_helper_error_message': res.text}) return resolved_obj diff --git a/cdci_data_analysis/flask_app/app.py b/cdci_data_analysis/flask_app/app.py index 5179aa14..39e4c520 100644 --- a/cdci_data_analysis/flask_app/app.py +++ b/cdci_data_analysis/flask_app/app.py @@ -685,12 +685,17 @@ def resolve_name(): name = par_dic.get('name', None) - name_resolver_url = app_config.name_resolver_url + local_name_resolver_url = app_config.local_name_resolver_url + external_name_resolver_url = app_config.external_name_resolver_url entities_portal_url = app_config.entities_portal_url - resolve_object = drupal_helper.resolve_name(name_resolver_url=name_resolver_url, + sentry_dsn = sentry.sentry_url + + resolve_object = drupal_helper.resolve_name(local_name_resolver_url=local_name_resolver_url, + external_name_resolver_url=external_name_resolver_url, entities_portal_url=entities_portal_url, - name=name) + name=name, + sentry_dsn=sentry_dsn) return resolve_object From 269ac5043bcd1effd1d301723d6f30fed0eb5bf6 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 14:16:42 +0200 Subject: [PATCH 73/76] extended configuration --- cdci_data_analysis/configurer.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cdci_data_analysis/configurer.py b/cdci_data_analysis/configurer.py index 2e939c8b..e5e19e8c 100644 --- a/cdci_data_analysis/configurer.py +++ b/cdci_data_analysis/configurer.py @@ -260,7 +260,10 @@ def __init__(self, cfg_dict, origin=None): disp_dict.get('product_gallery_options', {}).get('product_gallery_secret_key', None), disp_dict.get('product_gallery_options', {}).get('product_gallery_timezone', "Europe/Zurich"), - disp_dict.get('product_gallery_options', 
{}).get('name_resolver_url', 'https://resolver-prod.obsuks1.unige.ch/api/v1.1/byname/{}'), + disp_dict.get('product_gallery_options', {}).get('local_name_resolver_url', + 'https://resolver-prod.obsuks1.unige.ch/api/v1.1/byname/{}'), + disp_dict.get('product_gallery_options', {}).get('external_name_resolver_url', + 'http://cdsweb.u-strasbg.fr/cgi-bin/nph-sesame/-oxp/NSV?{}'), disp_dict.get('product_gallery_options', {}).get('entities_portal_url', 'http://cdsportal.u-strasbg.fr/?target={}'), disp_dict.get('product_gallery_options', {}).get('converttime_revnum_service_url', 'https://www.astro.unige.ch/mmoda/dispatch-data/gw/timesystem/api/v1.0/converttime/UTC/{}/REVNUM'), disp_dict.get('renku_options', {}).get('renku_gitlab_repository_url', None), @@ -346,7 +349,8 @@ def set_conf_dispatcher(self, product_gallery_url, product_gallery_secret_key, product_gallery_timezone, - name_resolver_url, + local_name_resolver_url, + external_name_resolver_url, entities_portal_url, converttime_revnum_service_url, renku_gitlab_repository_url, @@ -405,7 +409,8 @@ def set_conf_dispatcher(self, self.product_gallery_url = product_gallery_url self.product_gallery_secret_key = product_gallery_secret_key self.product_gallery_timezone = product_gallery_timezone - self.name_resolver_url = name_resolver_url + self.local_name_resolver_url = local_name_resolver_url + self.external_name_resolver_url = external_name_resolver_url self.entities_portal_url = entities_portal_url self.converttime_revnum_service_url = converttime_revnum_service_url self.renku_gitlab_repository_url = renku_gitlab_repository_url From 908c8ac68cfdd4189d193b305b00b2c780936b84 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 17:30:28 +0200 Subject: [PATCH 74/76] handling not resolvable url --- cdci_data_analysis/analysis/drupal_helper.py | 123 ++++++++++++------- 1 file changed, 81 insertions(+), 42 deletions(-) diff --git a/cdci_data_analysis/analysis/drupal_helper.py 
b/cdci_data_analysis/analysis/drupal_helper.py index 9dc6378c..3a3beb9a 100644 --- a/cdci_data_analysis/analysis/drupal_helper.py +++ b/cdci_data_analysis/analysis/drupal_helper.py @@ -1496,55 +1496,94 @@ def resolve_name(local_name_resolver_url: str, external_name_resolver_url: str, resolved_obj = {} if name is not None: quoted_name = urllib.parse.quote(name.strip()) - res = requests.get(local_name_resolver_url.format(quoted_name)) + local_name_resolver_url_formatted = local_name_resolver_url.format(quoted_name) + try: + res = requests.get(local_name_resolver_url_formatted) + if res.status_code == 200: + returned_resolved_obj = res.json() + if 'success' in returned_resolved_obj: + resolved_obj['name'] = name.replace('_', ' ') + if returned_resolved_obj['success']: + logger.info(f"object {name} successfully resolved") + if 'ra' in returned_resolved_obj: + resolved_obj['RA'] = float(returned_resolved_obj['ra']) + if 'dec' in returned_resolved_obj: + resolved_obj['DEC'] = float(returned_resolved_obj['dec']) + if 'object_ids' in returned_resolved_obj: + resolved_obj['object_ids'] = returned_resolved_obj['object_ids'] + if 'object_type' in returned_resolved_obj: + resolved_obj['object_type'] = returned_resolved_obj['object_type'] + resolved_obj['entity_portal_link'] = entities_portal_url.format(quoted_name) + resolved_obj['message'] = f'{name} successfully resolved' + elif not returned_resolved_obj['success']: + logger.info(f"resolution of the object {name} unsuccessful") + resolved_obj['message'] = f'{name} could not be resolved' + else: + logger.warning("There seems to be some problem in completing the request for the resolution of the object" + f" \"{name}\" using the local resolver.\n" + f"The request lead to the error {res.text}, " + "this might be due to an error in the url or the service " + "requested is currently not available. 
The external resolver will be used.") + if sentry_dsn is not None: + sentry.capture_message(f'Failed to resolve object "{name}" using the local resolver. ' + f'URL: {local_name_resolver_url_formatted} ' + f'Status Code: {res.status_code} ' + f'Response: {res.text}') + except (ConnectionError, + requests.exceptions.ConnectionError, + requests.exceptions.Timeout) as e: + logger.warning(f'An exception occurred while trying to resolve the object "{name}" using the local resolver. ' + f'using the url: {local_name_resolver_url_formatted}. Exception details: {str(e)}') + if sentry_dsn is not None: + sentry.capture_message(f'An exception occurred while trying to resolve the object "{name}" using the local resolver. ' + f'URL: {local_name_resolver_url_formatted} ' + f"Exception details: {str(e)}") + res = requests.get(external_name_resolver_url.format(quoted_name)) if res.status_code == 200: - returned_resolved_obj = res.json() - if 'success' in returned_resolved_obj: - resolved_obj['name'] = name.replace('_', ' ') - if returned_resolved_obj['success']: - logger.info(f"object {name} successfully resolved") - if 'ra' in returned_resolved_obj: - resolved_obj['RA'] = float(returned_resolved_obj['ra']) - if 'dec' in returned_resolved_obj: - resolved_obj['DEC'] = float(returned_resolved_obj['dec']) - if 'object_ids' in returned_resolved_obj: - resolved_obj['object_ids'] = returned_resolved_obj['object_ids'] - if 'object_type' in returned_resolved_obj: - resolved_obj['object_type'] = returned_resolved_obj['object_type'] - resolved_obj['entity_portal_link'] = entities_portal_url.format(quoted_name) - resolved_obj['message'] = f'{name} successfully resolved' - elif not returned_resolved_obj['success']: - logger.info(f"resolution of the object {name} unsuccessful") + root = ET.fromstring(res.text) + resolved_obj['name'] = name.replace('_', ' ') + resolver_tag = root.find('.//Resolver') + if resolver_tag is not None: + ra_tag = resolver_tag.find('.//jradeg') + dec_tag = 
resolver_tag.find('.//jdedeg') + if ra_tag is None or dec_tag is None: + info_tag = root.find('.//INFO') resolved_obj['message'] = f'{name} could not be resolved' + if info_tag is not None: + message_info = info_tag.text + resolved_obj['message'] += f': {message_info}' + else: + resolved_obj['RA'] = float(ra_tag.text) + resolved_obj['DEC'] = float(dec_tag.text) + resolved_obj['entity_portal_link'] = entities_portal_url.format(quoted_name) + else: + warning_msg = ("There seems to be some problem in completing the request for the resolution of the object" + f" \"{name}\" using the external resolver. ") + resolved_obj['message'] = f'{name} could not be resolved' + info_tag = root.find('.//INFO') + if info_tag is not None: + warning_msg += (f"The request lead to the error {info_tag.text}, " + "this might be due to an error in the name of the object that has been provided.") + resolved_obj['message'] += f': {info_tag.text}' + logger.warning(warning_msg) + if sentry_dsn is not None: + sentry.capture_message(f'Failed to resolve object "{name}" using the remote resolver. ' + f'URL: {external_name_resolver_url.format(quoted_name)} ' + f'Status Code: {res.status_code} ' + f'Response: {res.text} ' + f"Info returned from the resolver: {resolved_obj['message']}") else: logger.warning("There seems to be some problem in completing the request for the resolution of the object" - f" \"{name}\" using the local resolver.\n" + f" \"{name}\" using the external resolver.\n" f"The request lead to the error {res.text}, " "this might be due to an error in the url or the service " - "requested is currently not available. The external resolver will be used.") + "requested is currently not available. 
The object could not be resolved.") if sentry_dsn is not None: - sentry.capture_message(f'Issue in resolving the object {name} using the local resolver\n{res.text}') - res = requests.get(external_name_resolver_url.format(quoted_name)) - if res.status_code == 200: - root = ET.fromstring(res.text) - resolver_tag = root.find('.//Resolver') - if resolver_tag is not None: - ra_tag = resolver_tag.find('.//jradeg') - resolved_obj['RA'] = float(ra_tag.text) - - dec_tag = resolver_tag.find('.//jdedeg') - resolved_obj['DEC'] = float(dec_tag.text) - else: - logger.warning("There seems to be some problem in completing the request for the resolution of the object" - f" \"{name}\" using the external resolver.\n" - f"The request lead to the error {res.text}, " - "this might be due to an error in the url or the service " - "requested is currently not available. The object could not be resolved.") - if sentry_dsn is not None: - sentry.capture_message(f'Issue in resolving the object {name} using the external resolver\n{res.text}') - raise InternalError('issue when performing a request to the external resolver', - status_code=500, - payload={'drupal_helper_error_message': res.text}) + sentry.capture_message(f'Failed to resolve object "{name}" using the remote resolver. 
' + f'URL: {external_name_resolver_url.format(quoted_name)} ' + f'Status Code: {res.status_code} ' + f'Response: {res.text}') + resolved_obj['message'] = f'{name} could not be resolved: {res.text}' return resolved_obj From 71e5187ca894cef56951502f1b154d146fcc8a59 Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 17:31:06 +0200 Subject: [PATCH 75/76] extended tests --- cdci_data_analysis/pytest_fixtures.py | 43 +++++++++++++++++++++++- tests/conftest.py | 3 ++ tests/test_server_basic.py | 47 +++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/cdci_data_analysis/pytest_fixtures.py b/cdci_data_analysis/pytest_fixtures.py index c3c4000c..71cb2146 100644 --- a/cdci_data_analysis/pytest_fixtures.py +++ b/cdci_data_analysis/pytest_fixtures.py @@ -1,6 +1,7 @@ # this could be a separate package or/and a pytest plugin from json import JSONDecodeError +import responses import sentry_sdk import yaml @@ -604,7 +605,28 @@ def dispatcher_test_conf_with_gallery_fn(dispatcher_test_conf_fn): '\n product_gallery_url: "http://cdciweb02.astro.unige.ch/mmoda/galleryd"' f'\n product_gallery_secret_key: "{os.getenv("DISPATCHER_PRODUCT_GALLERY_SECRET_KEY", "secret_key")}"' '\n product_gallery_timezone: "Europe/Zurich"' - '\n name_resolver_url: "https://resolver-prod.obsuks1.unige.ch/api/v1.1/byname/{}"' + '\n local_name_resolver_url: "https://resolver-prod.obsuks1.unige.ch/api/v1.1/byname/{}"' + '\n external_name_resolver_url: "http://cdsweb.u-strasbg.fr/cgi-bin/nph-sesame/-oxp/NSV?{}"' + '\n entities_portal_url: "http://cdsportal.u-strasbg.fr/?target={}"' + '\n converttime_revnum_service_url: "https://www.astro.unige.ch/mmoda/dispatch-data/gw/timesystem/api/v1.0/converttime/UTC/{}/REVNUM"') + + yield fn + + +@pytest.fixture +def dispatcher_test_conf_with_gallery_invalid_local_resolver_fn(dispatcher_test_conf_fn): + fn = "test-dispatcher-conf-with-gallery.yaml" + + with open(fn, "w") as f: + with open(dispatcher_test_conf_fn) as f_default: + 
f.write(f_default.read()) + + f.write('\n product_gallery_options:' + '\n product_gallery_url: "http://cdciweb02.astro.unige.ch/mmoda/galleryd"' + f'\n product_gallery_secret_key: "{os.getenv("DISPATCHER_PRODUCT_GALLERY_SECRET_KEY", "secret_key")}"' + '\n product_gallery_timezone: "Europe/Zurich"' + '\n local_name_resolver_url: "http://invalid_url/"' + '\n external_name_resolver_url: "http://cdsweb.u-strasbg.fr/cgi-bin/nph-sesame/-oxp/NSV?{}"' '\n entities_portal_url: "http://cdsportal.u-strasbg.fr/?target={}"' '\n converttime_revnum_service_url: "https://www.astro.unige.ch/mmoda/dispatch-data/gw/timesystem/api/v1.0/converttime/UTC/{}/REVNUM"') @@ -729,6 +751,11 @@ def dispatcher_test_conf_with_gallery(dispatcher_test_conf_with_gallery_fn): yield yaml.load(open(dispatcher_test_conf_with_gallery_fn), Loader=yaml.SafeLoader)['dispatcher'] +@pytest.fixture +def dispatcher_test_conf_with_gallery_invalid_local_resolver(dispatcher_test_conf_with_gallery_invalid_local_resolver_fn): + yield yaml.load(open(dispatcher_test_conf_with_gallery_invalid_local_resolver_fn), Loader=yaml.SafeLoader)['dispatcher'] + + @pytest.fixture def dispatcher_test_conf_with_vo_options(dispatcher_test_conf_with_vo_options_fn): yield yaml.load(open(dispatcher_test_conf_with_vo_options_fn), Loader=yaml.SafeLoader)['dispatcher'] @@ -1147,6 +1174,20 @@ def dispatcher_live_fixture_with_gallery_no_resolver(pytestconfig, dispatcher_te os.kill(pid, signal.SIGINT) +@pytest.fixture +def dispatcher_live_fixture_with_gallery_invalid_local_resolver(pytestconfig, dispatcher_test_conf_with_gallery_invalid_local_resolver_fn, + dispatcher_debug): + dispatcher_state = start_dispatcher(pytestconfig.rootdir, dispatcher_test_conf_with_gallery_invalid_local_resolver_fn) + + service = dispatcher_state['url'] + pid = dispatcher_state['pid'] + + yield service + + kill_child_processes(pid, signal.SIGINT) + os.kill(pid, signal.SIGINT) + + @pytest.fixture def dispatcher_live_fixture_no_products_url(pytestconfig, 
dispatcher_test_conf_no_products_url_fn, dispatcher_debug): dispatcher_state = start_dispatcher(pytestconfig.rootdir, dispatcher_test_conf_no_products_url_fn) diff --git a/tests/conftest.py b/tests/conftest.py index f026f5bc..25e5194b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -11,6 +11,7 @@ dispatcher_live_fixture_no_debug_mode, dispatcher_live_fixture_with_gallery, dispatcher_live_fixture_with_gallery_no_resolver, + dispatcher_live_fixture_with_gallery_invalid_local_resolver, dispatcher_long_living_fixture, gunicorn_dispatcher_long_living_fixture, dispatcher_long_living_fixture_with_matrix_options, @@ -20,8 +21,10 @@ dispatcher_test_conf_with_gallery, dispatcher_test_conf_with_vo_options, dispatcher_test_conf_with_gallery_no_resolver, + dispatcher_test_conf_with_gallery_invalid_local_resolver, dispatcher_test_conf_empty_sentry_fn, dispatcher_test_conf_with_gallery_fn, + dispatcher_test_conf_with_gallery_invalid_local_resolver_fn, dispatcher_test_conf_with_vo_options_fn, dispatcher_test_conf_with_gallery_no_resolver_fn, dispatcher_live_fixture_with_external_products_url, diff --git a/tests/test_server_basic.py b/tests/test_server_basic.py index 75bb1c1b..f25b1d46 100644 --- a/tests/test_server_basic.py +++ b/tests/test_server_basic.py @@ -2717,6 +2717,53 @@ def test_source_resolver(dispatcher_live_fixture_with_gallery, dispatcher_test_c .format(urllib.parse.quote(source_to_resolve.strip())) +@pytest.mark.test_drupal +@pytest.mark.parametrize("source_to_resolve", ['Mrk 421', 'Mrk_421', 'GX 1+4', 'fake object', None]) +def test_source_resolver_invalid_local_resolver(dispatcher_live_fixture_with_gallery_invalid_local_resolver, dispatcher_test_conf_with_gallery_invalid_local_resolver, source_to_resolve): + server = dispatcher_live_fixture_with_gallery_invalid_local_resolver + + logger.info("constructed server: %s", server) + + # let's generate a valid token + token_payload = { + **default_token_payload, + "roles": "general, gallery contributor", + } + 
encoded_token = jwt.encode(token_payload, secret_key, algorithm='HS256') + + params = {'name': source_to_resolve, + 'token': encoded_token} + + c = requests.get(os.path.join(server, "resolve_name"), + params={**params} + ) + + assert c.status_code == 200 + resolved_obj = c.json() + print('Resolved object returned: ', resolved_obj) + + if source_to_resolve is None: + assert resolved_obj == {} + elif source_to_resolve == 'fake object': + assert 'name' in resolved_obj + assert 'message' in resolved_obj + + # the name resolver replaces automatically underscores with spaces in the returned name + assert resolved_obj['name'] == source_to_resolve + assert resolved_obj['message'] == f'{source_to_resolve} could not be resolved' + else: + assert 'name' in resolved_obj + assert 'DEC' in resolved_obj + assert 'RA' in resolved_obj + assert 'entity_portal_link' in resolved_obj + assert 'object_ids' in resolved_obj + assert 'object_type' in resolved_obj + + assert resolved_obj['name'] == source_to_resolve.replace('_', ' ') + assert resolved_obj['entity_portal_link'] == dispatcher_test_conf_with_gallery_invalid_local_resolver["product_gallery_options"]["entities_portal_url"]\ + .format(urllib.parse.quote(source_to_resolve.strip())) + + @pytest.mark.test_drupal @pytest.mark.parametrize("type_group", ['instruments', 'Instruments', 'products', 'sources', 'aaaaaa', '', None]) @pytest.mark.parametrize("parent", ['isgri', 'production', 'all', 'aaaaaa', '', None]) From 4ef033950571a3af1b1ef23d9be3f439c560705e Mon Sep 17 00:00:00 2001 From: burnout87 Date: Wed, 2 Oct 2024 17:34:46 +0200 Subject: [PATCH 76/76] adapted test --- cdci_data_analysis/config_dir/conf_env.yml.example | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cdci_data_analysis/config_dir/conf_env.yml.example b/cdci_data_analysis/config_dir/conf_env.yml.example index dd8771cc..115ba4c0 100644 --- a/cdci_data_analysis/config_dir/conf_env.yml.example +++ b/cdci_data_analysis/config_dir/conf_env.yml.example @@ -115,8 +115,10 
@@ dispatcher: product_gallery_secret_key: PRODUCT_GALLERY_SECRET_KEY # timezone used within the drupal configuration, these two values have to be always aligned product_gallery_timezone: PRODUCT_GALLERY_SECRET_KEY - # url of the name resolver - name_resolver_url: NAME_RESOLVER_URL + # url of the local name resolver + local_name_resolver_url: NAME_RESOLVER_URL + # url of the external name resolver + external_name_resolver_url: NAME_RESOLVER_URL # url of the online catalog for astrophysical entities entities_portal_url: ENTITIES_PORTAL_URL # url for the conversion of a given time, in UTC format, to the correspondent REVNUM