diff --git a/parrot/processes/__init__.py b/parrot/processes/__init__.py index 437441e..7a2dd77 100644 --- a/parrot/processes/__init__.py +++ b/parrot/processes/__init__.py @@ -1,5 +1,7 @@ from .wps_say_hello import SayHello +from .wps_dashboard import Dashboard processes = [ SayHello(), + Dashboard(), ] diff --git a/parrot/processes/wps_dashboard.py b/parrot/processes/wps_dashboard.py new file mode 100644 index 0000000..c2ea766 --- /dev/null +++ b/parrot/processes/wps_dashboard.py @@ -0,0 +1,102 @@ +from pathlib import Path + +from pywps import Process, LiteralInput, ComplexOutput, Format + +from parrot import query + + +class Dashboard(Process): + def __init__(self): + inputs = [ + LiteralInput( + "time", + "Time Period", + abstract="The time period for the report separated by /. " + "Example: 2023-09-01/2023-09-30", + data_type="string", + default="2023-09-01/2023-09-30", + min_occurs=0, + max_occurs=1, + ), + ] + outputs = [ + ComplexOutput( + "report", + "Generated HTML Report", + as_reference=True, + supported_formats=[Format("text/html")], + ), + ] + + super(Dashboard, self).__init__( + self._handler, + identifier="dashboard", + title="Generate HTML Report", + version="1.0", + abstract="Generate an HTML report from a provenance database.", + inputs=inputs, + outputs=outputs, + status_supported=True, + store_supported=True, + ) + + def _handler(self, request, response): + workdir = Path(self.workdir) + # input_csv = request.inputs['input_csv'][0].file + + # Query the provenance database ... 
result is a Pandas DataFrame + df = query.query() + + # Generate an HTML report from the DataFrame + html_report = self.write_html(df, workdir) + + print(f"report: {html_report}") + response.outputs["report"].file = html_report + # response.outputs["report"].output_format = Format("text/html") + + return response + + def write_html(self, df, workdir): + # Convert the DataFrame to an HTML table + html_table = df.to_html(escape=False, index=False) + + # Define the HTML template + html_template = f""" + + + + Provenance Report + + + +

Provenance Report

+ {html_table} + + + """ + + # Write the HTML template to a file + outfile = workdir / "provenance_report.html" + with outfile.open(mode="w") as file: + file.write(html_template) + + return outfile diff --git a/parrot/query.py b/parrot/query.py new file mode 100644 index 0000000..4876937 --- /dev/null +++ b/parrot/query.py @@ -0,0 +1,76 @@ +from duck.db import GraphDB +import pandas as pd +import json +import yaml + + +def display_image(base64_image): + # img_data = base64.b64decode(base64_image) + # img = Image.open(io.BytesIO(img_data)) + return ''.format(base64_image) + + +def display_json(data): + content = yaml.dump(data, default_flow_style=True, indent=2) + return f"
{content}
" + + +def query(): + query_str = """ + SELECT ?process ?dataset ?variable ?startTime ?endTime ?input ?output ?info ?histogram + WHERE { + ?exec rdf:type provone:Execution ; + rdfs:label ?process ; + clint:dataset_name ?dataset ; + clint:variable_name ?variable ; + prov:startedAtTime ?startTime ; + prov:endedAtTime ?endTime ; + clint:info ?info ; + clint:histogram ?histogram . + + ?input rdf:type prov:Entity . + + ?output rdf:type prov:Entity ; + prov:qualifiedDerivation [ prov:entity ?input; prov:hadActivity ?exec ] . + } + """ # noqa + graph_db = GraphDB() + results = graph_db.query(query_str) + + data = [] + for row in results: + # print(row) + process = row.process.split("/")[-1] + dataset = row.dataset.value + variable = row.variable.value + start_time = row.startTime.value + end_time = row.endTime.value + input = row.input.split("/")[-1] + input = input.split("urn:clint:")[-1] + output = row.output.split("/")[-1] + output = output.split("urn:clint:")[-1] + # min = row.min.value + # max = row.max.value + # mean = row.mean.value + # stddev = row.stddev.value + info = json.loads(row.info.value) + histogram = row.histogram.value + entry = { + "Process": process, + "Dataset": dataset, + "Variable": variable, + "Start Time": start_time, + "End Time": end_time, + "Input": input, + "Output": output, + # "Min": min, + # "Max": max, + # "Mean": mean, + # "StdDev": stddev, + "Histogram": display_image(histogram), + } + for key in info: + entry[key] = display_json(info[key]) + data.append(entry) + df = pd.DataFrame(data) + return df diff --git a/tests/test_wps_caps.py b/tests/test_wps_caps.py index 693bdbf..e5fdac7 100644 --- a/tests/test_wps_caps.py +++ b/tests/test_wps_caps.py @@ -6,11 +6,11 @@ def test_wps_caps(): client = client_for(Service(processes=processes)) - resp = client.get(service='wps', request='getcapabilities', version='1.0.0') - names = resp.xpath_text('/wps:Capabilities' - '/wps:ProcessOfferings' - '/wps:Process' - '/ows:Identifier') + resp = 
client.get(service="wps", request="getcapabilities", version="1.0.0") + names = resp.xpath_text( + "/wps:Capabilities" "/wps:ProcessOfferings" "/wps:Process" "/ows:Identifier" + ) assert sorted(names.split()) == [ - 'hello', + "dashboard", + "hello", ]