added dashboard process

climateintelligence · Apr 12, 2024 · 3254b25 · 3254b25
1 parent ac57812
commit 3254b25
Show file tree

Hide file tree

Showing 4 changed files with 186 additions and 6 deletions.
diff --git a/parrot/processes/__init__.py b/parrot/processes/__init__.py
@@ -1,5 +1,7 @@
 from .wps_say_hello import SayHello
+from .wps_dashboard import Dashboard
 
 processes = [
     SayHello(),
+    Dashboard(),
 ]
diff --git a/parrot/processes/wps_dashboard.py b/parrot/processes/wps_dashboard.py
@@ -0,0 +1,102 @@
+from pathlib import Path
+
+from pywps import Process, LiteralInput, ComplexOutput, Format
+
+from parrot import query
+
+
+class Dashboard(Process):
+    def __init__(self):
+        inputs = [
+            LiteralInput(
+                "time",
+                "Time Period",
+                abstract="The time period for the report seperated by /"
+                "Example: 2023-09-01/2023-09-30",
+                data_type="string",
+                default="2023-09-01/2023-09-30",
+                min_occurs=0,
+                max_occurs=1,
+            ),
+        ]
+        outputs = [
+            ComplexOutput(
+                "report",
+                "Generated HTML Report",
+                as_reference=True,
+                supported_formats=[Format("text/html")],
+            ),
+        ]
+
+        super(Dashboard, self).__init__(
+            self._handler,
+            identifier="dashboard",
+            title="Generate HTML Report",
+            version="1.0",
+            abstract="Generate an HTML report from a provenance database.",
+            inputs=inputs,
+            outputs=outputs,
+            status_supported=True,
+            store_supported=True,
+        )
+
+    def _handler(self, request, response):
+        workdir = Path(self.workdir)
+        # input_csv = request.inputs['input_csv'][0].file
+
+        # Query the provenance database ... result is a Pandas DataFrame
+        df = query.query()
+
+        # Generate an HTML report from the DataFrame
+        html_report = self.write_html(df, workdir)
+
+        print(f"report: {html_report}")
+        response.outputs["report"].file = html_report
+        # response.outputs["report"].output_format = Format("text/html")
+
+        return response
+
+    def write_html(self, df, workdir):
+        # Convert the DataFrame to an HTML table
+        html_table = df.to_html(escape=False, index=False)
+
+        # Define the HTML template
+        html_template = f"""
+        <!DOCTYPE html>
+        <html>
+        <head>
+            <title>Provenance Report</title>
+            <style>
+                table {{
+                    border-collapse: collapse;
+                    width: 100%;
+                    border: 1px solid #ddd;
+                }}
+
+                th, td {{
+                    text-align: left;
+                    padding: 8px;
+                }}
+
+                th {{
+                    background-color: #f2f2f2;
+                }}
+
+                tr:nth-child(even) {{
+                    background-color: #f2f2f2;
+                }}
+            </style>
+        </head>
+        <body>
+            <h1>Provenance Report</h1>
+            {html_table}
+        </body>
+        </html>
+        """
+
+        # Write the HTML template to a file
+        outfile = workdir / "provenance_report.html"
+        with outfile.open(mode="w") as file:
+            file.write(html_template)
+
+        return outfile
diff --git a/parrot/query.py b/parrot/query.py
@@ -0,0 +1,76 @@
+from duck.db import GraphDB
+import pandas as pd
+import json
+import yaml
+
+
+def display_image(base64_image):
+    # img_data = base64.b64decode(base64_image)
+    # img = Image.open(io.BytesIO(img_data))
+    return '<img src="data:image/png;base64,{}" width="200"/>'.format(base64_image)
+
+
+def display_json(data):
+    content = yaml.dump(data, default_flow_style=True, indent=2)
+    return f"<pre>{content}</pre>"
+
+
+def query():
+    query_str = """
+        SELECT ?process ?dataset ?variable ?startTime ?endTime ?input ?output ?info ?histogram
+        WHERE {
+            ?exec rdf:type provone:Execution ;
+                rdfs:label ?process ;
+                clint:dataset_name ?dataset ;
+                clint:variable_name ?variable ;
+                prov:startedAtTime ?startTime ;
+                prov:endedAtTime ?endTime ;
+                clint:info ?info ;
+                clint:histogram ?histogram .
+
+            ?input rdf:type prov:Entity .
+
+            ?output rdf:type prov:Entity ;
+                prov:qualifiedDerivation [ prov:entity ?input; prov:hadActivity ?exec ] .
+    }
+    """  # noqa
+    graph_db = GraphDB()
+    results = graph_db.query(query_str)
+
+    data = []
+    for row in results:
+        # print(row)
+        process = row.process.split("/")[-1]
+        dataset = row.dataset.value
+        variable = row.variable.value
+        start_time = row.startTime.value
+        end_time = row.endTime.value
+        input = row.input.split("/")[-1]
+        input = input.split("urn:clint:")[-1]
+        output = row.output.split("/")[-1]
+        output = output.split("urn:clint:")[-1]
+        # min = row.min.value
+        # max = row.max.value
+        # mean = row.mean.value
+        # stddev = row.stddev.value
+        info = json.loads(row.info.value)
+        histogram = row.histogram.value
+        entry = {
+            "Process": process,
+            "Dataset": dataset,
+            "Variable": variable,
+            "Start Time": start_time,
+            "End Time": end_time,
+            "Input": input,
+            "Output": output,
+            # "Min": min,
+            # "Max": max,
+            # "Mean": mean,
+            # "StdDev": stddev,
+            "Histogram": display_image(histogram),
+        }
+        for key in info:
+            entry[key] = display_json(info[key])
+        data.append(entry)
+    df = pd.DataFrame(data)
+    return df
diff --git a/tests/test_wps_caps.py b/tests/test_wps_caps.py
@@ -6,11 +6,11 @@
 
 def test_wps_caps():
     client = client_for(Service(processes=processes))
-    resp = client.get(service='wps', request='getcapabilities', version='1.0.0')
-    names = resp.xpath_text('/wps:Capabilities'
-                            '/wps:ProcessOfferings'
-                            '/wps:Process'
-                            '/ows:Identifier')
+    resp = client.get(service="wps", request="getcapabilities", version="1.0.0")
+    names = resp.xpath_text(
+        "/wps:Capabilities" "/wps:ProcessOfferings" "/wps:Process" "/ows:Identifier"
+    )
     assert sorted(names.split()) == [
-        'hello',
+        "dashboard",
+        "hello",
     ]