merged developed, fixed conflicts
Signed-off-by: Duncan Ragsdale <[email protected]>
Thistleman committed Jul 25, 2024
2 parents b4da2d5 + c31d99b commit 9eb88df
Showing 29 changed files with 2,465 additions and 363 deletions.
117 changes: 117 additions & 0 deletions NewTaskRequirements.md
@@ -0,0 +1,117 @@
# Required files for creating a new PV Validation Hub Task

## config.json

Example JSON:

```json
{
    "category_name": "Time Shift Analysis",
    "function_name": "detect_time_shifts",
    "comparison_type": "time_series",
    "performance_metrics": [
        "runtime",
        "mean_absolute_error"
    ],
    "allowable_kwargs": [
        "latitude",
        "longitude",
        "data_sampling_frequency"
    ],
    "ground_truth_compare": [
        "time_series"
    ],
    "public_results_table": "time-shift-public-metrics.json",
    "private_results_columns": [
        "system_id",
        "file_name",
        "run_time",
        "data_requirements",
        "mean_absolute_error_time_series",
        "data_sampling_frequency",
        "issue"
    ],
    "plots": [
        {
            "type": "histogram",
            "x_val": "mean_absolute_error_time_series",
            "color_code": "issue",
            "title": "Time Series MAE Distribution by Issue",
            "save_file_path": "mean_absolute_error_time_series_dist.png"
        },
        {
            "type": "histogram",
            "x_val": "mean_absolute_error_time_series",
            "color_code": "data_sampling_frequency",
            "title": "Time Series MAE Distribution by Sampling Frequency",
            "save_file_path": "mean_absolute_error_time_series_dist.png"
        },
        {
            "type": "histogram",
            "x_val": "run_time",
            "title": "Run Time Distribution",
            "save_file_path": "run_time_dist.png"
        }
    ]
}
```
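
The Hub's worker consumes these keys when it runs a task. As a rough sketch of how such a config could be loaded and sanity-checked (illustrative only; `load_task_config` is a hypothetical helper, not part of the Hub's codebase):

```python
import json


def load_task_config(path: str = "config.json") -> dict:
    """Hypothetical loader: read a task's config.json and check required keys."""
    with open(path) as f:
        config = json.load(f)

    required_keys = [
        "category_name",
        "function_name",
        "comparison_type",
        "performance_metrics",
        "allowable_kwargs",
        "ground_truth_compare",
        "public_results_table",
        "private_results_columns",
        "plots",
    ]
    missing = [key for key in required_keys if key not in config]
    if missing:
        raise KeyError(f"config.json is missing required keys: {missing}")
    return config
```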

## system_metadata.csv

Required columns:

```csv
system_id,name,azimuth,tilt,elevation,latitude,longitude,tracking,climate_type,dc_capacity
```
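
Before packaging a task, it can help to confirm the metadata file actually carries these columns. A minimal, hypothetical check (not part of the Hub's own validation):

```python
import pandas as pd

# Required columns listed above for system_metadata.csv
REQUIRED_SYSTEM_COLUMNS = {
    "system_id", "name", "azimuth", "tilt", "elevation",
    "latitude", "longitude", "tracking", "climate_type", "dc_capacity",
}

system_metadata = pd.read_csv("system_metadata.csv")
missing = REQUIRED_SYSTEM_COLUMNS - set(system_metadata.columns)
if missing:
    raise ValueError(f"system_metadata.csv is missing columns: {sorted(missing)}")
```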

## file_metadata.csv

Required columns:

```csv
file_id,system_id,file_name,timezone,data_sampling_frequency,issue
```
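
Together with `system_metadata.csv`, this file lets a runner look up the values that can be passed as `allowable_kwargs` (for example `latitude`, `longitude`, and `data_sampling_frequency` in the config above). A hypothetical sketch of that lookup, assuming both CSVs sit in the working directory:

```python
import pandas as pd

# Join per-file metadata to per-system metadata on system_id
system_metadata = pd.read_csv("system_metadata.csv")
file_metadata = pd.read_csv("file_metadata.csv")
merged = file_metadata.merge(system_metadata, on="system_id", how="left")


def kwargs_for_file(file_name: str) -> dict:
    """Return the kwargs a task function could receive for one data file."""
    row = merged.loc[merged["file_name"] == file_name].iloc[0]
    return {
        "latitude": row["latitude"],
        "longitude": row["longitude"],
        "data_sampling_frequency": row["data_sampling_frequency"],
    }
```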

## template.py (Marimo template with CLI args input)

The Marimo Python file must read its input data via the `mo.cli_args()` method.

Example:

```python
import json

import marimo as mo
import pandas as pd


def create_df_from_cli_args():
    # Each --results_df CLI argument holds one JSON-encoded row
    args = mo.cli_args().to_dict()
    data = args.get("results_df")
    rows = []
    for row in data:
        rows.append(json.loads(row))

    # Build a DataFrame with one record per CLI-supplied row
    df = pd.DataFrame.from_records(rows)
    return df
```
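
For context, the rows consumed by `create_df_from_cli_args()` arrive as repeated `--results_df` arguments, each holding one JSON-encoded record. The sketch below shows one hypothetical way such an invocation could be assembled; the exact command the Hub uses, and the assumption that `mo.cli_args()` collects repeated flags into a list, are not confirmed here:

```python
import json
import subprocess

import pandas as pd

# Hypothetical results DataFrame with one record (values are placeholders)
results_df = pd.DataFrame(
    [{"file_name": "example.csv", "mean_absolute_error_time_series": 0.12}]
)

# Serialize each row to JSON and pass it as a repeated --results_df argument
row_args: list[str] = []
for record in results_df.to_dict(orient="records"):
    row_args += ["--results_df", json.dumps(record)]

# Assumed invocation: arguments after "--" are exposed through mo.cli_args()
subprocess.run(["marimo", "run", "template.py", "--"] + row_args, check=True)
```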

## CSV data files

File names must match the values in the `file_name` column of `file_metadata.csv`.
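
A hypothetical consistency check (the folder name `data` is an assumption for this sketch):

```python
from pathlib import Path

import pandas as pd

# Every file_name in file_metadata.csv should exist in the data folder, and vice versa
file_metadata = pd.read_csv("file_metadata.csv")
expected = set(file_metadata["file_name"])
actual = {path.name for path in Path("data").glob("*.csv")}

if expected != actual:
    print("Only in file_metadata.csv:", sorted(expected - actual))
    print("Only in data folder:", sorted(actual - expected))
```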

## Ground truth CSV data files

File names must match the names of the files in the data files folder.
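
A similar hypothetical check for the ground truth files (the folder names `data` and `ground_truth` are assumptions for this sketch):

```python
from pathlib import Path

# Ground truth file names should mirror the data file names
data_files = {path.name for path in Path("data").glob("*.csv")}
ground_truth_files = {path.name for path in Path("ground_truth").glob("*.csv")}

missing = data_files - ground_truth_files
if missing:
    print("Data files without a matching ground truth file:", sorted(missing))
```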

## Markdown files for Task

### description.md

The markdown file used for the description tab in an analysis.

### dataset.md

The markdown file to describe the dataset in the data tab.

### shortdesc.md

The markdown file that is used on the card.

### SubmissionInstructions.md

The markdown file that is used on the Submission Instructions tab in the analysis.
Binary file modified compressions/1/sdt-submission.zip
1 change: 0 additions & 1 deletion compressions/1/sdt-submission/submission_wrapper.py
@@ -5,7 +5,6 @@
def detect_time_shifts(
    time_series, latitude=None, longitude=None, data_sampling_frequency=None
):
-    raise NotImplementedError("This function is not implemented yet.")
    dh = DataHandler(time_series.to_frame())
    dh.run_pipeline(fix_shifts=True, verbose=False, round_shifts_to_hour=False)
    return dh.time_shift_analysis.correction_estimate
Binary file added compressions/4/az-tilt-submission.zip
34 changes: 34 additions & 0 deletions dockerize-workflow/.dockerignore
@@ -0,0 +1,34 @@
# Ignore files and directories generated by the operating system
.DS_Store
Thumbs.db

# Ignore build artifacts
node_modules
dist
build

# Ignore development and debugging files
npm-debug.log*
yarn-debug.log*
yarn-error.log*
.vscode

# Ignore editor-specific files
*.swp
*.swo
*.bak

# Ignore project-specific files
.env
.env.local
.env.*.local
.env.development
.env.test
.env.production

# Ignore any other files or directories as needed
__pycache__
.venv
.dockerignore
Dockerfile
docker-compose.yml
3 changes: 3 additions & 0 deletions dockerize-workflow/.gitignore
@@ -0,0 +1,3 @@
results/*
data/*
certs/*
20 changes: 20 additions & 0 deletions dockerize-workflow/Dockerfile
@@ -0,0 +1,20 @@
# Use an official Python runtime as the base image
FROM python:3.12-alpine

# Set the working directory to /app

WORKDIR /app

# Copy the current directory contents into the container at /app
COPY . .

RUN apk add --no-cache docker-cli

RUN apk add gcc python3-dev musl-dev linux-headers

RUN python3 -m venv .venv && \
.venv/bin/pip install --upgrade pip && \
.venv/bin/pip install --no-cache-dir -r requirements.txt

# run container without closing
CMD ["tail", "-f", "/dev/null"]
16 changes: 16 additions & 0 deletions dockerize-workflow/docker-compose.yaml
@@ -0,0 +1,16 @@
services:
  dockerize-workflow:
    build:
      context: ./
    env_file:
      - .env
    ports:
      - "8787:8787"
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - ./certs:/app/certs
      - ./main.py:/app/main.py
      - ./requirements.txt:/app/requirements.txt
      - ./results:/app/results
      - ./data:/app/data
      - ./environment:/app/environment
33 changes: 33 additions & 0 deletions dockerize-workflow/environment/Dockerfile
@@ -0,0 +1,33 @@
# Use an official Python runtime as the base image
FROM python:3.11-slim

# Set the working directory in the container
WORKDIR /app

RUN apt-get update

COPY unzip.py .
COPY requirements.txt .

# Install the Python dependencies for the submission wrapper
RUN pip install --no-cache-dir -r requirements.txt

# Copy the submission package into the container
COPY $zip_file .

# Unzip the submission package

RUN python -m unzip $zip_file submission

WORKDIR /app/submission

# Install the Python dependencies
RUN pip install --no-cache-dir -r requirements.txt


# Set the working directory in the container
WORKDIR /app

COPY submission_wrapper.py .
# Command to keep the container running without doing anything
# CMD tail -f /dev/null
2 changes: 2 additions & 0 deletions dockerize-workflow/environment/requirements.txt
@@ -0,0 +1,2 @@
numpy
solar-data-tools==1.3.2
Binary file added dockerize-workflow/environment/submission.zip
152 changes: 152 additions & 0 deletions dockerize-workflow/environment/submission_wrapper.py
@@ -0,0 +1,152 @@
from importlib import import_module
import inspect
import sys
import pandas as pd
import numpy as np
from time import perf_counter
from functools import wraps
from typing import Any, ParamSpec, Union, Tuple, TypeVar, Callable, cast
from logging import Logger
import logging

T = TypeVar("T")

P = ParamSpec("P")


def logger_if_able(
    message: str, logger: Logger | None = None, level: str = "INFO"
):
    if logger is not None:
        levels_dict = {
            "DEBUG": logging.DEBUG,
            "INFO": logging.INFO,
            "WARNING": logging.WARNING,
            "ERROR": logging.ERROR,
            "CRITICAL": logging.CRITICAL,
        }

        level = level.upper()

        if level not in levels_dict:
            raise Exception(f"Invalid log level: {level}")

        log_level = levels_dict[level]

        logger.log(log_level, message)
    else:
        print(message)


def timing(verbose: bool = True, logger: Union[Logger, None] = None):
    def decorator(func: Callable[P, T]):
        def wrapper(*args: P.args, **kwargs: P.kwargs) -> Tuple[T, float]:
            start_time = perf_counter()
            result = func(*args, **kwargs)
            end_time = perf_counter()
            execution_time = end_time - start_time
            if verbose:
                msg = (
                    f"{func.__name__} took {execution_time:.3f} seconds to run"
                )
                logger_if_able(msg, logger)
            return result, execution_time

        return wrapper

    return decorator


def format_args_for_submission(data_dir: str, args: list[str]):
    filename = args[0]

    file_path = f"{data_dir}/{filename}"

    df = pd.read_csv(
        file_path,
        index_col=0,
        parse_dates=True,
    )

    print(df.head(5))

    series: pd.Series = df.asfreq("60min").squeeze()

    submission_args = [series, *args[1:]]

    return submission_args


def import_submission_function(submission_file_name: str, function_name: str):
    # Dynamically import function from submission.py
    try:
        submission_module = import_module(submission_file_name)
    except ModuleNotFoundError as e:
        print(f"ModuleNotFoundError: {submission_file_name} not found")
        raise e

    try:
        submission_function: Callable[[pd.Series, Any], np.ndarray] = getattr(
            submission_module, function_name
        )
        function_parameters = list(
            inspect.signature(submission_function).parameters.keys()
        )
    except AttributeError as e:
        print(
            f"AttributeError: {function_name} not found in submission module"
        )
        raise e

    return submission_function, function_parameters


def main():
    args = sys.argv[1:]

    if len(args) < 1:
        print("Function name not provided")
        sys.exit(1)

    submission_file_name = args[0]
    function_name = args[1]
    data_file_name = args[2]

    print("Getting submission function...")

    submission_function, function_parameters = import_submission_function(
        submission_file_name, function_name
    )
    print("Got submission function")

    print(f"Submission file name: {submission_file_name}")
    print(f"Function name: {function_name}")
    print(f"Function: {submission_function}")
    print(f"Function parameters: {function_parameters}")

    data_dir = "/app/data/"
    results_dir = "/app/results/"

    submission_args = format_args_for_submission(data_dir, args[2:])

    print(f"Submission args: {submission_args}")

    results, execution_time = timing()(submission_function)(*submission_args)

    print(f"Execution time: {execution_time}")

    print(f"Results: {results}")

    # save results to csv file
    results_df = pd.DataFrame(results)
    results_file = f"{results_dir}/{data_file_name}"
    results_df.to_csv(results_file)

    execution_tuple = (data_file_name, execution_time)
    execution_file = f"{results_dir}/time.csv"
    execution_df = pd.DataFrame([execution_tuple])
    execution_df.to_csv(execution_file, mode="a", header=False)


if __name__ == "__main__":
    main()