Skip to content

Commit

Permalink
python(feat): Add units_row and descriptions_row option to simple cs…
Browse files Browse the repository at this point in the history
…v upload
  • Loading branch information
marcsiftstack committed Nov 16, 2024
1 parent d6e6fad commit fd3559b
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 10 deletions.
5 changes: 4 additions & 1 deletion python/examples/data_import/csv/custom/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,8 @@
)

import_service: DataImportService = csv_upload_service.upload(input_csv, csv_config)
print(import_service.wait_until_complete())
print(import_service.get_data_import())

print("Waiting for upload to complete...")
import_service.wait_until_complete()
print("Upload example complete!")
11 changes: 9 additions & 2 deletions python/examples/data_import/csv/simple/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,15 @@
}

csv_upload_service = CsvUploadService(rest_config)

# Can optionally specify units_row=N or descriptions_row=N if these rows exist.
# Must also specify first_data_row=N in this case.
import_service: DataImportService = csv_upload_service.simple_upload(
asset_name, "sample_data.csv"
asset_name,
"sample_data.csv",
)
print(import_service.wait_until_complete())
print(import_service.get_data_import())

print("Waiting for upload to complete...")
import_service.wait_until_complete()
print("Upload example complete!")
134 changes: 134 additions & 0 deletions python/lib/sift_py/data_import/_csv_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,3 +259,137 @@ def test_simple_upload_invalid_csv(mocker: MockFixture):
)
svc = CsvUploadService(rest_config)
svc.simple_upload("test_asset", "sample.csv")


def test_simple_upload_metadata_csv(mocker: MockFixture):
    """A simple upload with units_row/descriptions_row sends the expected CSV config.

    The CSV is mocked so that the raw read (no skiprows) contains a units row
    and a descriptions row above the data, while the skiprows read yields the
    clean numeric data used for dtype inference.
    """
    mocker.patch("sift_py.data_import.csv.Path.is_file").return_value = True

    def fake_read_csv(*_, **kwargs):
        # skiprows present -> metadata rows stripped; otherwise return the
        # raw frame including the units and description header rows.
        if "skiprows" in kwargs:
            return pd.DataFrame(
                {
                    "time": [1, 2, 3],
                    "channel_int": [-1, 2, 0],
                }
            )
        return pd.DataFrame(
            {
                "time": ["s", "a description", 1, 2, 3],
                "channel_int": ["degC", "another description", -1, 2, 1],
            }
        )

    mocker.patch("sift_py.data_import.csv.pd.read_csv", fake_read_csv)

    post_mock = mocker.patch("sift_py.data_import.csv.requests.post")
    # First response: upload-URL handshake; second: the file PUT/POST itself.
    post_mock.side_effect = [
        MockResponse(
            status_code=200,
            text=json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"}),
        ),
        MockResponse(status_code=200, text=""),
    ]
    mocker.patch("sift_py.data_import.csv.open", mocker.mock_open())

    svc = CsvUploadService(rest_config)
    svc.simple_upload("test_asset", "sample.csv", units_row=2, descriptions_row=3)

    # Column 2 should pick up its units/description from the metadata rows.
    expected_csv_config = CsvConfig(
        {
            "asset_name": "test_asset",
            "run_name": "",
            "run_id": "",
            "first_data_row": 2,
            "time_column": {
                "format": "TIME_FORMAT_ABSOLUTE_DATETIME",
                "column_number": 1,
            },
            "data_columns": {
                "2": {
                    "name": "channel_int",
                    "data_type": "CHANNEL_DATA_TYPE_INT_64",
                    "component": "",
                    "units": "degC",
                    "description": "another description",
                    "enum_types": [],
                    "bit_field_elements": [],
                }
            },
        }
    )

    post_mock.assert_any_call(
        url="https://some_uri.com/api/v1/data-imports:upload",
        headers={
            "Authorization": "Bearer 123123123",
            "Content-Encoding": "application/octet-stream",
        },
        data=json.dumps({"csv_config": expected_csv_config.to_dict()}),
    )


def test_simple_upload_uint64_csv(mocker: MockFixture):
    """Values above the int64 max promote the inferred column type to UINT_64.

    The mocked CSV contains 2**63 (> int64 max), so dtype inference must
    select CHANNEL_DATA_TYPE_UINT_64 in the generated config.
    """
    mocker.patch("sift_py.data_import.csv.Path.is_file").return_value = True

    mocker.patch("sift_py.data_import.csv.pd.read_csv").return_value = pd.DataFrame(
        {
            "time": [1, 2, 3],
            "channel_uint64": [-1, 2, 2**63],
        }
    )

    post_mock = mocker.patch("sift_py.data_import.csv.requests.post")
    # First response: upload-URL handshake; second: the file transfer itself.
    post_mock.side_effect = [
        MockResponse(
            status_code=200,
            text=json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"}),
        ),
        MockResponse(status_code=200, text=""),
    ]
    mocker.patch("sift_py.data_import.csv.open", mocker.mock_open())

    svc = CsvUploadService(rest_config)
    svc.simple_upload("test_asset", "sample.csv")

    expected_csv_config = CsvConfig(
        {
            "asset_name": "test_asset",
            "run_name": "",
            "run_id": "",
            "first_data_row": 2,
            "time_column": {
                "format": "TIME_FORMAT_ABSOLUTE_DATETIME",
                "column_number": 1,
            },
            "data_columns": {
                "2": {
                    "name": "channel_uint64",
                    "data_type": "CHANNEL_DATA_TYPE_UINT_64",
                    "component": "",
                    "units": "",
                    "description": "",
                    "enum_types": [],
                    "bit_field_elements": [],
                }
            },
        }
    )

    post_mock.assert_any_call(
        url="https://some_uri.com/api/v1/data-imports:upload",
        headers={
            "Authorization": "Bearer 123123123",
            "Content-Encoding": "application/octet-stream",
        },
        data=json.dumps({"csv_config": expected_csv_config.to_dict()}),
    )
58 changes: 51 additions & 7 deletions python/lib/sift_py/data_import/csv.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import mimetypes
from pathlib import Path
from typing import Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union, cast
from urllib.parse import urljoin, urlparse

import pandas as pd
Expand Down Expand Up @@ -138,6 +138,9 @@ def simple_upload(
time_format: TimeFormatType = TimeFormatType.ABSOLUTE_DATETIME,
run_name: Optional[str] = None,
run_id: Optional[str] = None,
units_row: Optional[int] = None,
descriptions_row: Optional[int] = None,
relative_start_time: Optional[str] = None,
) -> DataImportService:
"""
Uploads the CSV file pointed to by `path` to the specified asset. This function will
Expand All @@ -149,17 +152,45 @@ def simple_upload(
Override `time_format` to specify the time data format. Default is `TimeFormatType.ABSOLUTE_DATETIME`.
Override `run_name` to specify the name of the run to create for this data. Default is None.
Override `run_id` to specify the id of the run to add this data to. Default is None.
Override `units_row` to specify which row contains unit information. Default is None.
Override `descriptions_row` to specify which row contains channel description information. Default is None.
Override `relative_start_time` if a relative time format is used. Default is None.
"""
self._validate_file_type(path)

# Convert to 0 index
skip_rows: List[int] = []
if units_row is not None:
units_row -= 1
skip_rows.append(units_row)
if descriptions_row is not None:
descriptions_row -= 1
skip_rows.append(descriptions_row)

types = {
"integer": int,
"string": str,
"floating": float,
"boolean": bool,
"integer": "int",
"string": "string",
"floating": "float",
"boolean": "bool",
}

def is_uint64(n: pd.Series) -> bool:
int64_max = 2**63 - 1
return bool((n > int64_max).any())

data_config = {}
df = pd.read_csv(path)
df = pd.read_csv(path, skiprows=skip_rows)

units: Optional[List[str]] = None
if units_row is not None:
df_units = pd.read_csv(path, nrows=units_row)
units = cast(List[str], df_units.iloc[units_row - 1].astype(str))

descriptions: Optional[List[str]] = None
if descriptions_row is not None:
df_descriptions = pd.read_csv(path, nrows=descriptions_row)
descriptions = cast(List[str], df_descriptions.iloc[descriptions_row - 1].astype(str))

for i, header in enumerate(df.columns):
if i + 1 == time_column:
continue
Expand All @@ -170,10 +201,20 @@ def simple_upload(
raise Exception(
f"Unable to upload data type in column {i+1} {header}. Inferred type: {inferred_dtype}"
)
if dtype == "int":
dtype = "uint64" if is_uint64(df.iloc[:, i]) else "int64"

data_config[i + 1] = {"name": header, "data_type": dtype}

config_info = {
if units is not None:
data_config[i + 1]["units"] = units[i] if units[i] != "nan" else ""

if descriptions is not None:
data_config[i + 1]["description"] = (
descriptions[i] if descriptions[i] != "nan" else ""
)

config_info: Dict[str, Any] = {
"asset_name": asset_name,
"first_data_row": first_data_row,
"time_column": {
Expand All @@ -189,6 +230,9 @@ def simple_upload(
if run_id is not None:
config_info["run_id"] = run_name

if relative_start_time is not None:
config_info["time_column"]["relative_start_time"] = relative_start_time

csv_config = CsvConfig(config_info)

return self.upload(path, csv_config)
Expand Down

0 comments on commit fd3559b

Please sign in to comment.