From 097bf7fe308e81dc6655d657f4616ec76f78a344 Mon Sep 17 00:00:00 2001 From: Marc Julien Date: Wed, 16 Oct 2024 16:09:01 -0700 Subject: [PATCH] python(feat): Add CSV and TDMS upload service (#102) --- .../data_import/csv/custom/.env-example | 3 + .../examples/data_import/csv/custom/main.py | 67 +++++ .../data_import/csv/custom/requirements.txt | 2 + .../data_import/csv/custom/sample_data.csv | 51 ++++ .../data_import/csv/simple/.env-example | 3 + .../examples/data_import/csv/simple/main.py | 34 +++ .../data_import/csv/simple/requirements.txt | 2 + .../data_import/csv/simple/sample_data.csv | 51 ++++ python/examples/data_import/tdms/.env-example | 3 + python/examples/data_import/tdms/main.py | 33 +++ .../data_import/tdms/requirements.txt | 2 + .../data_import/tdms/sample_data.tdms | Bin 0 -> 14271 bytes python/lib/sift_py/data_import/__init__.py | 129 +++++++++ python/lib/sift_py/data_import/_config.py | 167 +++++++++++ .../lib/sift_py/data_import/_config_test.py | 166 +++++++++++ python/lib/sift_py/data_import/_csv_test.py | 261 ++++++++++++++++++ .../lib/sift_py/data_import/_status_test.py | 176 ++++++++++++ python/lib/sift_py/data_import/_tdms_test.py | 238 ++++++++++++++++ python/lib/sift_py/data_import/config.py | 19 ++ python/lib/sift_py/data_import/csv.py | 216 +++++++++++++++ python/lib/sift_py/data_import/status.py | 113 ++++++++ python/lib/sift_py/data_import/tdms.py | 200 ++++++++++++++ python/lib/sift_py/data_import/time_format.py | 39 +++ .../file_attachment/_internal/upload.py | 19 +- .../sift_py/file_attachment/_service_test.py | 17 +- python/lib/sift_py/ingestion/channel.py | 132 +++++---- python/lib/sift_py/rest.py | 13 + python/pyproject.toml | 2 + 28 files changed, 2085 insertions(+), 73 deletions(-) create mode 100644 python/examples/data_import/csv/custom/.env-example create mode 100644 python/examples/data_import/csv/custom/main.py create mode 100644 python/examples/data_import/csv/custom/requirements.txt create mode 100644 python/examples/data_import/csv/custom/sample_data.csv create mode 100644 python/examples/data_import/csv/simple/.env-example create mode 100644 python/examples/data_import/csv/simple/main.py create mode 100644 python/examples/data_import/csv/simple/requirements.txt create mode 100644 python/examples/data_import/csv/simple/sample_data.csv create mode 100644 python/examples/data_import/tdms/.env-example create mode 100644 python/examples/data_import/tdms/main.py create mode 100644 python/examples/data_import/tdms/requirements.txt create mode 100644 python/examples/data_import/tdms/sample_data.tdms create mode 100644 python/lib/sift_py/data_import/__init__.py create mode 100644 python/lib/sift_py/data_import/_config.py create mode 100644 python/lib/sift_py/data_import/_config_test.py create mode 100644 python/lib/sift_py/data_import/_csv_test.py create mode 100644 python/lib/sift_py/data_import/_status_test.py create mode 100644 python/lib/sift_py/data_import/_tdms_test.py create mode 100644 python/lib/sift_py/data_import/config.py create mode 100644 python/lib/sift_py/data_import/csv.py create mode 100644 python/lib/sift_py/data_import/status.py create mode 100644 python/lib/sift_py/data_import/tdms.py create mode 100644 python/lib/sift_py/data_import/time_format.py diff --git a/python/examples/data_import/csv/custom/.env-example b/python/examples/data_import/csv/custom/.env-example new file mode 100644 index 00000000..cdef5f89 --- /dev/null +++ b/python/examples/data_import/csv/custom/.env-example @@ -0,0 +1,3 @@ +SIFT_API_URI="" +SIFT_API_KEY="" 
+ASSET_NAME="" \ No newline at end of file diff --git a/python/examples/data_import/csv/custom/main.py b/python/examples/data_import/csv/custom/main.py new file mode 100644 index 00000000..736aa17a --- /dev/null +++ b/python/examples/data_import/csv/custom/main.py @@ -0,0 +1,67 @@ +import csv +import os + +from dotenv import load_dotenv +from sift_py.data_import.config import CsvConfig +from sift_py.data_import.csv import CsvUploadService +from sift_py.data_import.status import DataImportService +from sift_py.rest import SiftRestConfig + +if __name__ == "__main__": + """ + Example of uploading a CSV file into Sift using custom CSV config. + """ + + load_dotenv() + + sift_uri = os.getenv("SIFT_API_URI") + assert sift_uri, "expected 'SIFT_API_URI' environment variable to be set" + + apikey = os.getenv("SIFT_API_KEY") + assert apikey, "expected 'SIFT_API_KEY' environment variable to be set" + + asset_name = os.getenv("ASSET_NAME") + assert asset_name, "expected 'ASSET_NAME' environment variable to be set" + + rest_config: SiftRestConfig = { + "uri": sift_uri, + "apikey": apikey, + } + + csv_upload_service = CsvUploadService(rest_config) + + # Create CSV config. + input_csv = "sample_data.csv" + + # Parse CSV to get channel names. + data_config = {} + with open(input_csv, "r") as f: + reader = csv.DictReader(f) + headers = next(reader) + for i, channel in enumerate(headers): + if channel == "timestamp": + continue + data_config[i + 1] = { + "name": channel, + # This example assumes all channels are doubles. + # Can also use `ChannelDoubleType.DOUBLE` or `double` + "data_type": "CHANNEL_DATA_TYPE_DOUBLE", + "description": f"Example channel {channel}", + } + + csv_config = CsvConfig( + { + "asset_name": asset_name, + "first_data_row": 2, + "time_column": { + "format": "TIME_FORMAT_ABSOLUTE_DATETIME", + # Can also use `TimeFormatType.ABSOLUTE_DATETIME` + "column_number": 1, + }, + "data_columns": data_config, + } + ) + + import_service: DataImportService = csv_upload_service.upload(input_csv, csv_config) + print(import_service.wait_until_complete()) + print("Upload example complete!") diff --git a/python/examples/data_import/csv/custom/requirements.txt b/python/examples/data_import/csv/custom/requirements.txt new file mode 100644 index 00000000..2dda90fe --- /dev/null +++ b/python/examples/data_import/csv/custom/requirements.txt @@ -0,0 +1,2 @@ +python-dotenv +sift-stack-py diff --git a/python/examples/data_import/csv/custom/sample_data.csv b/python/examples/data_import/csv/custom/sample_data.csv new file mode 100644 index 00000000..974bffb9 --- /dev/null +++ b/python/examples/data_import/csv/custom/sample_data.csv @@ -0,0 +1,51 @@ +timestamp,channel_0,channel_1,channel_2,channel_3,channel_4,channel_5,channel_6,channel_7,channel_8,channel_9 +2024-10-07 17:00:09.982126,0.9869788584872923,0.4321820341919653,0.5867135634469265,0.9613042704758855,0.8581117009916057,0.47931312587076513,0.08242174011901193,0.1933231289442503,0.35985209963106657,0.3541647897768103 +2024-10-07 17:00:10.002126,0.5701255316316417,0.5914707762677202,0.2562630025294298,0.2513389890039397,0.42158646662087185,0.3479905929531466,0.26458283424910256,0.4609703329809085,0.6421614421556726,0.8510388436200512 +2024-10-07 17:00:10.022126,0.49446373422349477,0.3195179734137701,0.7871899227553234,0.9344052236947964,0.672805707797897,0.5123445839142331,0.28222507345627657,0.06418497987230987,0.23737333108063496,0.27500526480430076 +2024-10-07 
17:00:10.042126,0.6323411689241686,0.12129516635402504,0.9523423895236848,0.6884533600751157,0.7144189711378498,0.3981104884533361,0.7761816578087838,0.8901628333060857,0.30626343283413393,0.6011538466824089 +2024-10-07 17:00:10.062126,0.7105902314226873,0.5099079791743336,0.4802228469605496,0.10704676305717797,0.10138693932861131,0.2650078206727895,0.25211372664734555,0.5767357520495985,0.9286870825289508,0.25497721804082396 +2024-10-07 17:00:10.082126,0.5256323808689144,0.1736180220982083,0.6524881071381322,0.003042953818593541,0.6613972481385193,0.7100947908784161,0.7477606441382524,0.3912805619092947,0.8430490363583497,0.6194785712573339 +2024-10-07 17:00:10.102126,0.9693387841288329,0.903623556498963,0.5100034454529075,0.6407491036361468,0.8569231122512672,0.05056118054309999,0.44232019387397803,0.7469144554405716,0.9727410561790976,0.24048712876411005 +2024-10-07 17:00:10.122126,0.7955698564962665,0.7249446474764154,0.8870730273307796,0.8843604048911804,0.5195901521212246,0.05753648053992644,0.7481253948701445,0.04145468150297349,0.2757433105519955,0.8271431370209696 +2024-10-07 17:00:10.142126,0.44163115772117134,0.3290077544927732,0.2755718544807364,0.772133850506479,0.768188038982342,0.7265904485292926,0.6565012067425104,0.23367690620333514,0.029934154305872096,0.9695809128153028 +2024-10-07 17:00:10.162126,0.8156068002765097,0.29274451770409216,0.5421572486014887,0.13000757637350413,0.022321842369040334,0.8869017316875608,0.4233633405896129,0.9960102139371052,0.11212362102743312,0.32849721419760736 +2024-10-07 17:00:10.182126,0.5834782910613371,0.4146197648932102,0.5813947696667644,0.07047903300825009,0.6602636693291395,0.25604504320586363,0.23331372679399032,0.38001110441597374,0.30182669662997474,0.065460012927686 +2024-10-07 17:00:10.202126,0.4958858227610534,0.03335758086761054,0.18306127159920793,0.2873661888506289,0.3622892776923665,0.499305859523576,0.6819818039499034,0.7077342031985537,0.6166251879949645,0.396436674329553 +2024-10-07 17:00:10.222126,0.5522713559775752,0.059736261748141506,0.6671495700467506,0.28880193876063154,0.5949485419694917,0.15234371456520912,0.8820635740470701,0.2623615814355038,0.05939341425818523,0.33899151593770116 +2024-10-07 17:00:10.242126,0.2548536144767083,0.9446851850635495,0.2503283278262417,0.6603460241625815,0.9426257446009163,0.2335313814529668,0.11325626995463156,0.8561477524963562,0.5714950967235992,0.6208318132502507 +2024-10-07 17:00:10.262126,0.6483317839597722,0.9058103142522725,0.8245270488872087,0.5102024611674935,0.9927493027517336,0.03993821791474972,0.38976451460023287,0.16434407971163412,0.8862485490664651,0.5239478232944098 +2024-10-07 17:00:10.282126,0.8149201339397839,0.7137737976420994,0.5557818190695873,0.4938338815400628,0.415033367396499,0.7035967227531553,0.2799010551744173,0.11133675295610845,0.09614041182273869,0.6749010616348136 +2024-10-07 17:00:10.302126,0.966821236972675,0.6427478499039081,0.08793942901785445,0.983536883456765,0.08017419642633206,0.9628090059077687,0.2261439442554486,0.8616042317097121,0.29768680983311857,0.7099921172794093 +2024-10-07 17:00:10.322126,0.07616495133533874,0.8477361352992079,0.5124562051137337,0.3477102817431239,0.9803013846846018,0.11564621113417217,0.500394919401976,0.9532968441813576,0.6981640095296011,0.11464731475203482 +2024-10-07 17:00:10.342126,0.2816774196886087,0.6946350894468526,0.06032100021328923,0.08671742329512011,0.3703285502256528,0.23098242439121186,0.8024638729107021,0.7519069755902849,0.1567668195054993,0.6188475921954685 +2024-10-07 
17:00:10.362126,0.3562879856158999,0.5821585057874376,0.5106635612521454,0.3801483655437152,0.9198726678600827,0.9418166961193987,0.388429410377256,0.2977471804131373,0.9471563180632896,0.14325457954021137 +2024-10-07 17:00:10.382126,0.2567504742394161,0.06216349913585972,0.06597933896982477,0.9929001020720042,0.04246495074234258,0.059639438947922785,0.35082423391626916,0.49816120975354294,0.4793436374437493,0.9044262977225134 +2024-10-07 17:00:10.402126,0.729668044188631,0.87303573486452,0.28241255331422177,0.9844784607945438,0.5628283312476997,0.7772705514326647,0.5621228460418618,0.31655274740604544,0.14667119076481394,0.46479306538035026 +2024-10-07 17:00:10.422126,0.6079166221523755,0.3381935261303928,0.4027552753614071,0.9463221601878666,0.3079585365095189,0.7384939769719963,0.3068195207236858,0.9170954845496502,0.5033755202586822,0.1866483806338577 +2024-10-07 17:00:10.442126,0.6374130109345892,0.18034567217625985,0.3434712128818532,0.17021073481209192,0.3008496813095347,0.9906288845050785,0.1767094160069448,0.10490367522653032,0.451783284411753,0.28951700336392294 +2024-10-07 17:00:10.462126,0.6064278489558738,0.927735760571258,0.5420948784165588,0.6429208279586314,0.6491094970355998,0.4658083998707867,0.5023833964486385,0.31178928614967627,0.7371075546530739,0.20117118757702135 +2024-10-07 17:00:10.482126,0.4513829072880269,0.08750235625396385,0.897429061180492,0.525573337899755,0.4958912577189468,0.6666299042049784,0.6673821543792452,0.24940296325704814,0.39990346596891735,0.03908119199471982 +2024-10-07 17:00:10.502126,0.6526868292235004,0.2396467272906465,0.48876587520447246,0.9600364585017024,0.5645764863120679,0.6525672122039082,0.5892799462667705,0.2623733923630508,0.12644043893423196,0.557038851697632 +2024-10-07 17:00:10.522126,0.8206340728243324,0.3856078258875246,0.10355737982275359,0.40008217561542403,0.775489883125943,0.7809085279365546,0.9728982608591412,0.7437225300634555,0.9064050525860767,0.34954913722287684 +2024-10-07 17:00:10.542126,0.7183869527962506,0.6057076354942508,0.7141305521692453,0.864181892944079,0.3063329533214951,0.5922208449149451,0.2165961724433344,0.361412924700091,0.9195428793118164,0.9117659605065301 +2024-10-07 17:00:10.562126,0.5337432823497181,0.3029799047348265,0.7187998606002997,0.6413294998441611,0.06956711458021725,0.7653584390353513,0.5478444411319665,0.23956816537402514,0.565309371127512,0.09571756811408905 +2024-10-07 17:00:10.582126,0.9438967074410107,0.5947221959103646,0.36845583913223356,0.062139986030510186,0.4868199978665264,0.9933813175937465,0.5802919514297851,0.3290430584865637,0.10596102722542844,0.2548595173495307 +2024-10-07 17:00:10.602126,0.9934730684688866,0.8642964082869516,0.4976780006661251,0.28274284200988264,0.2873345110619544,0.5603090594354533,0.03671323561253215,0.2910425794795004,0.27643557935209917,0.9933617886999094 +2024-10-07 17:00:10.622126,0.7625363022038244,0.15457745224329023,0.14594004187301046,0.5333898421447607,0.06272457694883149,0.6695233127369234,0.11348515414578297,0.5372190720009107,0.8982742712845617,0.0735953901666454 +2024-10-07 17:00:10.642126,0.9752915088836254,0.4212118900250873,0.7517891300677948,0.6173271203730661,0.9484753022335238,0.49003318121474826,0.9673069773540193,0.7340492846172215,0.7752184647782351,0.3540611205129477 +2024-10-07 17:00:10.662126,0.8752711555400344,0.5802701554789678,0.5206563647840734,0.33822713547368,0.9940105706040785,0.22225489571851353,0.09547252426599151,0.6501010981395459,0.11054088308311194,0.9583261706090845 +2024-10-07 
17:00:10.682126,0.07347363289278785,0.014646911613324765,0.7320190228788871,0.7709297572291158,0.15512255074438153,0.15390836335702818,0.5177227117538571,0.3236803212166608,0.8070567542605646,0.14637148735742855 +2024-10-07 17:00:10.702126,0.7705283572230599,0.10391905603399265,0.22647839339151277,0.19381828130242862,0.7394259865623566,0.537284895745162,0.665941669144839,3.682024595530464e-05,0.2948538006772726,0.5132436577867977 +2024-10-07 17:00:10.722126,0.5801002058755251,0.5347360901142872,0.9426863263468371,0.10078965364197234,0.18092649132383554,0.4158556131912532,0.48780894559491883,0.30729645738027744,0.8750348655618886,0.7104045822140087 +2024-10-07 17:00:10.742126,0.5643358961498655,0.7476536978422998,0.42184219720188965,0.5986913936301931,0.7995464506487457,0.47011356406382276,0.47165656214479945,0.640485625907709,0.14945533562104174,0.5078522070157255 +2024-10-07 17:00:10.762126,0.689416312998768,0.6869095715975265,0.44233170845065484,0.18918733339531457,0.05840017597923497,0.5475729301622566,0.8653724176218798,0.4307388257626793,0.02484138230351174,0.6041369641600293 +2024-10-07 17:00:10.782126,0.655117523072002,0.7744548323118985,0.12191113923871255,0.7580828943619093,0.5846162089198462,0.10163805596923403,0.9548748239914063,0.17047041185779388,0.3828186387151279,0.2125862018544078 +2024-10-07 17:00:10.802126,0.9978949894880911,0.4407389649883179,0.3874802461581107,0.787917288981986,0.46395079770224845,0.7076803126961722,0.021586569921456245,0.5047605761660581,0.11754763946252733,0.6914425169805245 +2024-10-07 17:00:10.822126,0.18230805546987583,0.7162526432491131,0.21515641083754522,0.011646521130654008,0.28184867409261694,0.9899533951647436,0.8802442501936819,0.15601023898717636,0.43165659645721866,0.8624006609001295 +2024-10-07 17:00:10.842126,0.08772137639860766,0.023610987792976545,0.18152206111415536,0.38963482785942316,0.9412403573232293,0.4134256244939435,0.7480337877841542,0.7004645831294896,0.6068471493001919,0.9784047958687754 +2024-10-07 17:00:10.862126,0.555431255352411,0.32821540235710556,0.8050463718214766,0.39059374606368025,0.901122696046085,0.7502017205918795,0.9967762731875593,0.46418284947619637,0.2502871364121475,0.8883270975329431 +2024-10-07 17:00:10.882126,0.8628168741632191,0.0691464712374249,0.2338115694969456,0.20339096239946164,0.31131589189871856,0.8803184037865743,0.6050595482347587,0.9791982321603043,0.7768227079004556,0.17412397358921428 +2024-10-07 17:00:10.902126,0.7372826427025838,0.47780638494554384,0.9565364953062893,0.8023572786972596,0.697601265722501,0.2996954441916203,0.45187436183282126,0.5050166941863234,0.5971251947486449,0.2663036154763788 +2024-10-07 17:00:10.922126,0.2616462515215068,0.9801170295903668,0.9714067759861551,0.6171366258392272,0.20210955994064783,0.5821302248582718,0.21728119192057127,0.5659070515053762,0.33890253787639546,0.7847785601399289 +2024-10-07 17:00:10.942126,0.30765032091105926,0.3315540842714122,0.8583987435373389,0.8090398019035149,0.053143709883349044,0.6161159934280365,0.9085598110294536,0.22921848109316345,0.09698643931431572,0.9558913981896255 +2024-10-07 17:00:10.962126,0.29892134818962646,0.9746840156550289,0.9189029062113264,0.6774566374705723,0.6404015592597371,0.39860549421962166,0.36998704719579667,0.7438885993373414,0.7012269332301863,0.35944279411072455 diff --git a/python/examples/data_import/csv/simple/.env-example b/python/examples/data_import/csv/simple/.env-example new file mode 100644 index 00000000..cdef5f89 --- /dev/null +++ b/python/examples/data_import/csv/simple/.env-example @@ 
-0,0 +1,3 @@
+SIFT_API_URI=""
+SIFT_API_KEY=""
+ASSET_NAME=""
\ No newline at end of file
diff --git a/python/examples/data_import/csv/simple/main.py b/python/examples/data_import/csv/simple/main.py
new file mode 100644
index 00000000..b7563984
--- /dev/null
+++ b/python/examples/data_import/csv/simple/main.py
@@ -0,0 +1,34 @@
+import os
+
+from dotenv import load_dotenv
+from sift_py.data_import.csv import CsvUploadService
+from sift_py.data_import.status import DataImportService
+from sift_py.rest import SiftRestConfig
+
+if __name__ == "__main__":
+    """
+    Example of uploading a CSV file into Sift using the default CSV config.
+    """
+
+    load_dotenv()
+
+    sift_uri = os.getenv("SIFT_API_URI")
+    assert sift_uri, "expected 'SIFT_API_URI' environment variable to be set"
+
+    apikey = os.getenv("SIFT_API_KEY")
+    assert apikey, "expected 'SIFT_API_KEY' environment variable to be set"
+
+    asset_name = os.getenv("ASSET_NAME")
+    assert asset_name, "expected 'ASSET_NAME' environment variable to be set"
+
+    rest_config: SiftRestConfig = {
+        "uri": sift_uri,
+        "apikey": apikey,
+    }
+
+    csv_upload_service = CsvUploadService(rest_config)
+    import_service: DataImportService = csv_upload_service.simple_upload(
+        asset_name, "sample_data.csv"
+    )
+    print(import_service.wait_until_complete())
+    print("Upload example complete!")
diff --git a/python/examples/data_import/csv/simple/requirements.txt b/python/examples/data_import/csv/simple/requirements.txt
new file mode 100644
index 00000000..2dda90fe
--- /dev/null
+++ b/python/examples/data_import/csv/simple/requirements.txt
@@ -0,0 +1,2 @@
+python-dotenv
+sift-stack-py
diff --git a/python/examples/data_import/csv/simple/sample_data.csv b/python/examples/data_import/csv/simple/sample_data.csv
new file mode 100644
index 00000000..974bffb9
--- /dev/null
+++ b/python/examples/data_import/csv/simple/sample_data.csv
@@ -0,0 +1,51 @@
+timestamp,channel_0,channel_1,channel_2,channel_3,channel_4,channel_5,channel_6,channel_7,channel_8,channel_9
+2024-10-07 17:00:09.982126,0.9869788584872923,0.4321820341919653,0.5867135634469265,0.9613042704758855,0.8581117009916057,0.47931312587076513,0.08242174011901193,0.1933231289442503,0.35985209963106657,0.3541647897768103
+2024-10-07 17:00:10.002126,0.5701255316316417,0.5914707762677202,0.2562630025294298,0.2513389890039397,0.42158646662087185,0.3479905929531466,0.26458283424910256,0.4609703329809085,0.6421614421556726,0.8510388436200512
+2024-10-07 17:00:10.022126,0.49446373422349477,0.3195179734137701,0.7871899227553234,0.9344052236947964,0.672805707797897,0.5123445839142331,0.28222507345627657,0.06418497987230987,0.23737333108063496,0.27500526480430076
+2024-10-07 17:00:10.042126,0.6323411689241686,0.12129516635402504,0.9523423895236848,0.6884533600751157,0.7144189711378498,0.3981104884533361,0.7761816578087838,0.8901628333060857,0.30626343283413393,0.6011538466824089
+2024-10-07 17:00:10.062126,0.7105902314226873,0.5099079791743336,0.4802228469605496,0.10704676305717797,0.10138693932861131,0.2650078206727895,0.25211372664734555,0.5767357520495985,0.9286870825289508,0.25497721804082396
+2024-10-07 17:00:10.082126,0.5256323808689144,0.1736180220982083,0.6524881071381322,0.003042953818593541,0.6613972481385193,0.7100947908784161,0.7477606441382524,0.3912805619092947,0.8430490363583497,0.6194785712573339
+2024-10-07 17:00:10.102126,0.9693387841288329,0.903623556498963,0.5100034454529075,0.6407491036361468,0.8569231122512672,0.05056118054309999,0.44232019387397803,0.7469144554405716,0.9727410561790976,0.24048712876411005
+2024-10-07 
17:00:10.122126,0.7955698564962665,0.7249446474764154,0.8870730273307796,0.8843604048911804,0.5195901521212246,0.05753648053992644,0.7481253948701445,0.04145468150297349,0.2757433105519955,0.8271431370209696 +2024-10-07 17:00:10.142126,0.44163115772117134,0.3290077544927732,0.2755718544807364,0.772133850506479,0.768188038982342,0.7265904485292926,0.6565012067425104,0.23367690620333514,0.029934154305872096,0.9695809128153028 +2024-10-07 17:00:10.162126,0.8156068002765097,0.29274451770409216,0.5421572486014887,0.13000757637350413,0.022321842369040334,0.8869017316875608,0.4233633405896129,0.9960102139371052,0.11212362102743312,0.32849721419760736 +2024-10-07 17:00:10.182126,0.5834782910613371,0.4146197648932102,0.5813947696667644,0.07047903300825009,0.6602636693291395,0.25604504320586363,0.23331372679399032,0.38001110441597374,0.30182669662997474,0.065460012927686 +2024-10-07 17:00:10.202126,0.4958858227610534,0.03335758086761054,0.18306127159920793,0.2873661888506289,0.3622892776923665,0.499305859523576,0.6819818039499034,0.7077342031985537,0.6166251879949645,0.396436674329553 +2024-10-07 17:00:10.222126,0.5522713559775752,0.059736261748141506,0.6671495700467506,0.28880193876063154,0.5949485419694917,0.15234371456520912,0.8820635740470701,0.2623615814355038,0.05939341425818523,0.33899151593770116 +2024-10-07 17:00:10.242126,0.2548536144767083,0.9446851850635495,0.2503283278262417,0.6603460241625815,0.9426257446009163,0.2335313814529668,0.11325626995463156,0.8561477524963562,0.5714950967235992,0.6208318132502507 +2024-10-07 17:00:10.262126,0.6483317839597722,0.9058103142522725,0.8245270488872087,0.5102024611674935,0.9927493027517336,0.03993821791474972,0.38976451460023287,0.16434407971163412,0.8862485490664651,0.5239478232944098 +2024-10-07 17:00:10.282126,0.8149201339397839,0.7137737976420994,0.5557818190695873,0.4938338815400628,0.415033367396499,0.7035967227531553,0.2799010551744173,0.11133675295610845,0.09614041182273869,0.6749010616348136 +2024-10-07 17:00:10.302126,0.966821236972675,0.6427478499039081,0.08793942901785445,0.983536883456765,0.08017419642633206,0.9628090059077687,0.2261439442554486,0.8616042317097121,0.29768680983311857,0.7099921172794093 +2024-10-07 17:00:10.322126,0.07616495133533874,0.8477361352992079,0.5124562051137337,0.3477102817431239,0.9803013846846018,0.11564621113417217,0.500394919401976,0.9532968441813576,0.6981640095296011,0.11464731475203482 +2024-10-07 17:00:10.342126,0.2816774196886087,0.6946350894468526,0.06032100021328923,0.08671742329512011,0.3703285502256528,0.23098242439121186,0.8024638729107021,0.7519069755902849,0.1567668195054993,0.6188475921954685 +2024-10-07 17:00:10.362126,0.3562879856158999,0.5821585057874376,0.5106635612521454,0.3801483655437152,0.9198726678600827,0.9418166961193987,0.388429410377256,0.2977471804131373,0.9471563180632896,0.14325457954021137 +2024-10-07 17:00:10.382126,0.2567504742394161,0.06216349913585972,0.06597933896982477,0.9929001020720042,0.04246495074234258,0.059639438947922785,0.35082423391626916,0.49816120975354294,0.4793436374437493,0.9044262977225134 +2024-10-07 17:00:10.402126,0.729668044188631,0.87303573486452,0.28241255331422177,0.9844784607945438,0.5628283312476997,0.7772705514326647,0.5621228460418618,0.31655274740604544,0.14667119076481394,0.46479306538035026 +2024-10-07 17:00:10.422126,0.6079166221523755,0.3381935261303928,0.4027552753614071,0.9463221601878666,0.3079585365095189,0.7384939769719963,0.3068195207236858,0.9170954845496502,0.5033755202586822,0.1866483806338577 +2024-10-07 
17:00:10.442126,0.6374130109345892,0.18034567217625985,0.3434712128818532,0.17021073481209192,0.3008496813095347,0.9906288845050785,0.1767094160069448,0.10490367522653032,0.451783284411753,0.28951700336392294 +2024-10-07 17:00:10.462126,0.6064278489558738,0.927735760571258,0.5420948784165588,0.6429208279586314,0.6491094970355998,0.4658083998707867,0.5023833964486385,0.31178928614967627,0.7371075546530739,0.20117118757702135 +2024-10-07 17:00:10.482126,0.4513829072880269,0.08750235625396385,0.897429061180492,0.525573337899755,0.4958912577189468,0.6666299042049784,0.6673821543792452,0.24940296325704814,0.39990346596891735,0.03908119199471982 +2024-10-07 17:00:10.502126,0.6526868292235004,0.2396467272906465,0.48876587520447246,0.9600364585017024,0.5645764863120679,0.6525672122039082,0.5892799462667705,0.2623733923630508,0.12644043893423196,0.557038851697632 +2024-10-07 17:00:10.522126,0.8206340728243324,0.3856078258875246,0.10355737982275359,0.40008217561542403,0.775489883125943,0.7809085279365546,0.9728982608591412,0.7437225300634555,0.9064050525860767,0.34954913722287684 +2024-10-07 17:00:10.542126,0.7183869527962506,0.6057076354942508,0.7141305521692453,0.864181892944079,0.3063329533214951,0.5922208449149451,0.2165961724433344,0.361412924700091,0.9195428793118164,0.9117659605065301 +2024-10-07 17:00:10.562126,0.5337432823497181,0.3029799047348265,0.7187998606002997,0.6413294998441611,0.06956711458021725,0.7653584390353513,0.5478444411319665,0.23956816537402514,0.565309371127512,0.09571756811408905 +2024-10-07 17:00:10.582126,0.9438967074410107,0.5947221959103646,0.36845583913223356,0.062139986030510186,0.4868199978665264,0.9933813175937465,0.5802919514297851,0.3290430584865637,0.10596102722542844,0.2548595173495307 +2024-10-07 17:00:10.602126,0.9934730684688866,0.8642964082869516,0.4976780006661251,0.28274284200988264,0.2873345110619544,0.5603090594354533,0.03671323561253215,0.2910425794795004,0.27643557935209917,0.9933617886999094 +2024-10-07 17:00:10.622126,0.7625363022038244,0.15457745224329023,0.14594004187301046,0.5333898421447607,0.06272457694883149,0.6695233127369234,0.11348515414578297,0.5372190720009107,0.8982742712845617,0.0735953901666454 +2024-10-07 17:00:10.642126,0.9752915088836254,0.4212118900250873,0.7517891300677948,0.6173271203730661,0.9484753022335238,0.49003318121474826,0.9673069773540193,0.7340492846172215,0.7752184647782351,0.3540611205129477 +2024-10-07 17:00:10.662126,0.8752711555400344,0.5802701554789678,0.5206563647840734,0.33822713547368,0.9940105706040785,0.22225489571851353,0.09547252426599151,0.6501010981395459,0.11054088308311194,0.9583261706090845 +2024-10-07 17:00:10.682126,0.07347363289278785,0.014646911613324765,0.7320190228788871,0.7709297572291158,0.15512255074438153,0.15390836335702818,0.5177227117538571,0.3236803212166608,0.8070567542605646,0.14637148735742855 +2024-10-07 17:00:10.702126,0.7705283572230599,0.10391905603399265,0.22647839339151277,0.19381828130242862,0.7394259865623566,0.537284895745162,0.665941669144839,3.682024595530464e-05,0.2948538006772726,0.5132436577867977 +2024-10-07 17:00:10.722126,0.5801002058755251,0.5347360901142872,0.9426863263468371,0.10078965364197234,0.18092649132383554,0.4158556131912532,0.48780894559491883,0.30729645738027744,0.8750348655618886,0.7104045822140087 +2024-10-07 17:00:10.742126,0.5643358961498655,0.7476536978422998,0.42184219720188965,0.5986913936301931,0.7995464506487457,0.47011356406382276,0.47165656214479945,0.640485625907709,0.14945533562104174,0.5078522070157255 +2024-10-07 
17:00:10.762126,0.689416312998768,0.6869095715975265,0.44233170845065484,0.18918733339531457,0.05840017597923497,0.5475729301622566,0.8653724176218798,0.4307388257626793,0.02484138230351174,0.6041369641600293 +2024-10-07 17:00:10.782126,0.655117523072002,0.7744548323118985,0.12191113923871255,0.7580828943619093,0.5846162089198462,0.10163805596923403,0.9548748239914063,0.17047041185779388,0.3828186387151279,0.2125862018544078 +2024-10-07 17:00:10.802126,0.9978949894880911,0.4407389649883179,0.3874802461581107,0.787917288981986,0.46395079770224845,0.7076803126961722,0.021586569921456245,0.5047605761660581,0.11754763946252733,0.6914425169805245 +2024-10-07 17:00:10.822126,0.18230805546987583,0.7162526432491131,0.21515641083754522,0.011646521130654008,0.28184867409261694,0.9899533951647436,0.8802442501936819,0.15601023898717636,0.43165659645721866,0.8624006609001295 +2024-10-07 17:00:10.842126,0.08772137639860766,0.023610987792976545,0.18152206111415536,0.38963482785942316,0.9412403573232293,0.4134256244939435,0.7480337877841542,0.7004645831294896,0.6068471493001919,0.9784047958687754 +2024-10-07 17:00:10.862126,0.555431255352411,0.32821540235710556,0.8050463718214766,0.39059374606368025,0.901122696046085,0.7502017205918795,0.9967762731875593,0.46418284947619637,0.2502871364121475,0.8883270975329431 +2024-10-07 17:00:10.882126,0.8628168741632191,0.0691464712374249,0.2338115694969456,0.20339096239946164,0.31131589189871856,0.8803184037865743,0.6050595482347587,0.9791982321603043,0.7768227079004556,0.17412397358921428 +2024-10-07 17:00:10.902126,0.7372826427025838,0.47780638494554384,0.9565364953062893,0.8023572786972596,0.697601265722501,0.2996954441916203,0.45187436183282126,0.5050166941863234,0.5971251947486449,0.2663036154763788 +2024-10-07 17:00:10.922126,0.2616462515215068,0.9801170295903668,0.9714067759861551,0.6171366258392272,0.20210955994064783,0.5821302248582718,0.21728119192057127,0.5659070515053762,0.33890253787639546,0.7847785601399289 +2024-10-07 17:00:10.942126,0.30765032091105926,0.3315540842714122,0.8583987435373389,0.8090398019035149,0.053143709883349044,0.6161159934280365,0.9085598110294536,0.22921848109316345,0.09698643931431572,0.9558913981896255 +2024-10-07 17:00:10.962126,0.29892134818962646,0.9746840156550289,0.9189029062113264,0.6774566374705723,0.6404015592597371,0.39860549421962166,0.36998704719579667,0.7438885993373414,0.7012269332301863,0.35944279411072455 diff --git a/python/examples/data_import/tdms/.env-example b/python/examples/data_import/tdms/.env-example new file mode 100644 index 00000000..cdef5f89 --- /dev/null +++ b/python/examples/data_import/tdms/.env-example @@ -0,0 +1,3 @@ +SIFT_API_URI="" +SIFT_API_KEY="" +ASSET_NAME="" \ No newline at end of file diff --git a/python/examples/data_import/tdms/main.py b/python/examples/data_import/tdms/main.py new file mode 100644 index 00000000..e02de78d --- /dev/null +++ b/python/examples/data_import/tdms/main.py @@ -0,0 +1,33 @@ +import os + +from dotenv import load_dotenv +from sift_py.data_import.tdms import TdmsUploadService +from sift_py.rest import SiftRestConfig + +if __name__ == "__main__": + """ + Example of uploading a TDMS file into Sift. 
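+
+    Set `group_into_components=True` to upload TDMS groups as Sift components, and
+    `ignore_errors=True` to skip TDMS channels that lack valid data or timing information.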
+ """ + + load_dotenv() + + sift_uri = os.getenv("SIFT_API_URI") + assert sift_uri, "expected 'SIFT_API_URI' environment variable to be set" + + apikey = os.getenv("SIFT_API_KEY") + assert apikey, "expected 'SIFT_API_KEY' environment variable to be set" + + asset_name = os.getenv("ASSET_NAME") + assert asset_name, "expected 'ASSET_NAME' environment variable to be set" + + rest_config: SiftRestConfig = { + "uri": sift_uri, + "apikey": apikey, + } + + tdms_upload_service = TdmsUploadService(rest_config) + import_service = tdms_upload_service.upload( + "sample_data.tdms", asset_name, group_into_components=True + ) + print(import_service.wait_until_complete()) + print("Upload example complete!") diff --git a/python/examples/data_import/tdms/requirements.txt b/python/examples/data_import/tdms/requirements.txt new file mode 100644 index 00000000..2dda90fe --- /dev/null +++ b/python/examples/data_import/tdms/requirements.txt @@ -0,0 +1,2 @@ +python-dotenv +sift-stack-py diff --git a/python/examples/data_import/tdms/sample_data.tdms b/python/examples/data_import/tdms/sample_data.tdms new file mode 100644 index 0000000000000000000000000000000000000000..c85d77eb144ff5b8c15b8a2f0b3235a3d1ffd695 GIT binary patch literal 14271 zcmeI0dpK0>yT=g{${|Up=%6zqk}5b*zY!*u(^btv$Z(uX65Q;;b!k-qxJiYmy!SC+q=e&8gHWh zJ`^O3_>sM{wX2PjjkDYD8@(i!w7cu#0|@=+x&IvCw5{z~n}3}qy#Hzf#ODc{v(~Qm zE^hXxon?qaS;Dmb=CVEQD)a9f{jc6a@%MG^&h~D^ow?dOpZtB@EW-9H?BFK4Q#Kbi z{&w*a=3(XNZnN>k`#y^e6#($QF(x1jd>7V7H{HO6y{%3h?`qOx9`e%8l z{AoN?{#hQI|1=((|5+ZYe;NZpT{u$%XqGzA;czUuI&mH5atl$|vExeK2b2GqSemgRevS8nqAg z5Di~hAo;Be_h!jL)RTlWD%T3GsM7Ie;;k)@!yr7!g)6xH>fq;HWp&A+3N`z02-WPU zhSZSOr2`5~xL9!Azi8TzdTQ^Q;|nS<8huJ+T>LY-hcdTt71EJDckA6i9R~Kj^s|_E zuN(eOFPs|~bSTwJv1l#>@OiK5=CiaG;f=*zJt-}yQIcKJI2nu8i$ufXuXQ6Up-5In zo`xz_3GV241~w(z_$?Z%$HYXK+|M=|YHb6m_PDcgAZv4K{BAZJO8cgA+{t)Xc=^u4 zBsQLKdxiTfqC-akT_*q1{n+C`l_K!)I44=*?c+)I>Dj8yoGC^%n?<< z>K{bR7Y`4Y*Cf>3Jjg3(Ux=gEzSe(luZ5ejkEdI3HM|^k9-UDO2e02GKdF;}(|SWA z1uOetr&jE`Zec$pKMBgU3)SPfMwV+eB=glT7Hgt6@&9AGrVS~Kk z{l>j4tYxy+8P+qA!CAq-TCxXkH;&jYv!Wn|yl4F9l`6!PuKl)qaSNQ{9Fuf}S%_*G zGqK$ji;5lf7uI|!M-FpD{goaA2}!@63hZQ~zA4nN&8`Vexg8$vv@)!`X_nd%$OiZE z46XVF`FP-~S+VIm4RS6f(H>_y;Z;2_OYl)Xx_`2sdxp^PaeFiA%Uc@6mh0c>(QAYA z=8uyfI@_S=G>e`R7lSp9ITD9;*tjDdoEz)d32s;GwO&4Cj6E$dTpLKi?Gr_{&Y=u= zZFUZkXL}(hgPV2mX&(-;#d)_n4&q+^HGSrrB&<&ee@*eK1pR$>`@N$SP&6a&x_f=b z&uqb!gK|s+m#;q>>d}QXOy$w{mB8;t#lb0M8j8EhL&b&}SmE?YT6!}ZBK~ckRbEmc zLly0`h<^&1=Irmu^l*gp+v}4Vy`WgCaR0U89#-gOXFEF3v6H%a4Ua(wcHQ-wy?FT` zWE7gpS*uFm74UFNyKfIZB#YPIZ6|ouAW2G3U`Y&<3!oWqQ%Mos&LF886ThBvWpnmM5;GDUIq7pl+N?3qJkoOZ z00V0tORaiyhYtPrpe^#Q?Qj?2yOc#`W0<`E=%)-Cd^L^VHfT`c)-y!i#!-W`$Hk&W z0l6@8*%VgQTL@a6&)qsD58?~YtHsc+scQ~PV{mW6 zA;h8N8IDR`Tw?g1i8rk_mk&#IV_@USLQ@h8F?R1OOV-sP+`D8%_zML;R%Snl3aCM( zA7|sQ3L5NI$$591mZD^dsN?vPOz6M()j}O^K_|b0z_Y*lp-hhRPLrpg+~vjh^84Ko z{x}-=bgBw^&nwNwHS3}IEq3#^!e}B6)}0Tn%|%w{a?MhQGLVOBec!7$fifY{aRu?9 zIZT%9m#W8`Fxl^_#B<*nT$8>+r5~YZ0>9P_)M9+B@5C+NOqf}TzMNW4#NB#Fn* zkdd{8l-=5maXYjAd<_z|N92v2WwwE1rT%j%kv6C~I#d2CYQc|8l}b?}-VPN9udPzc zgIUpSq3tyc+>Y|%ejd^bC55jPCm#wFBTAhL3>v`y_?91Y1gf+^pl z*x)$)?Ud^)SNJH#$#a(0VaT38b`3G_!=9LWSj=YPe3FZlHW62*3SRYieIJ09g~|rc zXBF_1H!-KW=U_e4JBYWr3EZZ(^up0zc-;&hNuI-mZcf04zC&-2OA{Ay4s8KI-Jlu;dt_#Z} z<>|(;YTJ~8|6W^n zq%#wqhpRuur}Vjw}M@Y4)h6)@$^e8-08P%vyR?36;}m##Ng zCX*}hvhsxgT!|V4y4vRWEG8jWH8IL~C>OdXk6m9p^$yWO7IHlAX&73lQ0Uar4wo!x zoh?^LMBZ=;csaifA@XG^@yUHy>(Dx+JfYOogIjTXCd}U)qvk9 z9R?})ZuU&F;3_V6X!c18IE_bc2yLT5*n3-za90u(;~N{)*A_ukwuzrvLaYy?w{_L( z78u_xzb)0}ExJw)W)_O{!8tx$!SP`~Dx0reoIBW!!rn`~6O=x1ynG+Vy73CRE}T*h 
z>u%t^_2iJ|1RGqJN4$2-Z$$a-i(9LuiM&AHbc(#79sJQNmht_fz#*uZOvJyhX!D*6fTZn!)e6_VBoD(1%>QL*5i&ga;lddd+E>) zSG^liw1@#*PahQfS=Nv897hsg3kw(fRtWeI@%g@0N}tNdL5%0c3e|hoB6OS33gd|$ zbSLk&72nkaHEJ-lGU(Ws!Xc%RPDS7HGjTlLY_R84&2?(&gUKnk?UP}p;NY2^b=b2P zezQ-Odq?D;;9Sgg$<4(4SpO;Pud*)ax=ra@B%~mglbRFyvI<`{EFOB&+aUZwwk)nX z8{d|yT%xk?nS@ zp9%Y`a~U!FyOI9hb8Kl)Gx{Rh_kXh>f&bR*x7+43U^UgAvilSbyduMV0UG_tT{QSQ zc`_M(PU${2&ogjtX#arljvfSrf7jS!O-1R~T?fLFs5s>aebHW=Ka_pBP52EY6(6gJCf7l>zhL;s{eH+8blqVb%Z1+g zjmA03B*^XkF+vmUfLK9z+M>tJ$ZfwZbnaduhK53__i1N?d+?y$PO&ChTT5P=}6<)7$k4Qzm#;)pz@oF>%LZ3XXow`GVxJhIYZDj{M z;-2hRe%^xdadM?lMJhzEZt?%Rza8D~^f9#!gJ8@%;_U9+4cGUVBK2cC;3KrtNW!}T z99LIn&bO(7!iJiv%a7>T`)J!mf3JSH(4_*Cj*!6I%YJRw-i0{o==-yG81T3os}$hd zfQi}xpA(X;5GCKrD5MiS?&*unK1{~w<00wt6gFD!_}S0$ZHFhC7iz6*LBvY##Zy-c zA;DL5Q-0(#9BPBi)QNeqU3ZTqlST!{@%8=Ay#ZLg#>6yjssO?2-lS6u5^kxh)z_S- zLL^cuzIAmstn+>>^10Om9|*WduO~y_VBRIKh;m%KnBEmxR)cv)moCemtwD+X7V$B$ z4zz7Hpr;PiVl$s<{I5$?Tt2JzD>XUa_INV(uXZBOFJj^j^QOV$ zBnryBoW7cw_2Rqg&A7Q`G*Cxx^X=G7MdtClJ*JM~xqdk!(i_2M2>Jk%Yv@IwD$0-Pq$g@6wqOg7f|H z)OOAw_(Ufj`9SbV=F+t~euIut2Rq7@M?Ih&#LAS#%}`8%c`!Q@{%cMyTYS112Lftk zFMMWTq+gwtL+wGEF6p^hTq6cosj?AB#x~ny%Pl_^LOx{vM;`xX6i)S4-&)v>SGwB` zSHuwYUVm5rZbBElD3Q@Ab?Zj_nT%h35e=}tAaq)`nF^VmUw5q5dIi3~^aN4yW*Ba< zFqAi=Vw9ff@Flhz3GU(=@~X|y33Qo=3t>WhlZl;qStmvUxD_J?*&yYu<#KT9fmhps zvHLe7V9hF0eAeHM`J1+VOn60upk1EvzLshTCazuD>R*Vq^n=FoYULQ3RK96+v<2ta zKD(pEIkr8+~ALboZ2p1sstVECc%-`=`bwPsZp}Jv+J}T=OV2Aku7)JzR{PoPM)>u(dA}8` z!sqiA33Dac(C6OXoKs$k&Cx%0>TT@AEss4PmhI|>8yDA{F~?#&)O1%pEja*+^6o9? zpD-~rSE4{gBNJI#(e%`iIs~67ig;Gs2T!L*JK}=65Xh4$FrTQWD^kfkefp;|knWoO8h`t_$)`I`)*MG;Cd>y;dT!2PEO9l5^h%(MRf9 z;`NCM->cdC0^`}3@EmvYu4@O$-SN~Ro+eyT9<|~+*9lT&V#KelHMrQOzGEVqj(eFI zdXM(jLiJbVE9p(W(7e6A=uA-#{8yFRUkIh3z%kU+%!dZ^w;Bq{*NFLYa>PfwnaB&l zLLCD6gNT;6=-gV-51X-cN2eWZ-0=8t=xPN6(#4i5^=21h?Tw+6WwS_#IC}d?Z(knj zTRJqjuGhlOz*0A#m4zEOpC94m>O{;s*^XhNUNEp+vVm_b4FVr@zbo(Ug8{=tuD-Jz zM-;P%ANce^(|na_`qvNGWYx=U(BBCU-&7rQ#TG2f7WpwW$i#|e!q*>&)Z7i%Qv#OpDx?yy*%v@P@ z083SPG+N*GfLFNYptLzr#}&9#xVDqwG34@sJko`WIoY8xyW6q8N6=6!iw4#EAulfE z5_Qft4WlSiD!%Rv=g}edS1g&2ATLu++!|3 zy~S`41xZ`xdU3FjI(**HbjbiTbp>+YFlab`_i)XxhizDPcx8-dRTuOX%Hp^3)FMk` z$Ju$G3SbdBm-AC03;yLA8bNu3NZ0ssCg%zT@#hz6+a-3uo)Q%Ph}dT;TR{EtDqs+S ze3ZDk#<^f_%`PN~b%SsHa_w`gvoR`p%WIi?GqFFhz^#i>fwq{Y<|NI}kf=HwSV$qm zWpCz!Fua5RzVTP_)$Mp`71A{S1QRrd@?xvlTDV@)zOGK>1Em8UBe#@mP_vM%v}Io@ zt_5w7DH-U+_)Eq6#xn0=bc`>uuA&+;H(jrFfr?m>CHDHn^TkA_Iq_+2q#Tuni}*cXRB=R&x*MpfRfrecWIVxX&7hjrBYk8UP? zU@iGX;|O6Pvu>YMDJdV6ZSu#TYQ=+cTjqsw(*vZbBnmyVtHWB`-D0i&ER=UZEZs)gVjJnze+17TVs`*-U@byl-Yg6@1UDt!Wja_PTYzo?VZ)J_=HXvVPYMClg zM-Ld!8$C*qNPgF?8^^n_wMbgv`UC@7&PQtn zCkK#H9n|wPI0icO6W?Bw`e0v^m*Lydjn%%)!jvaHFlc}5W6ErWantqX(YHvL|Dw`- z$c3o4JiEN)_@Yo?9)6k8-385xY6blVVeoIh=1{%26Ku1oWsSQSxY4@$=jfL%IG(&E z@kO^4y4JJ#T8Q_{ks6h+^7$=7U0fMToTa!>qQ+WwpdTd#99LyCbKxm0byo9TIX<>= z(!1U=k(cIwo=)i5%z-&C8sB8XVZX(iNINp5Ja25%&ST){!tFRZqnLNgW2@BI>-_;A^Xwdw1i^mGv=)j|||~Tsf1XyPYuV zluypvK!Z|K-YxyaZk)=PI$e2@*hjLHJa^zk19&fR#N;07fTF;|a=E}~I9>=NgOmY8 U9U7LsT#*UxbB;&;;;q1c05pB4ssI20 literal 0 HcmV?d00001 diff --git a/python/lib/sift_py/data_import/__init__.py b/python/lib/sift_py/data_import/__init__.py new file mode 100644 index 00000000..0aec3738 --- /dev/null +++ b/python/lib/sift_py/data_import/__init__.py @@ -0,0 +1,129 @@ +""" +This module contains services to facilitate importing data. +It also provides utilities to easily query the import status. 
+
+The fundamental components of this module are the following:
+- `sift_py.data_import.config.CsvConfig`
+- `sift_py.data_import.csv.CsvUploadService`
+- `sift_py.data_import.status.DataImportService`
+
+
+## Simple CSV Upload
+
+A simple CSV upload without needing to craft a custom CSV config can be done like so:
+```python
+from sift_py.data_import.csv import CsvUploadService
+from sift_py.data_import.status import DataImportService
+from sift_py.rest import SiftRestConfig
+
+rest_config: SiftRestConfig = {
+    "uri": sift_uri,
+    "apikey": apikey,
+}
+
+asset_name = "Your Asset Name"
+csv_upload_service = CsvUploadService(rest_config)
+import_service: DataImportService = csv_upload_service.simple_upload(asset_name, "sample_data.csv")
+
+# Blocks until the import is completed.
+import_service.wait_until_complete()
+```
+
+This example makes several assumptions about how the data is formatted: for example, that the first
+column contains datetime-formatted timestamps. See the docstring for `simple_upload` for what can be
+overridden.
+
+## TDMS Upload
+
+TDMS files can be uploaded like so:
+```python
+from sift_py.data_import.status import DataImportService
+from sift_py.data_import.tdms import TdmsUploadService
+from sift_py.rest import SiftRestConfig
+
+rest_config: SiftRestConfig = {
+    "uri": sift_uri,
+    "apikey": apikey,
+}
+
+asset_name = "Your Asset Name"
+tdms_upload_service = TdmsUploadService(rest_config)
+import_service: DataImportService = tdms_upload_service.upload("sample_data.tdms", asset_name)
+
+# Blocks until the import is completed.
+import_service.wait_until_complete()
+```
+
+If you want to upload TDMS groups as Sift components, set `group_into_components` to True:
+```python
+tdms_upload_service.upload("sample_data.tdms", asset_name, group_into_components=True)
+```
+
+Sometimes there are TDMS channels without valid data or timing information; you can skip these
+channels by setting `ignore_errors` to True:
+```python
+tdms_upload_service.upload("sample_data.tdms", asset_name, ignore_errors=True)
+```
+The channels being skipped will be printed to stdout.
+
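+## Polling the import status
+
+`wait_until_complete` blocks until the data import finishes. If you would rather poll the status
+yourself, a minimal sketch (assuming `get_data_import` and `DataImportStatusType` behave as
+exercised by the tests in this module) looks like:
+```python
+import time
+
+from sift_py.data_import.status import DataImportStatusType
+
+# `import_service` is the DataImportService returned by one of the upload calls above.
+data_import = import_service.get_data_import()
+while data_import.status in (DataImportStatusType.PENDING, DataImportStatusType.IN_PROGRESS):
+    time.sleep(1)
+    data_import = import_service.get_data_import()
+
+if data_import.status == DataImportStatusType.FAILED:
+    raise Exception("Data import failed.")
+```
+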
+## CSV Upload with custom CSV config
+
+If your data is formatted a specific way you can create a CsvConfig that will be used to properly
+parse your data:
+```python
+import csv
+
+from sift_py.data_import.csv import CsvUploadService
+from sift_py.data_import.status import DataImportService
+from sift_py.rest import SiftRestConfig
+from sift_py.data_import.config import CsvConfig
+
+rest_config: SiftRestConfig = {
+    "uri": sift_uri,
+    "apikey": apikey,
+}
+
+asset_name = "Your Asset Name"
+csv_upload_service = CsvUploadService(rest_config)
+
+# Create CSV config.
+input_csv = "sample_data.csv"
+
+# Parse CSV to get channel names.
+data_config = {}
+with open(input_csv, "r") as f:
+    reader = csv.DictReader(f)
+    headers = next(reader)
+    for i, channel in enumerate(headers):
+        if channel == "timestamp":
+            continue
+        data_config[i + 1] = {
+            "name": channel,
+            # This example assumes all channels are doubles.
+            # Can also use `ChannelDataType.DOUBLE` or `double`.
+            "data_type": "CHANNEL_DATA_TYPE_DOUBLE",
+            "description": f"Example channel {channel}",
+        }
+
+csv_config = CsvConfig(
+    {
+        "asset_name": asset_name,
+        "first_data_row": 2,
+        "time_column": {
+            "format": "TIME_FORMAT_ABSOLUTE_DATETIME",
+            # Can also use `TimeFormatType.ABSOLUTE_DATETIME`.
+            "column_number": 1,
+        },
+        "data_columns": data_config,
+    }
+)
+
+import_service: DataImportService = csv_upload_service.upload(input_csv, csv_config)
+import_service.wait_until_complete()
+```
+
+In this example the CSV config is built programmatically. You can also load a JSON file directly:
+```python
+import json
+from sift_py.data_import.config import CsvConfig
+
+with open("config.json") as f:
+    csv_config = CsvConfig(json.load(f))
+```
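+
+For reference, a hand-written `config.json` equivalent to a single-channel version of the config
+above might look like the following (a sketch; the full spec is defined by `CsvConfigImpl` in
+`sift_py.data_import._config`, and JSON requires the `data_columns` keys to be strings):
+```json
+{
+    "asset_name": "Your Asset Name",
+    "first_data_row": 2,
+    "time_column": {
+        "format": "TIME_FORMAT_ABSOLUTE_DATETIME",
+        "column_number": 1
+    },
+    "data_columns": {
+        "2": {
+            "name": "channel_0",
+            "data_type": "CHANNEL_DATA_TYPE_DOUBLE",
+            "description": "Example channel channel_0"
+        }
+    }
+}
+```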
+ ) + + if format.is_relative(): + if self.relative_start_time is None: + raise PydanticCustomError("invalid_config_error", "Missing 'relative_start_time'") + else: + if self.relative_start_time is not None: + raise PydanticCustomError( + "invalid_config_error", + "'relative_start_time' specified for non relative time format.", + ) + + return self + + +class DataColumn(ConfigBaseModel): + """ + Defines a data column entry in the CSV config. + """ + + name: str + data_type: Union[str, ChannelDataType, Type] + component: str = "" + units: str = "" + description: str = "" + # Only valid if data_type is "CHANNEL_DATA_TYPE_ENUM". + enum_types: List[EnumType] = [] + # Only valid if data_type is "CHANNEL_DATA_TYPE_BIT_FIELD" + bit_field_elements: List[BitFieldElement] = [] + + @field_validator("data_type", mode="before") + @classmethod + def convert_data_type(cls, raw: Union[str, ChannelDataType, Type]) -> str: + """ + Converts the provided data_type value to a string. + """ + if isinstance(raw, type): + if raw == int: + return ChannelDataType.INT_64.as_human_str(api_format=True) + elif raw == float: + return ChannelDataType.DOUBLE.as_human_str(api_format=True) + elif raw == str: + return ChannelDataType.STRING.as_human_str(api_format=True) + elif raw == bool: + return ChannelDataType.BOOL.as_human_str(api_format=True) + elif isinstance(raw, ChannelDataType): + return raw.as_human_str(api_format=True) + elif isinstance(raw, str): + value = ChannelDataType.from_str(raw) + if value is not None: + return value.as_human_str(api_format=True) + + raise PydanticCustomError("invalid_config_error", f"Invalid data_type: {raw}.") + + @model_validator(mode="after") + def validate_enums(self) -> Self: + """ + Validates the enum configuration. + """ + data_type = ChannelDataType.from_str(self.data_type) # type: ignore + if self.enum_types: + if data_type != ChannelDataType.ENUM: + raise PydanticCustomError( + "invalid_config_error", + f"Enums can only be specified with the CHANNEL_DATA_TYPE_ENUM data type. {self.name} is {self.data_type}", + ) + + return self + + @model_validator(mode="after") + def validate_bit_fields(self) -> Self: + """ + Validates the bit field configuration. + """ + data_type = ChannelDataType.from_str(self.data_type) # type: ignore + if self.bit_field_elements: + if data_type != ChannelDataType.BIT_FIELD: + raise PydanticCustomError( + "invalid_config_error", + f"Bit fields can only be specified with the CHANNEL_DATA_TYPE_BIT_FIELD data type. 
{self.name} is {self.data_type}", + ) + + return self diff --git a/python/lib/sift_py/data_import/_config_test.py b/python/lib/sift_py/data_import/_config_test.py new file mode 100644 index 00000000..ef6ebc7a --- /dev/null +++ b/python/lib/sift_py/data_import/_config_test.py @@ -0,0 +1,166 @@ +import pytest + +from sift_py.data_import.config import CsvConfig +from sift_py.data_import.time_format import TimeFormatType +from sift_py.ingestion.channel import ChannelDataType + + +@pytest.fixture +def csv_config_data(): + return { + "asset_name": "test_asset", + "first_data_row": 2, + "time_column": { + "format": "TIME_FORMAT_ABSOLUTE_DATETIME", + "column_number": 1, + }, + "data_columns": { + 1: { + "name": "channel", + "data_type": "CHANNEL_DATA_TYPE_INT_32", + } + }, + } + + +def test_empty_data_columns(csv_config_data: dict): + csv_config_data["data_columns"] = {} + with pytest.raises(Exception, match="Empty 'data_columns'"): + CsvConfig(csv_config_data) + + +def test_data_column_validation(csv_config_data: dict): + csv_config_data["data_columns"] = { + 1: { + "name": "channel", + "data_type": "INVALID_DATA_TYPE", + } + } + with pytest.raises(Exception, match="Invalid data_type:"): + CsvConfig(csv_config_data) + + csv_config_data["data_columns"] = {1: {"name": "channel", "data_type": complex}} + with pytest.raises(Exception, match="Invalid data_type:"): + CsvConfig(csv_config_data) + + csv_config_data["data_columns"] = { + 1: {"name": "channel_bool", "data_type": ChannelDataType.BOOL}, + 2: {"name": "channel_double", "data_type": ChannelDataType.DOUBLE}, + 3: {"name": "channel_int", "data_type": ChannelDataType.INT_64}, + 4: {"name": "channel_str", "data_type": ChannelDataType.STRING}, + } + CsvConfig(csv_config_data) + + +def test_enums(csv_config_data: dict): + csv_config_data["data_columns"] = { + 1: { + "name": "channel", + "data_type": "CHANNEL_DATA_TYPE_INT_32", + "enum_types": [ + {"key": 1, "name": "value_1"}, + {"key": 2, "name": "value_2"}, + ], + } + } + with pytest.raises(Exception, match="Enums can only be specified"): + CsvConfig(csv_config_data) + + csv_config_data["data_columns"] = { + 1: { + "name": "channel", + "data_type": "CHANNEL_DATA_TYPE_ENUM", + "enum_types": [ + {"key": 1, "name": "value_1", "extra_key": "value"}, + {"key": 2, "name": "value_2"}, + ], + } + } + with pytest.raises(Exception, match="validation error"): + CsvConfig(csv_config_data) + + csv_config_data["data_columns"] = { + 1: { + "name": "channel", + "data_type": "CHANNEL_DATA_TYPE_ENUM", + "enum_types": [ + {"key": 1, "name": "value_1"}, + {"key": 2, "name": "value_2"}, + ], + } + } + CsvConfig(csv_config_data) + + +def test_bit_field(csv_config_data: dict): + csv_config_data["data_columns"] = { + 1: { + "name": "channel", + "data_type": "CHANNEL_DATA_TYPE_INT_32", + "bit_field_elements": [ + {"index": 1, "name": "bit_field_name_1", "bit_count": 4}, + ], + } + } + with pytest.raises(Exception, match="Bit fields can only be specified"): + CsvConfig(csv_config_data) + + csv_config_data["data_columns"] = { + 1: { + "name": "channel", + "data_type": "CHANNEL_DATA_TYPE_INT_32", + "bit_field_elements": [ + { + "index": 1, + "name": "bit_field_name_1", + "bit_count": 4, + "extra_key": "value", + }, + ], + } + } + with pytest.raises(Exception, match="validation error"): + CsvConfig(csv_config_data) + + csv_config_data["data_columns"] = { + 1: { + "name": "channel", + "data_type": "CHANNEL_DATA_TYPE_BIT_FIELD", + "bit_field_elements": [ + {"index": 1, "name": "bit_field_name_1", "bit_count": 4}, + ], + } + } + 
CsvConfig(csv_config_data) + + +def test_time_column(csv_config_data: dict): + csv_config_data["time_column"] = { + "format": "INVALID_TIME_FORMAT", + "column_number": 1, + } + with pytest.raises(Exception, match="Invalid time format"): + CsvConfig(csv_config_data) + + csv_config_data["time_column"] = { + "format": "TIME_FORMAT_RELATIVE_SECONDS", + "column_number": 1, + } + with pytest.raises(Exception, match="Missing 'relative_start_time'"): + CsvConfig(csv_config_data) + + csv_config_data["time_column"] = { + "format": "TIME_FORMAT_ABSOLUTE_UNIX_SECONDS", + "column_number": 1, + "relative_start_time": "100", + } + with pytest.raises( + Exception, match="'relative_start_time' specified for non relative time format." + ): + CsvConfig(csv_config_data) + + csv_config_data["time_column"] = { + "format": TimeFormatType.ABSOLUTE_DATETIME, + "column_number": 1, + } + CsvConfig(csv_config_data) diff --git a/python/lib/sift_py/data_import/_csv_test.py b/python/lib/sift_py/data_import/_csv_test.py new file mode 100644 index 00000000..8dbed288 --- /dev/null +++ b/python/lib/sift_py/data_import/_csv_test.py @@ -0,0 +1,261 @@ +import json + +import pandas as pd +import pytest +from pytest_mock import MockFixture + +from sift_py.data_import.config import CsvConfig +from sift_py.data_import.csv import CsvUploadService +from sift_py.rest import SiftRestConfig + + +class MockResponse: + status_code: int + text: str + + def __init__(self, status_code: int, text: str): + self.status_code = status_code + self.text = text + + def json(self) -> dict: + return json.loads(self.text) + + +csv_config = CsvConfig( + { + "asset_name": "test_asset", + "first_data_row": 2, + "time_column": { + "format": "TIME_FORMAT_ABSOLUTE_DATETIME", + "column_number": 1, + }, + "data_columns": { + 2: { + "name": "channel_1", + "data_type": "CHANNEL_DATA_TYPE_DOUBLE", + } + }, + } +) + +rest_config: SiftRestConfig = { + "uri": "some_uri.com", + "apikey": "123123123", +} + + +def test_csv_upload_service_upload_validate_path(mocker: MockFixture): + mock_path_is_file = mocker.patch("sift_py.data_import.csv.Path.is_file") + mock_path_is_file.return_value = False + + with pytest.raises(Exception, match="does not point to a regular file"): + svc = CsvUploadService(rest_config) + svc.upload( + path="some_csv.csv", + csv_config=csv_config, + ) + + +def test_csv_upload_service_upload_validate_mime_type(mocker: MockFixture): + mock_path_is_file = mocker.patch("sift_py.data_import.csv.Path.is_file") + mock_path_is_file.return_value = True + + with pytest.raises(Exception, match="MIME"): + svc = CsvUploadService(rest_config) + svc.upload( + path="some_csv.asdfghjkl", + csv_config=csv_config, + ) + + with pytest.raises(Exception, match="Must be text or csv"): + svc = CsvUploadService(rest_config) + svc.upload( + path="some_file.pdf", + csv_config=csv_config, + ) + + +def test_csv_upload_service_invalid_config_response(mocker: MockFixture): + mock_path_is_file = mocker.patch("sift_py.data_import.csv.Path.is_file") + mock_path_is_file.return_value = True + + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.return_value = MockResponse(status_code=400, text="Invalid request") + with pytest.raises(Exception, match="Config file upload request failed"): + svc = CsvUploadService(rest_config) + + svc.upload( + path="some_csv.csv", + csv_config=csv_config, + ) + + +def test_csv_upload_service_invalid_data_response(mocker: MockFixture): + mock_path_is_file = mocker.patch("sift_py.data_import.csv.Path.is_file") 
+ mock_path_is_file.return_value = True + + mocker.patch( + "sift_py.data_import.csv.open", + mocker.mock_open(), + ) + + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.return_value = MockResponse(status_code=200, text="asdgasdg") + + with pytest.raises(Exception, match="Invalid response"): + svc = CsvUploadService(rest_config) + + svc.upload( + path="some_csv.csv", + csv_config=csv_config, + ) + + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.return_value = MockResponse(status_code=200, text="{}") + + with pytest.raises(Exception, match="Response missing required keys"): + svc = CsvUploadService(rest_config) + + svc.upload( + path="some_csv.csv", + csv_config=csv_config, + ) + + mock_requests_post.side_effect = [ + MockResponse( + status_code=200, + text=json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"}), + ), + MockResponse(status_code=400, text="Invalid request"), + ] + + with pytest.raises(Exception, match="Data file upload request failed"): + svc = CsvUploadService(rest_config) + + svc.upload( + path="some_csv.csv", + csv_config=csv_config, + ) + + +def test_csv_upload_service_success(mocker: MockFixture): + mock_path_is_file = mocker.patch("sift_py.data_import.csv.Path.is_file") + mock_path_is_file.return_value = True + + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.side_effect = [ + MockResponse( + status_code=200, + text=json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"}), + ), + MockResponse(status_code=200, text=""), + ] + + mocker.patch( + "sift_py.data_import.csv.open", + mocker.mock_open(), + ) + svc = CsvUploadService( + { + "uri": "some_uri.com", + "apikey": "123123123", + }, + ) + + svc.upload( + path="some_csv.csv", + csv_config=csv_config, + ) + + +def test_csv_upload_service_upload_validate_url(mocker: MockFixture): + with pytest.raises(Exception, match="Invalid URL scheme:"): + svc = CsvUploadService(rest_config) + + svc.upload_from_url( + url="asdf://some_url.com/file.csv", + csv_config=csv_config, + ) + + +def test_csv_upload_service_upload_from_url_invalid_config(mocker: MockFixture): + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.return_value = MockResponse(status_code=400, text="Invalid request") + with pytest.raises(Exception, match="URL upload request failed"): + svc = CsvUploadService(rest_config) + + svc.upload_from_url( + url="http://some_url.com/file.csv", + csv_config=csv_config, + ) + + +def test_csv_upload_service_upload_from_url_success(mocker: MockFixture): + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.return_value = MockResponse( + status_code=200, + text=json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"}), + ) + svc = CsvUploadService( + { + "uri": "some_uri.com", + "apikey": "123123123", + }, + ) + + svc.upload_from_url( + url="http://some_url.com/file.csv", + csv_config=csv_config, + ) + + +def test_simple_upload_invalid_csv(mocker: MockFixture): + mock_path_is_file = mocker.patch("sift_py.data_import.csv.Path.is_file") + mock_path_is_file.return_value = True + + mock_read_csv = mocker.patch("sift_py.data_import.csv.pd.read_csv") + mock_read_csv.return_value = pd.DataFrame( + { + "time": [1, 2, 3], + "channel_1": [1, 1.0, True], + } + ) + with pytest.raises(Exception, match="Unable to upload.*Inferred type: mixed-integer"): + svc = 
CsvUploadService(rest_config) + svc.simple_upload("test_asset", "sample.csv") + + mock_read_csv = mocker.patch("sift_py.data_import.csv.pd.read_csv") + mock_read_csv.return_value = pd.DataFrame( + { + "time": [1, 2, 3], + "channel_1": [complex(1), complex(1), complex(1)], + } + ) + with pytest.raises(Exception, match="Unable to upload.*Inferred type: complex"): + svc = CsvUploadService(rest_config) + svc.simple_upload("test_asset", "sample.csv") + + mock_read_csv = mocker.patch("sift_py.data_import.csv.pd.read_csv") + mock_read_csv.return_value = pd.DataFrame( + { + "time": [1, 2, 3], + "channel_bool": [True, True, False], + "channel_int": [-1, 2, 0], + "channel_double": [1.0, 2.0, -3.3], + "channel_string": ["a", "b", "c"], + } + ) + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.side_effect = [ + MockResponse( + status_code=200, + text=json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"}), + ), + MockResponse(status_code=200, text=""), + ] + mocker.patch( + "sift_py.data_import.csv.open", + mocker.mock_open(), + ) + svc = CsvUploadService(rest_config) + svc.simple_upload("test_asset", "sample.csv") diff --git a/python/lib/sift_py/data_import/_status_test.py b/python/lib/sift_py/data_import/_status_test.py new file mode 100644 index 00000000..104c6528 --- /dev/null +++ b/python/lib/sift_py/data_import/_status_test.py @@ -0,0 +1,176 @@ +import json +from copy import deepcopy + +import pytest +from pytest_mock import MockFixture + +from sift_py.data_import.status import DataImportService, DataImportStatusType +from sift_py.rest import SiftRestConfig + +rest_config: SiftRestConfig = { + "uri": "some_uri.com", + "apikey": "123123123", +} + + +@pytest.fixture +def data_import_data(): + return { + "dataImport": { + "dataImportId": "random-data-import-id", + "createdDate": "2024-10-07T18:37:00.146649Z", + "modifiedDate": "2024-10-07T18:37:00.146649Z", + "sourceUrl": "", + "status": "", + "errorMessage": "", + "csvConfig": {}, + } + } + + +class MockResponse: + status_code: int + text: str + + def __init__(self, status_code: int, text: str): + self.status_code = status_code + self.text = text + + def json(self): + return json.loads(self.text) + + def raise_for_status(self): + if self.status_code != 200: + raise Exception("Invalid status") + + +def test_get_status(mocker: MockFixture, data_import_data: dict): + mock_requests_post = mocker.patch("sift_py.data_import.status.requests.get") + data_import_data["dataImport"]["status"] = "DATA_IMPORT_STATUS_SUCCEEDED" + mock_requests_post.return_value = MockResponse( + status_code=200, text=json.dumps(data_import_data) + ) + service = DataImportService(rest_config, "123-123-123") + assert service.get_data_import().status == DataImportStatusType.SUCCEEDED + + data_import_data["dataImport"]["status"] = "DATA_IMPORT_STATUS_PENDING" + mock_requests_post.return_value = MockResponse( + status_code=200, text=json.dumps(data_import_data) + ) + service = DataImportService(rest_config, "123-123-123") + assert service.get_data_import().status == DataImportStatusType.PENDING + + data_import_data["dataImport"]["status"] = "DATA_IMPORT_STATUS_IN_PROGRESS" + mock_requests_post.return_value = MockResponse( + status_code=200, text=json.dumps(data_import_data) + ) + service = DataImportService(rest_config, "123-123-123") + assert service.get_data_import().status == DataImportStatusType.IN_PROGRESS + + data_import_data["dataImport"]["status"] = "DATA_IMPORT_STATUS_FAILED" + mock_requests_post.return_value 
= MockResponse( + status_code=200, text=json.dumps(data_import_data) + ) + service = DataImportService(rest_config, "123-123-123") + assert service.get_data_import().status == DataImportStatusType.FAILED + + data_import_data["dataImport"]["status"] = "INVALID_STATUS" + with pytest.raises(Exception, match="Invalid data import status"): + mock_requests_post.return_value = MockResponse( + status_code=200, text=json.dumps(data_import_data) + ) + service = DataImportService(rest_config, "123-123-123") + service.get_data_import() + + +def test_wait_success(mocker: MockFixture, data_import_data: dict): + mock_time_sleep = mocker.patch("sift_py.data_import.status.time.sleep") + mock_requests_get = mocker.patch("sift_py.data_import.status.requests.get") + + succeeded = deepcopy(data_import_data) + succeeded["dataImport"]["status"] = "DATA_IMPORT_STATUS_SUCCEEDED" + + pending = deepcopy(data_import_data) + pending["dataImport"]["status"] = "DATA_IMPORT_STATUS_PENDING" + + in_progress = deepcopy(data_import_data) + in_progress["dataImport"]["status"] = "DATA_IMPORT_STATUS_IN_PROGRESS" + + mock_requests_get.side_effect = [ + MockResponse( + status_code=200, + text=json.dumps(pending), + ), + MockResponse( + status_code=200, + text=json.dumps(in_progress), + ), + MockResponse( + status_code=200, + text=json.dumps(succeeded), + ), + ] + + service = DataImportService(rest_config, "123-123-123") + assert service.wait_until_complete().status == DataImportStatusType.SUCCEEDED + mock_time_sleep.assert_any_call(1) + mock_time_sleep.assert_any_call(2) + + +def test_wait_failure(mocker: MockFixture, data_import_data: dict): + mock_requests_get = mocker.patch("sift_py.data_import.status.requests.get") + + failed = deepcopy(data_import_data) + failed["dataImport"]["status"] = "DATA_IMPORT_STATUS_FAILED" + + pending = deepcopy(data_import_data) + pending["dataImport"]["status"] = "DATA_IMPORT_STATUS_PENDING" + + in_progress = deepcopy(data_import_data) + in_progress["dataImport"]["status"] = "DATA_IMPORT_STATUS_IN_PROGRESS" + + mock_requests_get.side_effect = [ + MockResponse( + status_code=200, + text=json.dumps(pending), + ), + MockResponse( + status_code=200, + text=json.dumps(in_progress), + ), + MockResponse( + status_code=200, + text=json.dumps(failed), + ), + ] + + service = DataImportService(rest_config, "123-123-123") + assert service.wait_until_complete().status == DataImportStatusType.FAILED + + +def test_wait_max_polling_interval(mocker: MockFixture, data_import_data: dict): + mock_time_sleep = mocker.patch("sift_py.data_import.status.time.sleep") + mock_requests_get = mocker.patch("sift_py.data_import.status.requests.get") + + succeeded = deepcopy(data_import_data) + succeeded["dataImport"]["status"] = "DATA_IMPORT_STATUS_SUCCEEDED" + + in_progress = deepcopy(data_import_data) + in_progress["dataImport"]["status"] = "DATA_IMPORT_STATUS_IN_PROGRESS" + + mock_requests_get.side_effect = [ + MockResponse( + status_code=200, + text=json.dumps(in_progress), + ) + for _ in range(60) + ] + [ + MockResponse( + status_code=200, + text=json.dumps(succeeded), + ) + ] + + service = DataImportService(rest_config, "123-123-123") + assert service.wait_until_complete().status == DataImportStatusType.SUCCEEDED + mock_time_sleep.assert_called_with(60) diff --git a/python/lib/sift_py/data_import/_tdms_test.py b/python/lib/sift_py/data_import/_tdms_test.py new file mode 100644 index 00000000..59cb411e --- /dev/null +++ b/python/lib/sift_py/data_import/_tdms_test.py @@ -0,0 +1,238 @@ +import json +from typing import Any, 
Dict, List, Optional
+
+import pandas as pd
+import pytest
+from nptdms import TdmsFile, types  # type: ignore
+from pytest_mock import MockFixture
+
+from sift_py.data_import.tdms import TdmsUploadService
+from sift_py.rest import SiftRestConfig
+
+
+class MockTdmsChannel:
+    def __init__(
+        self,
+        name: str,
+        group_name: str,
+        properties: Optional[Dict[str, str]] = None,
+        data: Optional[List[int]] = None,
+        data_type: type = types.Int32,
+    ):
+        self.name: str = name
+        self.group_name: str = group_name
+        self.properties: Optional[Dict[str, str]] = properties or {}
+        self.data: Optional[List[int]] = data or []
+        self.data_type: type = data_type
+
+
+class MockTdmsGroup:
+    def __init__(self, name, channels: List[MockTdmsChannel]):
+        self.name: str = name
+        self.path: str = f"/'{name}'"
+        self._channels: List[MockTdmsChannel] = channels
+        self.properties: Optional[Dict[str, str]] = {}
+
+    def channels(self) -> List[MockTdmsChannel]:
+        return self._channels
+
+
+class MockTdmsFile:
+    def __init__(self, groups: List[MockTdmsGroup]):
+        self._groups: List[MockTdmsGroup] = groups
+        self.properties: Dict[str, str] = {}
+
+    def groups(self) -> List[MockTdmsGroup]:
+        return self._groups
+
+    def as_dataframe(self, *_, **__):
+        return pd.DataFrame()
+
+
+class MockResponse:
+    def __init__(self):
+        self.status_code = 200
+        self.text = json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"})
+
+    def json(self) -> dict:
+        return json.loads(self.text)
+
+
+@pytest.fixture
+def mock_tdms_file():
+    mock_tdms_groups = [
+        MockTdmsGroup(
+            name=f"Group {g}",
+            channels=[
+                MockTdmsChannel(
+                    name=f"Test/channel_{c}",
+                    group_name=f"Group {g}",
+                    data=[1, 2, 3],
+                    properties={
+                        "wf_start_time": 0,
+                        "wf_increment": 0.1,
+                        "wf_start_offset": 0,
+                        "extra": "info",
+                    },
+                )
+                for c in range(5)
+            ],
+        )
+        for g in range(5)
+    ]
+
+    return MockTdmsFile(mock_tdms_groups)
+
+
+rest_config: SiftRestConfig = {
+    "uri": "some_uri.com",
+    "apikey": "123123123",
+}
+
+
+def test_tdms_upload_service_upload_validate_path(mocker: MockFixture):
+    mock_path_is_file = mocker.patch("sift_py.data_import.tdms.Path.is_file")
+    mock_path_is_file.return_value = False
+
+    with pytest.raises(Exception, match="does not point to a regular file"):
+        svc = TdmsUploadService(rest_config)
+        svc.upload("some_data.tdms", "asset_name")
+
+
+def test_tdms_upload_success(mocker: MockFixture, mock_tdms_file: MockTdmsFile):
+    mock_path_is_file = mocker.patch("sift_py.data_import.tdms.Path.is_file")
+    mock_path_is_file.return_value = True
+
+    mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post")
+    mock_requests_post.return_value = MockResponse()
+
+    def mock_tdms_file_constructor(path):
+        """The first call should always return the mocked object since
+        it is mocking a call to open the original tdms file.
+
+        The second call should return a real TdmsFile since the unit
+        test will actually create one with filtered channels.
+ """ + print(path) + if path == "some_tdms.tdms": + return mock_tdms_file + else: + return TdmsFile(path) + + mocker.patch("sift_py.data_import.tdms.TdmsFile", mock_tdms_file_constructor) + + svc = TdmsUploadService(rest_config) + + def get_csv_config(mock, n): + """Return the CSV config that was created and uploaded under the hood.""" + return json.loads(mock_requests_post.call_args_list[n].kwargs["data"])["csv_config"] + + # Test without grouping + svc.upload("some_tdms.tdms", "asset_name") + config = get_csv_config(mock_requests_post, 0) + expected_config: Dict[str, Any] = { + "asset_name": "asset_name", + "run_name": "", + "run_id": "", + "first_data_row": 2, + "time_column": { + "format": "TIME_FORMAT_ABSOLUTE_DATETIME", + "column_number": 1, + "relative_start_time": None, + }, + "data_columns": {}, + } + for i in range(5): + for j in range(5): + expected_config["data_columns"][str(2 + (i * 5) + j)] = { + "name": f"Test channel_{j}", + "data_type": "CHANNEL_DATA_TYPE_INT_32", + "component": "", + "units": "", + "description": "None\nwf_start_time: 0\nwf_increment: 0.1\nwf_start_offset: 0\nextra: info\n", + "enum_types": [], + "bit_field_elements": [], + } + assert config == expected_config + + # Test with grouping + svc.upload("some_tdms.tdms", "asset_name", group_into_components=True) + config = get_csv_config(mock_requests_post, 2) + for i in range(5): + for j in range(5): + expected_config["data_columns"][str(2 + (i * 5) + j)]["component"] = f"Group {i}" + assert config == expected_config + + # Test with run information + svc.upload( + "some_tdms.tdms", + "asset_name", + group_into_components=True, + run_name="Run Name", + run_id="Run ID", + ) + expected_config["run_name"] = "Run Name" + expected_config["run_id"] = "Run ID" + config = get_csv_config(mock_requests_post, 4) + assert config == expected_config + + +def test_tdms_upload_ignore_errors(mocker: MockFixture): + mock_path_is_file = mocker.patch("sift_py.data_import.tdms.Path.is_file") + mock_path_is_file.return_value = True + + mocker.patch("sift_py.data_import.tdms.TdmsWriter") + + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.return_value = MockResponse() + + # Start with all invalid channels + mock_tdms_groups = [ + MockTdmsGroup( + name=f"Group {g}", + channels=[MockTdmsChannel(f"channel_{c}", f"Group {g}") for c in range(5)], + ) + for g in range(5) + ] + mock_tdms_file = MockTdmsFile(mock_tdms_groups) + + def mock_tdms_file_constructor(path): + """The first call should always return the mocked object since + it is mocking a call to open the orignal tdms file. + + The second call should return a real TdmsFile since the unit + test will actually create one with filtered channels. 
+ """ + print(path) + if path == "some_tdms.tdms": + return mock_tdms_file + else: + return TdmsFile(path) + + mocker.patch("sift_py.data_import.tdms.TdmsFile", mock_tdms_file_constructor) + + svc = TdmsUploadService(rest_config) + + with pytest.raises(Exception, match="does not contain timing information"): + svc.upload("some_tdms.tdms", "asset_name") + + with pytest.raises(Exception, match="No valid channels remaining"): + svc.upload("some_tdms.tdms", "asset_name", ignore_errors=True) + + +def test_tdms_upload_unknown_data_type(mocker: MockFixture, mock_tdms_file: MockTdmsFile): + mock_path_is_file = mocker.patch("sift_py.data_import.tdms.Path.is_file") + mock_path_is_file.return_value = True + + mocker.patch("sift_py.data_import.tdms.TdmsWriter") + + mock_requests_post = mocker.patch("sift_py.data_import.csv.requests.post") + mock_requests_post.return_value = MockResponse() + + mock_tdms_file.groups()[0].channels()[0].data_type = types.ComplexDoubleFloat + mocker.patch("sift_py.data_import.tdms.TdmsFile").return_value = mock_tdms_file + + svc = TdmsUploadService(rest_config) + + with pytest.raises(Exception, match="data type not supported"): + svc.upload("some_tdms.tdms", "asset_name") diff --git a/python/lib/sift_py/data_import/config.py b/python/lib/sift_py/data_import/config.py new file mode 100644 index 00000000..d41354c7 --- /dev/null +++ b/python/lib/sift_py/data_import/config.py @@ -0,0 +1,19 @@ +from typing import Any, Dict + +from sift_py.data_import._config import CsvConfigImpl + + +class CsvConfig: + """ + Defines the CSV config for data imports. + """ + + def __init__(self, config_info: Dict[str, Any]): + self._config_info = config_info + self._csv_config = CsvConfigImpl(**self._config_info) + + def to_json(self) -> str: + return self._csv_config.model_dump_json() + + def to_dict(self) -> Dict[str, Any]: + return self._csv_config.model_dump() diff --git a/python/lib/sift_py/data_import/csv.py b/python/lib/sift_py/data_import/csv.py new file mode 100644 index 00000000..2f460ebe --- /dev/null +++ b/python/lib/sift_py/data_import/csv.py @@ -0,0 +1,216 @@ +import json +import mimetypes +from pathlib import Path +from typing import Optional, Tuple, Union +from urllib.parse import urljoin, urlparse + +import pandas as pd +import requests + +from sift_py.data_import.config import CsvConfig +from sift_py.data_import.status import DataImportService +from sift_py.data_import.time_format import TimeFormatType +from sift_py.rest import SiftRestConfig, compute_uri + + +class CsvUploadService: + UPLOAD_PATH = "/api/v1/data-imports:upload" + URL_PATH = "/api/v1/data-imports:url" + + _rest_conf: SiftRestConfig + _upload_uri: str + _url_uri: str + _apikey: str + + def __init__(self, rest_conf: SiftRestConfig): + self._rest_conf = rest_conf + base_uri = compute_uri(rest_conf) + self._apikey = rest_conf["apikey"] + self._upload_uri = urljoin(base_uri, self.UPLOAD_PATH) + self._url_uri = urljoin(base_uri, self.URL_PATH) + + def upload( + self, + path: Union[str, Path], + csv_config: CsvConfig, + ) -> DataImportService: + """ + Uploads the CSV file pointed to by `path` using a custom CSV config. 
+ """ + content_encoding = self._validate_file_type(path) + + response = requests.post( + url=self._upload_uri, + headers={ + "Authorization": f"Bearer {self._apikey}", + "Content-Encoding": "application/octet-stream", + }, + data=json.dumps({"csv_config": csv_config.to_dict()}), + ) + + if response.status_code != 200: + raise Exception( + f"Config file upload request failed with status code {response.status_code}. {response.text}" + ) + + try: + upload_info = response.json() + except (json.decoder.JSONDecodeError, KeyError): + raise Exception(f"Invalid response: {response.text}") + + try: + upload_url: str = upload_info["uploadUrl"] + data_import_id: str = upload_info["dataImportId"] + except KeyError as e: + raise Exception(f"Response missing required keys: {e}") + + with open(path, "rb") as f: + headers = { + "Authorization": f"Bearer {self._apikey}", + "Content-Encoding": content_encoding, + } + + response = requests.post( + url=upload_url, + headers=headers, + data=f, + ) + + if response.status_code != 200: + raise Exception( + f"Data file upload request failed with status code {response.status_code}. {response.text}" + ) + + return DataImportService(self._rest_conf, data_import_id) + + def upload_from_url( + self, + url: str, + csv_config: CsvConfig, + ) -> DataImportService: + """ + Uploads the CSV file pointed to by `url` using a custom CSV config. + """ + parsed_url = urlparse(url) + if parsed_url.scheme not in ["s3", "http", "https"]: + raise Exception( + f"Invalid URL scheme: '{parsed_url.scheme}'. Only S3 and HTTP(S) URLs are supported." + ) + + headers = {"Authorization": f"Bearer {self._apikey}"} + + response = requests.post( + url=self._url_uri, + headers=headers, + data=json.dumps( + ( + { + "url": url, + "csv_config": csv_config.to_dict(), + } + ) + ), + ) + + if response.status_code != 200: + raise Exception( + f"URL upload request failed with status code {response.status_code}. {response.text}" + ) + + try: + upload_info = response.json() + except (json.decoder.JSONDecodeError, KeyError) as e: + raise Exception(f"Invalid response: {e}") + + try: + data_import_id: str = upload_info["dataImportId"] + except KeyError as e: + raise Exception(f"Response missing required keys: {e}") + + return DataImportService(self._rest_conf, data_import_id) + + def simple_upload( + self, + asset_name: str, + path: Union[str, Path], + first_data_row: int = 2, + time_column: int = 1, + time_format: TimeFormatType = TimeFormatType.ABSOLUTE_DATETIME, + run_name: Optional[str] = None, + run_id: Optional[str] = None, + ) -> DataImportService: + """ + Uploads the CSV file pointed to by `path` to the specified asset. This function will + infer the data types and assume certain things about how the data is formatted. See the options + below for what parameters can be overridden. Use `upload` if you need to specify a custom CSV config. + + Override `first_data_row` to specify which is the first row with data. Default is 2. + Override `time_column` to specify which column contains timestamp information. Default is 1. + Override `time_format` to specify the time data format. Default is `TimeFormatType.ABSOLUTE_DATETIME`. + Override `run_name` to specify the name of the run to create for this data. Default is None. + Override `run_id` to specify the id of the run to add this data to. Default is None. 
+ """ + self._validate_file_type(path) + + types = { + "integer": int, + "string": str, + "floating": float, + "boolean": bool, + } + data_config = {} + df = pd.read_csv(path) + for i, header in enumerate(df.columns): + if i + 1 == time_column: + continue + + inferred_dtype = pd.api.types.infer_dtype(df[df.columns[i]], skipna=False) + dtype = types.get(inferred_dtype) + if dtype is None: + raise Exception( + f"Unable to upload data type in column {i+1} {header}. Inferred type: {inferred_dtype}" + ) + + data_config[i + 1] = {"name": header, "data_type": dtype} + + config_info = { + "asset_name": asset_name, + "first_data_row": first_data_row, + "time_column": { + "format": time_format, + "column_number": time_column, + }, + "data_columns": data_config, + } + + if run_name is not None: + config_info["run_name"] = run_name + + if run_id is not None: + config_info["run_id"] = run_name + + csv_config = CsvConfig(config_info) + + return self.upload(path, csv_config) + + def _validate_file_type(self, path: Union[str, Path]) -> Optional[str]: + posix_path = Path(path) if isinstance(path, str) else path + + if not posix_path.is_file(): + raise Exception(f"Provided path, '{path}', does not point to a regular file.") + + _, mimetype, content_encoding = self.__class__._mime_and_content_type_from_path(posix_path) + + if not mimetype: + raise Exception(f"The MIME-type of '{posix_path}' could not be computed.") + + if mimetype not in ["test/plain", "text/csv"]: + raise Exception(f"{path} is not a valid file type. Must be text or csv.") + + return content_encoding + + @staticmethod + def _mime_and_content_type_from_path(path: Path) -> Tuple[str, Optional[str], Optional[str]]: + file_name = path.name + mime, encoding = mimetypes.guess_type(path) + return file_name, mime, encoding diff --git a/python/lib/sift_py/data_import/status.py b/python/lib/sift_py/data_import/status.py new file mode 100644 index 00000000..34fab72c --- /dev/null +++ b/python/lib/sift_py/data_import/status.py @@ -0,0 +1,113 @@ +import time +from datetime import datetime +from enum import Enum +from typing import Optional, Union +from urllib.parse import urljoin + +import requests +from pydantic import BaseModel, ConfigDict, field_validator +from pydantic.alias_generators import to_camel +from pydantic_core import PydanticCustomError +from typing_extensions import Self + +from sift_py.rest import SiftRestConfig, compute_uri + + +class DataImportStatusType(Enum): + """Status of the data import.""" + + SUCCEEDED = "DATA_IMPORT_STATUS_SUCCEEDED" + PENDING = "DATA_IMPORT_STATUS_PENDING" + IN_PROGRESS = "DATA_IMPORT_STATUS_IN_PROGRESS" + FAILED = "DATA_IMPORT_STATUS_FAILED" + + @classmethod + def from_str(cls, val: str) -> Optional[Self]: + try: + return cls(val) + except ValueError: + return None + + def as_human_str(self) -> str: + return self.value + + +class DataImport(BaseModel): + """Metadata regarding the data import.""" + + model_config = ConfigDict(extra="forbid", alias_generator=to_camel, populate_by_name=True) + + data_import_id: str + created_date: datetime + modified_date: datetime + source_url: str = "" + status: Union[str, DataImportStatusType] + error_message: str = "" + csv_config: dict + + @field_validator("status", mode="before") + @classmethod + def convert_status(cls, raw: Union[str, DataImportStatusType]) -> DataImportStatusType: + if isinstance(raw, DataImportStatusType): + return raw + elif isinstance(raw, str): + value = DataImportStatusType.from_str(raw) + if value is not None: + return value + + raise 
+            "invalid_data_import_error", f"Invalid data import status: {raw}."
+        )
+
+
+class DataImportService:
+    """
+    Service used to retrieve information about a particular data import.
+    """
+
+    STATUS_PATH = "/api/v1/data-imports"
+    _data_import_id: str
+
+    def __init__(self, restconf: SiftRestConfig, data_import_id: str):
+        base_uri = compute_uri(restconf)
+        self._data_import_id = data_import_id
+        self._status_uri = urljoin(base_uri, self.STATUS_PATH)
+        self._apikey = restconf["apikey"]
+
+    def get_data_import(self) -> DataImport:
+        """
+        Returns information about the data import.
+        """
+        response = requests.get(
+            url=f"{self._status_uri}/{self._data_import_id}",
+            headers={"Authorization": f"Bearer {self._apikey}"},
+        )
+        response.raise_for_status()
+
+        data = response.json().get("dataImport")
+        data_import = DataImport(**data)
+        return data_import
+
+    def wait_until_complete(self) -> DataImport:
+        """
+        Blocks until the data import is completed. Check the status to determine
+        whether the import was successful.
+        """
+        polling_interval = 1
+        while True:
+            data_import = self.get_data_import()
+            status: DataImportStatusType = data_import.status  # type: ignore
+            if status in [
+                DataImportStatusType.SUCCEEDED,
+                DataImportStatusType.FAILED,
+            ]:
+                return data_import
+            elif status in [
+                DataImportStatusType.PENDING,
+                DataImportStatusType.IN_PROGRESS,
+            ]:
+                pass
+            else:
+                raise Exception(f"Unknown status: {status}")
+            time.sleep(polling_interval)
+            polling_interval = min(polling_interval * 2, 60)
diff --git a/python/lib/sift_py/data_import/tdms.py b/python/lib/sift_py/data_import/tdms.py
new file mode 100644
index 00000000..7ab3a089
--- /dev/null
+++ b/python/lib/sift_py/data_import/tdms.py
@@ -0,0 +1,200 @@
+from pathlib import Path
+from tempfile import NamedTemporaryFile
+from typing import Dict, List, Optional, Union
+
+from nptdms import (  # type: ignore
+    ChannelObject,
+    RootObject,
+    TdmsChannel,
+    TdmsFile,
+    TdmsWriter,
+    types,
+)
+
+from sift_py.data_import._config import DataColumn, TimeColumn
+from sift_py.data_import.config import CsvConfig
+from sift_py.data_import.csv import CsvUploadService
+from sift_py.data_import.status import DataImportService
+from sift_py.data_import.time_format import TimeFormatType
+from sift_py.ingestion.channel import ChannelDataType
+from sift_py.rest import SiftRestConfig
+
+TDMS_TO_SIFT_TYPES = {
+    types.Boolean: ChannelDataType.BOOL,
+    types.Int8: ChannelDataType.INT_32,
+    types.Int16: ChannelDataType.INT_32,
+    types.Int32: ChannelDataType.INT_32,
+    types.Int64: ChannelDataType.INT_64,
+    types.Uint8: ChannelDataType.UINT_32,
+    types.Uint16: ChannelDataType.UINT_32,
+    types.Uint32: ChannelDataType.UINT_32,
+    types.Uint64: ChannelDataType.UINT_64,
+    types.SingleFloat: ChannelDataType.FLOAT,
+    types.DoubleFloat: ChannelDataType.DOUBLE,
+}
+
+
+class TdmsUploadService:
+    """
+    Service to upload TDMS files.
+    """
+
+    _csv_upload_service: CsvUploadService
+
+    def __init__(self, rest_conf: SiftRestConfig):
+        self._csv_upload_service = CsvUploadService(rest_conf)
+
+    def upload(
+        self,
+        path: Union[str, Path],
+        asset_name: str,
+        group_into_components: bool = False,
+        ignore_errors: bool = False,
+        run_name: Optional[str] = None,
+        run_id: Optional[str] = None,
+    ) -> DataImportService:
+        """
+        Uploads the TDMS file pointed to by `path` to the specified asset.
+
+        Set `group_into_components` to True if you want to upload the TDMS groups as
+        a Sift Component.
+
+        If `ignore_errors` is True, channels without timing information are skipped.
+
+        Override `run_name` to specify the name of the run to create for this data. Default is None.
+        Override `run_id` to specify the id of the run to add this data to. Default is None.
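+
+        A minimal usage sketch (the file and asset names are illustrative):
+
+            svc = TdmsUploadService(rest_config)
+            import_service = svc.upload(
+                "data.tdms", "my_asset", group_into_components=True, ignore_errors=True
+            )
+            import_service.wait_until_complete()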
+        """
+        posix_path = Path(path) if isinstance(path, str) else path
+
+        if not posix_path.is_file():
+            raise Exception(f"Provided path, '{path}', does not point to a regular file.")
+
+        with NamedTemporaryFile(mode="w", suffix=".csv") as temp_file:
+            valid_channels = self._convert_to_csv(path, temp_file.name, ignore_errors)
+            if not valid_channels:
+                raise Exception(f"No valid channels remaining in {path}")
+
+            csv_config = self._create_csv_config(
+                valid_channels, asset_name, group_into_components, run_name, run_id
+            )
+            return self._csv_upload_service.upload(temp_file.name, csv_config)
+
+    def _convert_to_csv(
+        self, src_path: Union[str, Path], dst_path: Union[str, Path], ignore_errors: bool
+    ) -> List[TdmsChannel]:
+        """Converts the TDMS file to a temporary CSV on disk that we will upload.
+
+        Returns the valid channels after parsing the TDMS file. Valid channels contain
+        timing information.
+        """
+
+        def contains_timing(channel: TdmsChannel) -> bool:
+            """Returns true if the TDMS channel contains timing information."""
+            return all(
+                [
+                    "wf_increment" in channel.properties,
+                    "wf_start_time" in channel.properties,
+                    "wf_start_offset" in channel.properties,
+                ]
+            )
+
+        def normalize_name(channel_name: str) -> str:
+            """Normalize channel names by replacing invalid characters."""
+            return " ".join(channel_name.replace("/", " ").split())
+
+        src_file = TdmsFile(src_path)
+
+        original_groups = src_file.groups()
+        valid_channels: List[ChannelObject] = []
+        for group in original_groups:
+            for channel in group.channels():
+                if contains_timing(channel):
+                    new_channel = ChannelObject(
+                        group=normalize_name(channel.group_name),
+                        channel=normalize_name(channel.name),
+                        data=channel.data,
+                        properties=channel.properties,
+                    )
+                    valid_channels.append(new_channel)
+                else:
+                    if ignore_errors:
+                        print(
+                            f"{group.name}:{channel.name} does not contain timing information. Skipping."
+                        )
+                    else:
+                        raise Exception(
+                            f"{group.name}:{channel.name} does not contain timing information. "
+                            "Set `ignore_errors` to True to skip channels without timing information."
+                        )
+
+        # Write out the new TDMS file with invalid channels removed, then convert to CSV.
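+        # The filtered channels are round-tripped through a fresh TDMS file so that
+        # nptdms can rebuild a consistent file and export it as a dataframe with an
+        # absolute time index, which becomes the CSV that gets uploaded.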
+ with NamedTemporaryFile(mode="w") as f: + with TdmsWriter(f.name) as tdms_writer: + root_object = RootObject(src_file.properties) + tdms_writer.write_segment([root_object] + original_groups + valid_channels) + + filtered_tdms_file = TdmsFile(f.name) + df = filtered_tdms_file.as_dataframe(time_index=True, absolute_time=True) + df.to_csv(dst_path, encoding="utf-8") + + return [channel for group in filtered_tdms_file.groups() for channel in group.channels()] + + def _create_csv_config( + self, + channels: List[TdmsChannel], + asset_name: str, + group_into_components: bool, + run_name: Optional[str] = None, + run_id: Optional[str] = None, + ) -> CsvConfig: + """Construct a CsvConfig based on metadata within the TDMS file.""" + data_config: Dict[int, DataColumn] = {} + # Data columns start in column 2 (1-indexed) + first_data_column = 2 + for i, channel in enumerate(channels): + try: + data_type = TDMS_TO_SIFT_TYPES[channel.data_type].as_human_str(api_format=True) + except KeyError: + data_type = None + + if data_type is None: + raise Exception(f"{channel.name} data type not supported: {channel.data_type}") + + extra_info = "" + for k, v in channel.properties.items(): + # Skip these since the csv config has dedicated fields for them. + if k in ["description", "unit_string"]: + continue + # Must convert datetime to a string + elif k == "wf_start_time": + v = str(v) + extra_info += f"{k}: {v}\n" + + channel_config = DataColumn( + name=channel.name, + data_type=data_type, + description=f"{channel.properties.get('description')}\n{extra_info}", + units=channel.properties.get("unit_string") or "", + ) + if group_into_components and channel.group_name: + channel_config.component = channel.group_name + + data_config[first_data_column + i] = channel_config + + config_info = { + "asset_name": asset_name, + "first_data_row": first_data_column, + "time_column": TimeColumn( + format=TimeFormatType.ABSOLUTE_DATETIME, + column_number=1, + ), + "data_columns": data_config, + } + + if run_name is not None: + config_info["run_name"] = run_name + + if run_id is not None: + config_info["run_id"] = run_id + + return CsvConfig(config_info) diff --git a/python/lib/sift_py/data_import/time_format.py b/python/lib/sift_py/data_import/time_format.py new file mode 100644 index 00000000..e4032577 --- /dev/null +++ b/python/lib/sift_py/data_import/time_format.py @@ -0,0 +1,39 @@ +from enum import Enum +from typing import Optional + +from typing_extensions import Self + + +class TimeFormatType(Enum): + ABSOLUTE_RFC3339 = "TIME_FORMAT_ABSOLUTE_RFC3339" + ABSOLUTE_DATETIME = "TIME_FORMAT_ABSOLUTE_DATETIME" + ABSOLUTE_UNIX_SECONDS = "TIME_FORMAT_ABSOLUTE_UNIX_SECONDS" + ABSOLUTE_UNIX_MILLISECONDS = "TIME_FORMAT_ABSOLUTE_UNIX_MILLISECONDS" + ABSOLUTE_UNIX_MICROSECONDS = "TIME_FORMAT_ABSOLUTE_UNIX_MICROSECONDS" + ABSOLUTE_UNIX_NANOSECONDS = "TIME_FORMAT_ABSOLUTE_UNIX_NANOSECONDS" + RELATIVE_NANOSECONDS = "TIME_FORMAT_RELATIVE_NANOSECONDS" + RELATIVE_MICROSECONDS = "TIME_FORMAT_RELATIVE_MICROSECONDS" + RELATIVE_MILLISECONDS = "TIME_FORMAT_RELATIVE_MILLISECONDS" + RELATIVE_SECONDS = "TIME_FORMAT_RELATIVE_SECONDS" + RELATIVE_MINUTES = "TIME_FORMAT_RELATIVE_MINUTES" + RELATIVE_HOURS = "TIME_FORMAT_RELATIVE_HOURS" + + @classmethod + def from_str(cls, val: str) -> Optional[Self]: + try: + return cls(val) + except ValueError: + return None + + def as_human_str(self) -> str: + return self.value + + def is_relative(self) -> bool: + return self in [ + self.RELATIVE_NANOSECONDS, + self.RELATIVE_MICROSECONDS, + self.RELATIVE_MILLISECONDS, 
+ self.RELATIVE_SECONDS, + self.RELATIVE_MINUTES, + self.RELATIVE_HOURS, + ] diff --git a/python/lib/sift_py/file_attachment/_internal/upload.py b/python/lib/sift_py/file_attachment/_internal/upload.py index b9af7039..d69a9c2e 100644 --- a/python/lib/sift_py/file_attachment/_internal/upload.py +++ b/python/lib/sift_py/file_attachment/_internal/upload.py @@ -1,7 +1,7 @@ import mimetypes from pathlib import Path from typing import Any, Dict, Optional, Tuple, Union -from urllib.parse import urljoin, urlparse +from urllib.parse import urljoin import requests from requests_toolbelt import MultipartEncoder @@ -9,7 +9,7 @@ from sift_py._internal.convert.json import to_json from sift_py.file_attachment.entity import Entity from sift_py.file_attachment.metadata import Metadata -from sift_py.rest import SiftRestConfig +from sift_py.rest import SiftRestConfig, compute_uri class UploadService: @@ -21,7 +21,7 @@ class UploadService: _apikey: str def __init__(self, restconf: SiftRestConfig): - base_uri = self.__class__._compute_uri(restconf) + base_uri = compute_uri(restconf) self._upload_uri = urljoin(base_uri, self.UPLOAD_PATH) self._upload_bulk_uri = urljoin(base_uri, self.UPLOAD_BULK_PATH) self._apikey = restconf["apikey"] @@ -98,16 +98,3 @@ def _mime_and_content_type_from_path(path: Path) -> Tuple[str, Optional[str], Op file_name = path.name mime, encoding = mimetypes.guess_type(path) return file_name, mime, encoding - - @staticmethod - def _compute_uri(restconf: SiftRestConfig) -> str: - uri = restconf["uri"] - parsed_uri = urlparse(uri) - - if parsed_uri.scheme != "": - raise Exception(f"The URL scheme '{parsed_uri.scheme}' should not be included") - - if restconf.get("use_ssl", True): - return f"https://{uri}" - - return f"http://{uri}" diff --git a/python/lib/sift_py/file_attachment/_service_test.py b/python/lib/sift_py/file_attachment/_service_test.py index db234a01..fd6fe051 100644 --- a/python/lib/sift_py/file_attachment/_service_test.py +++ b/python/lib/sift_py/file_attachment/_service_test.py @@ -31,14 +31,15 @@ def content_type(self): def test_file_attachments_service_upload_validate_uri(): mock_channel = MockChannel() - with pytest.raises(Exception, match="URL scheme"): - svc = FileAttachmentService( - mock_channel, - { - "uri": "https://some_uri.com", - "apikey": "123123123", - }, - ) + svc = FileAttachmentService( + mock_channel, + { + "uri": "https://some_uri.com", + "apikey": "123123123", + }, + ) + + assert svc is not None svc = FileAttachmentService( mock_channel, diff --git a/python/lib/sift_py/ingestion/channel.py b/python/lib/sift_py/ingestion/channel.py index 12e8635f..7035b4ad 100644 --- a/python/lib/sift_py/ingestion/channel.py +++ b/python/lib/sift_py/ingestion/channel.py @@ -183,6 +183,37 @@ def from_pb(cls, message: ChannelEnumTypePb) -> Self: return cls(name=message.name, key=message.key) +class ChannelDataTypeStrRep(Enum): + DOUBLE = "double" + STRING = "string" + ENUM = "enum" + BIT_FIELD = "bit_field" + BOOL = "bool" + FLOAT = "float" + INT_32 = "int32" + INT_64 = "int64" + UINT_32 = "uint32" + UINT_64 = "uint64" + + @staticmethod + def from_api_format(val: str) -> Optional["ChannelDataTypeStrRep"]: + try: + return { + "CHANNEL_DATA_TYPE_DOUBLE": ChannelDataTypeStrRep.DOUBLE, + "CHANNEL_DATA_TYPE_STRING": ChannelDataTypeStrRep.STRING, + "CHANNEL_DATA_TYPE_ENUM": ChannelDataTypeStrRep.ENUM, + "CHANNEL_DATA_TYPE_BIT_FIELD": ChannelDataTypeStrRep.BIT_FIELD, + "CHANNEL_DATA_TYPE_BOOL": ChannelDataTypeStrRep.BOOL, + "CHANNEL_DATA_TYPE_FLOAT": ChannelDataTypeStrRep.FLOAT, + 
"CHANNEL_DATA_TYPE_INT_32": ChannelDataTypeStrRep.INT_32, + "CHANNEL_DATA_TYPE_INT_64": ChannelDataTypeStrRep.INT_64, + "CHANNEL_DATA_TYPE_UINT_32": ChannelDataTypeStrRep.UINT_32, + "CHANNEL_DATA_TYPE_UINT_64": ChannelDataTypeStrRep.UINT_64, + }[val] + except KeyError: + return None + + class ChannelDataType(Enum): """ Utility enum class to simplify working with channel data-types generated from protobuf @@ -225,70 +256,73 @@ def from_pb(cls, val: channel_pb.ChannelDataType.ValueType) -> "ChannelDataType" raise ValueError(f"Unknown channel data type '{val}'.") @classmethod - def from_str(cls, val: str) -> Optional["ChannelDataType"]: - val = val.strip() + def from_str(cls, raw: str) -> Optional["ChannelDataType"]: + if raw.startswith("CHANNEL_DATA_TYPE_"): + val = ChannelDataTypeStrRep.from_api_format(raw) + if val is None: + return None + else: + try: + val = ChannelDataTypeStrRep(raw) + except ValueError: + return None - if val == "CHANNEL_DATA_TYPE_DOUBLE" or val == ChannelDataTypeStrRep.DOUBLE.value: + if val == ChannelDataTypeStrRep.DOUBLE: return cls.DOUBLE - elif val == "CHANNEL_DATA_TYPE_STRING" or val == ChannelDataTypeStrRep.STRING.value: + elif val == ChannelDataTypeStrRep.STRING: return cls.STRING - elif val == "CHANNEL_DATA_TYPE_ENUM" or val == ChannelDataTypeStrRep.ENUM.value: + elif val == ChannelDataTypeStrRep.ENUM: return cls.ENUM - elif val == "CHANNEL_DATA_TYPE_BIT_FIELD" or val == ChannelDataTypeStrRep.BIT_FIELD.value: + elif val == ChannelDataTypeStrRep.BIT_FIELD: return cls.BIT_FIELD - elif val == "CHANNEL_DATA_TYPE_BOOL" or val == ChannelDataTypeStrRep.BOOL.value: + elif val == ChannelDataTypeStrRep.BOOL: return cls.BOOL - elif val == "CHANNEL_DATA_TYPE_FLOAT" or val == ChannelDataTypeStrRep.FLOAT.value: + elif val == ChannelDataTypeStrRep.FLOAT: return cls.FLOAT - elif val == "CHANNEL_DATA_TYPE_INT_32" or val == ChannelDataTypeStrRep.INT_32.value: + elif val == ChannelDataTypeStrRep.INT_32: return cls.INT_32 - elif val == "CHANNEL_DATA_TYPE_INT_64" or val == ChannelDataTypeStrRep.INT_64.value: + elif val == ChannelDataTypeStrRep.INT_64: return cls.INT_64 - elif val == "CHANNEL_DATA_TYPE_UINT_32" or val == ChannelDataTypeStrRep.UINT_32.value: + elif val == ChannelDataTypeStrRep.UINT_32: return cls.UINT_32 - elif val == "CHANNEL_DATA_TYPE_UINT_64" or val == ChannelDataTypeStrRep.UINT_64.value: + elif val == ChannelDataTypeStrRep.UINT_64: return cls.UINT_64 - - return None - - def as_human_str(self) -> str: - if self == self.__class__.DOUBLE.value: - return ChannelDataTypeStrRep.DOUBLE.value - elif self == self.__class__.STRING.value: - return ChannelDataTypeStrRep.STRING.value - elif self == self.__class__.ENUM.value: - return ChannelDataTypeStrRep.ENUM.value - elif self == self.__class__.BIT_FIELD.value: - return ChannelDataTypeStrRep.BIT_FIELD.value - elif self == self.__class__.BOOL.value: - return ChannelDataTypeStrRep.BOOL.value - elif self == self.__class__.FLOAT.value: - return ChannelDataTypeStrRep.FLOAT.value - elif self == self.__class__.INT_32.value: - return ChannelDataTypeStrRep.INT_32.value - elif self == self.__class__.INT_64.value: - return ChannelDataTypeStrRep.INT_64.value - elif self == self.__class__.UINT_32.value: - return ChannelDataTypeStrRep.UINT_32.value - elif self == self.__class__.UINT_64.value: - return ChannelDataTypeStrRep.UINT_64.value + else: + raise Exception("Unreachable") + + def as_human_str(self, api_format: bool = False) -> str: + if self == ChannelDataType.DOUBLE: + return "CHANNEL_DATA_TYPE_DOUBLE" if api_format else 
ChannelDataTypeStrRep.DOUBLE.value + elif self == ChannelDataType.STRING: + return "CHANNEL_DATA_TYPE_STRING" if api_format else ChannelDataTypeStrRep.STRING.value + elif self == ChannelDataType.ENUM: + return "CHANNEL_DATA_TYPE_ENUM" if api_format else ChannelDataTypeStrRep.ENUM.value + elif self == ChannelDataType.BIT_FIELD: + return ( + "CHANNEL_DATA_TYPE_BIT_FIELD" + if api_format + else ChannelDataTypeStrRep.BIT_FIELD.value + ) + elif self == ChannelDataType.BOOL: + return "CHANNEL_DATA_TYPE_BOOL" if api_format else ChannelDataTypeStrRep.BOOL.value + elif self == ChannelDataType.FLOAT: + return "CHANNEL_DATA_TYPE_FLOAT" if api_format else ChannelDataTypeStrRep.FLOAT.value + elif self == ChannelDataType.INT_32: + return "CHANNEL_DATA_TYPE_INT_32" if api_format else ChannelDataTypeStrRep.INT_32.value + elif self == ChannelDataType.INT_64: + return "CHANNEL_DATA_TYPE_INT_64" if api_format else ChannelDataTypeStrRep.INT_64.value + elif self == ChannelDataType.UINT_32: + return ( + "CHANNEL_DATA_TYPE_UINT_32" if api_format else ChannelDataTypeStrRep.UINT_32.value + ) + elif self == ChannelDataType.UINT_64: + return ( + "CHANNEL_DATA_TYPE_UINT_64" if api_format else ChannelDataTypeStrRep.UINT_64.value + ) else: raise Exception("Unreachable.") -class ChannelDataTypeStrRep(Enum): - DOUBLE = "double" - STRING = "string" - ENUM = "enum" - BIT_FIELD = "bit_field" - BOOL = "bool" - FLOAT = "float" - INT_32 = "int32" - INT_64 = "int64" - UINT_32 = "uint32" - UINT_64 = "uint64" - - def channel_fqn(channel: Union[ChannelConfig, ChannelConfigPb, ChannelValue, ChannelPb]) -> str: """ Computes the fully qualified channel name. diff --git a/python/lib/sift_py/rest.py b/python/lib/sift_py/rest.py index 9a43e675..85f636fe 100644 --- a/python/lib/sift_py/rest.py +++ b/python/lib/sift_py/rest.py @@ -2,6 +2,8 @@ from typing_extensions import NotRequired +from sift_py.grpc.transport import _clean_uri + class SiftRestConfig(TypedDict): """ @@ -14,3 +16,14 @@ class SiftRestConfig(TypedDict): uri: str apikey: str use_ssl: NotRequired[bool] + + +def compute_uri(restconf: SiftRestConfig) -> str: + uri = restconf["uri"] + use_ssl = restconf.get("use_ssl", True) + clean_uri = _clean_uri(uri, use_ssl) + + if use_ssl: + return f"https://{clean_uri}" + + return f"http://{clean_uri}" diff --git a/python/pyproject.toml b/python/pyproject.toml index f970f226..097be2f1 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -22,10 +22,12 @@ keywords = [ ] dependencies = [ "grpcio~=1.64", + "npTDMS~=1.9", "PyYAML~=6.0", "pandas~=2.0", "protobuf~=5.26", "pydantic~=2.0", + "pydantic_core~=2.3", "requests~=2.25", "requests-toolbelt~=1.0",