From 0a9e73623b653947ba11f6d448d56077a064117a Mon Sep 17 00:00:00 2001 From: Marc Julien Date: Tue, 15 Oct 2024 17:56:04 -0700 Subject: [PATCH] Add TDMS upload --- python/examples/data_import/tdms/main.py | 31 +++ .../data_import/tdms/requirements.txt | 2 + .../data_import/tdms/sample_data.tdms | Bin 0 -> 14271 bytes python/lib/sift_py/data_import/tdms.py | 189 ++++++++++++++++++ python/pyproject.toml | 1 + 5 files changed, 223 insertions(+) create mode 100644 python/examples/data_import/tdms/main.py create mode 100644 python/examples/data_import/tdms/requirements.txt create mode 100644 python/examples/data_import/tdms/sample_data.tdms create mode 100644 python/lib/sift_py/data_import/tdms.py diff --git a/python/examples/data_import/tdms/main.py b/python/examples/data_import/tdms/main.py new file mode 100644 index 00000000..a11ceab7 --- /dev/null +++ b/python/examples/data_import/tdms/main.py @@ -0,0 +1,31 @@ +import os + +from dotenv import load_dotenv +from sift_py.data_import.tdms import TdmsUploadService +from sift_py.rest import SiftRestConfig + +if __name__ == "__main__": + """ + Example of uploading a TDMS file into Sift. 
+ """ + + load_dotenv() + + sift_uri = os.getenv("SIFT_API_URI") + assert sift_uri, "expected 'SIFT_API_URI' environment variable to be set" + + apikey = os.getenv("SIFT_API_KEY") + assert apikey, "expected 'SIFT_API_KEY' environment variable to be set" + + asset_name = os.getenv("ASSET_NAME") + assert asset_name, "expected 'ASSET_NAME' environment variable to be set" + + rest_config: SiftRestConfig = { + "uri": sift_uri, + "apikey": apikey, + } + + tdms_upload_service = TdmsUploadService(rest_config) + status = tdms_upload_service.upload("sample_data.tdms", asset_name, group_into_components=True) + status.wait_until_complete() + print("Upload example complete!") diff --git a/python/examples/data_import/tdms/requirements.txt b/python/examples/data_import/tdms/requirements.txt new file mode 100644 index 00000000..2dda90fe --- /dev/null +++ b/python/examples/data_import/tdms/requirements.txt @@ -0,0 +1,2 @@ +python-dotenv +sift-stack-py diff --git a/python/examples/data_import/tdms/sample_data.tdms b/python/examples/data_import/tdms/sample_data.tdms new file mode 100644 index 0000000000000000000000000000000000000000..c85d77eb144ff5b8c15b8a2f0b3235a3d1ffd695 GIT binary patch literal 14271 zcmeI0dpK0>yT=g{${|Up=%6zqk}5b*zY!*u(^btv$Z(uX65Q;;b!k-qxJiYmy!SC+q=e&8gHWh zJ`^O3_>sM{wX2PjjkDYD8@(i!w7cu#0|@=+x&IvCw5{z~n}3}qy#Hzf#ODc{v(~Qm zE^hXxon?qaS;Dmb=CVEQD)a9f{jc6a@%MG^&h~D^ow?dOpZtB@EW-9H?BFK4Q#Kbi z{&w*a=3(XNZnN>k`#y^e6#($QF(x1jd>7V7H{HO6y{%3h?`qOx9`e%8l z{AoN?{#hQI|1=((|5+ZYe;NZpT{u$%XqGzA;czUuI&mH5atl$|vExeK2b2GqSemgRevS8nqAg z5Di~hAo;Be_h!jL)RTlWD%T3GsM7Ie;;k)@!yr7!g)6xH>fq;HWp&A+3N`z02-WPU zhSZSOr2`5~xL9!Azi8TzdTQ^Q;|nS<8huJ+T>LY-hcdTt71EJDckA6i9R~Kj^s|_E zuN(eOFPs|~bSTwJv1l#>@OiK5=CiaG;f=*zJt-}yQIcKJI2nu8i$ufXuXQ6Up-5In zo`xz_3GV241~w(z_$?Z%$HYXK+|M=|YHb6m_PDcgAZv4K{BAZJO8cgA+{t)Xc=^u4 zBsQLKdxiTfqC-akT_*q1{n+C`l_K!)I44=*?c+)I>Dj8yoGC^%n?<< z>K{bR7Y`4Y*Cf>3Jjg3(Ux=gEzSe(luZ5ejkEdI3HM|^k9-UDO2e02GKdF;}(|SWA z1uOetr&jE`Zec$pKMBgU3)SPfMwV+eB=glT7Hgt6@&9AGrVS~Kk 
z{l>j4tYxy+8P+qA!CAq-TCxXkH;&jYv!Wn|yl4F9l`6!PuKl)qaSNQ{9Fuf}S%_*G zGqK$ji;5lf7uI|!M-FpD{goaA2}!@63hZQ~zA4nN&8`Vexg8$vv@)!`X_nd%$OiZE z46XVF`FP-~S+VIm4RS6f(H>_y;Z;2_OYl)Xx_`2sdxp^PaeFiA%Uc@6mh0c>(QAYA z=8uyfI@_S=G>e`R7lSp9ITD9;*tjDdoEz)d32s;GwO&4Cj6E$dTpLKi?Gr_{&Y=u= zZFUZkXL}(hgPV2mX&(-;#d)_n4&q+^HGSrrB&<&ee@*eK1pR$>`@N$SP&6a&x_f=b z&uqb!gK|s+m#;q>>d}QXOy$w{mB8;t#lb0M8j8EhL&b&}SmE?YT6!}ZBK~ckRbEmc zLly0`h<^&1=Irmu^l*gp+v}4Vy`WgCaR0U89#-gOXFEF3v6H%a4Ua(wcHQ-wy?FT` zWE7gpS*uFm74UFNyKfIZB#YPIZ6|ouAW2G3U`Y&<3!oWqQ%Mos&LF886ThBvWpnmM5;GDUIq7pl+N?3qJkoOZ z00V0tORaiyhYtPrpe^#Q?Qj?2yOc#`W0<`E=%)-Cd^L^VHfT`c)-y!i#!-W`$Hk&W z0l6@8*%VgQTL@a6&)qsD58?~YtHsc+scQ~PV{mW6 zA;h8N8IDR`Tw?g1i8rk_mk&#IV_@USLQ@h8F?R1OOV-sP+`D8%_zML;R%Snl3aCM( zA7|sQ3L5NI$$591mZD^dsN?vPOz6M()j}O^K_|b0z_Y*lp-hhRPLrpg+~vjh^84Ko z{x}-=bgBw^&nwNwHS3}IEq3#^!e}B6)}0Tn%|%w{a?MhQGLVOBec!7$fifY{aRu?9 zIZT%9m#W8`Fxl^_#B<*nT$8>+r5~YZ0>9P_)M9+B@5C+NOqf}TzMNW4#NB#Fn* zkdd{8l-=5maXYjAd<_z|N92v2WwwE1rT%j%kv6C~I#d2CYQc|8l}b?}-VPN9udPzc zgIUpSq3tyc+>Y|%ejd^bC55jPCm#wFBTAhL3>v`y_?91Y1gf+^pl z*x)$)?Ud^)SNJH#$#a(0VaT38b`3G_!=9LWSj=YPe3FZlHW62*3SRYieIJ09g~|rc zXBF_1H!-KW=U_e4JBYWr3EZZ(^up0zc-;&hNuI-mZcf04zC&-2OA{Ay4s8KI-Jlu;dt_#Z} z<>|(;YTJ~8|6W^n zq%#wqhpRuur}Vjw}M@Y4)h6)@$^e8-08P%vyR?36;}m##Ng zCX*}hvhsxgT!|V4y4vRWEG8jWH8IL~C>OdXk6m9p^$yWO7IHlAX&73lQ0Uar4wo!x zoh?^LMBZ=;csaifA@XG^@yUHy>(Dx+JfYOogIjTXCd}U)qvk9 z9R?})ZuU&F;3_V6X!c18IE_bc2yLT5*n3-za90u(;~N{)*A_ukwuzrvLaYy?w{_L( z78u_xzb)0}ExJw)W)_O{!8tx$!SP`~Dx0reoIBW!!rn`~6O=x1ynG+Vy73CRE}T*h z>u%t^_2iJ|1RGqJN4$2-Z$$a-i(9LuiM&AHbc(#79sJQNmht_fz#*uZOvJyhX!D*6fTZn!)e6_VBoD(1%>QL*5i&ga;lddd+E>) zSG^liw1@#*PahQfS=Nv897hsg3kw(fRtWeI@%g@0N}tNdL5%0c3e|hoB6OS33gd|$ zbSLk&72nkaHEJ-lGU(Ws!Xc%RPDS7HGjTlLY_R84&2?(&gUKnk?UP}p;NY2^b=b2P zezQ-Odq?D;;9Sgg$<4(4SpO;Pud*)ax=ra@B%~mglbRFyvI<`{EFOB&+aUZwwk)nX z8{d|yT%xk?nS@ zp9%Y`a~U!FyOI9hb8Kl)Gx{Rh_kXh>f&bR*x7+43U^UgAvilSbyduMV0UG_tT{QSQ zc`_M(PU${2&ogjtX#arljvfSrf7jS!O-1R~T?fLFs5s>aebHW=Ka_pBP52EY6(6gJCf7l>zhL;s{eH+8blqVb%Z1+g 
zjmA03B*^XkF+vmUfLK9z+M>tJ$ZfwZbnaduhK53__i1N?d+?y$PO&ChTT5P=}6<)7$k4Qzm#;)pz@oF>%LZ3XXow`GVxJhIYZDj{M z;-2hRe%^xdadM?lMJhzEZt?%Rza8D~^f9#!gJ8@%;_U9+4cGUVBK2cC;3KrtNW!}T z99LIn&bO(7!iJiv%a7>T`)J!mf3JSH(4_*Cj*!6I%YJRw-i0{o==-yG81T3os}$hd zfQi}xpA(X;5GCKrD5MiS?&*unK1{~w<00wt6gFD!_}S0$ZHFhC7iz6*LBvY##Zy-c zA;DL5Q-0(#9BPBi)QNeqU3ZTqlST!{@%8=Ay#ZLg#>6yjssO?2-lS6u5^kxh)z_S- zLL^cuzIAmstn+>>^10Om9|*WduO~y_VBRIKh;m%KnBEmxR)cv)moCemtwD+X7V$B$ z4zz7Hpr;PiVl$s<{I5$?Tt2JzD>XUa_INV(uXZBOFJj^j^QOV$ zBnryBoW7cw_2Rqg&A7Q`G*Cxx^X=G7MdtClJ*JM~xqdk!(i_2M2>Jk%Yv@IwD$0-Pq$g@6wqOg7f|H z)OOAw_(Ufj`9SbV=F+t~euIut2Rq7@M?Ih&#LAS#%}`8%c`!Q@{%cMyTYS112Lftk zFMMWTq+gwtL+wGEF6p^hTq6cosj?AB#x~ny%Pl_^LOx{vM;`xX6i)S4-&)v>SGwB` zSHuwYUVm5rZbBElD3Q@Ab?Zj_nT%h35e=}tAaq)`nF^VmUw5q5dIi3~^aN4yW*Ba< zFqAi=Vw9ff@Flhz3GU(=@~X|y33Qo=3t>WhlZl;qStmvUxD_J?*&yYu<#KT9fmhps zvHLe7V9hF0eAeHM`J1+VOn60upk1EvzLshTCazuD>R*Vq^n=FoYULQ3RK96+v<2ta zKD(pEIkr8+~ALboZ2p1sstVECc%-`=`bwPsZp}Jv+J}T=OV2Aku7)JzR{PoPM)>u(dA}8` z!sqiA33Dac(C6OXoKs$k&Cx%0>TT@AEss4PmhI|>8yDA{F~?#&)O1%pEja*+^6o9? zpD-~rSE4{gBNJI#(e%`iIs~67ig;Gs2T!L*JK}=65Xh4$FrTQWD^kfkefp;|knWoO8h`t_$)`I`)*MG;Cd>y;dT!2PEO9l5^h%(MRf9 z;`NCM->cdC0^`}3@EmvYu4@O$-SN~Ro+eyT9<|~+*9lT&V#KelHMrQOzGEVqj(eFI zdXM(jLiJbVE9p(W(7e6A=uA-#{8yFRUkIh3z%kU+%!dZ^w;Bq{*NFLYa>PfwnaB&l zLLCD6gNT;6=-gV-51X-cN2eWZ-0=8t=xPN6(#4i5^=21h?Tw+6WwS_#IC}d?Z(knj zTRJqjuGhlOz*0A#m4zEOpC94m>O{;s*^XhNUNEp+vVm_b4FVr@zbo(Ug8{=tuD-Jz zM-;P%ANce^(|na_`qvNGWYx=U(BBCU-&7rQ#TG2f7WpwW$i#|e!q*>&)Z7i%Qv#OpDx?yy*%v@P@ z083SPG+N*GfLFNYptLzr#}&9#xVDqwG34@sJko`WIoY8xyW6q8N6=6!iw4#EAulfE z5_Qft4WlSiD!%Rv=g}edS1g&2ATLu++!|3 zy~S`41xZ`xdU3FjI(**HbjbiTbp>+YFlab`_i)XxhizDPcx8-dRTuOX%Hp^3)FMk` z$Ju$G3SbdBm-AC03;yLA8bNu3NZ0ssCg%zT@#hz6+a-3uo)Q%Ph}dT;TR{EtDqs+S ze3ZDk#<^f_%`PN~b%SsHa_w`gvoR`p%WIi?GqFFhz^#i>fwq{Y<|NI}kf=HwSV$qm zWpCz!Fua5RzVTP_)$Mp`71A{S1QRrd@?xvlTDV@)zOGK>1Em8UBe#@mP_vM%v}Io@ zt_5w7DH-U+_)Eq6#xn0=bc`>uuA&+;H(jrFfr?m>CHDHn^TkA_Iq_+2q#Tuni}*cXRB=R&x*MpfRfrecWIVxX&7hjrBYk8UP? 
zU@iGX;|O6Pvu>YMDJdV6ZSu#TYQ=+cTjqsw(*vZbBnmyVtHWB`-D0i&ER=UZEZs)gVjJnze+17TVs`*-U@byl-Yg6@1UDt!Wja_PTYzo?VZ)J_=HXvVPYMClg zM-Ld!8$C*qNPgF?8^^n_wMbgv`UC@7&PQtn zCkK#H9n|wPI0icO6W?Bw`e0v^m*Lydjn%%)!jvaHFlc}5W6ErWantqX(YHvL|Dw`- z$c3o4JiEN)_@Yo?9)6k8-385xY6blVVeoIh=1{%26Ku1oWsSQSxY4@$=jfL%IG(&E z@kO^4y4JJ#T8Q_{ks6h+^7$=7U0fMToTa!>qQ+WwpdTd#99LyCbKxm0byo9TIX<>= z(!1U=k(cIwo=)i5%z-&C8sB8XVZX(iNINp5Ja25%&ST){!tFRZqnLNgW2@BI>-_;A^Xwdw1i^mGv=)j|||~Tsf1XyPYuV zluypvK!Z|K-YxyaZk)=PI$e2@*hjLHJa^zk19&fR#N;07fTF;|a=E}~I9>=NgOmY8 U9U7LsT#*UxbB;&;;;q1c05pB4ssI20 literal 0 HcmV?d00001 diff --git a/python/lib/sift_py/data_import/tdms.py b/python/lib/sift_py/data_import/tdms.py new file mode 100644 index 00000000..f2fdfc0f --- /dev/null +++ b/python/lib/sift_py/data_import/tdms.py @@ -0,0 +1,189 @@ +from pathlib import Path +from tempfile import NamedTemporaryFile +from typing import Dict, List, Optional, Union + +from nptdms import RootObject, TdmsChannel, TdmsFile, TdmsWriter, types # type: ignore + +from sift_py.data_import._config import DataColumn, TimeColumn +from sift_py.data_import.config import CsvConfig +from sift_py.data_import.csv import CsvUploadService +from sift_py.data_import.status import DataImportService +from sift_py.data_import.time_format import TimeFormatType +from sift_py.ingestion.channel import ChannelDataType +from sift_py.rest import SiftRestConfig + +TDMS_TO_SIFT_TYPES = { + types.Boolean: ChannelDataType.BOOL, + types.Int8: ChannelDataType.INT_32, + types.Int16: ChannelDataType.INT_32, + types.Int32: ChannelDataType.INT_32, + types.Int64: ChannelDataType.INT_64, + types.Uint8: ChannelDataType.UINT_32, + types.Uint16: ChannelDataType.UINT_32, + types.Uint32: ChannelDataType.UINT_32, + types.Uint64: ChannelDataType.UINT_64, + types.SingleFloat: ChannelDataType.FLOAT, + types.DoubleFloat: ChannelDataType.DOUBLE, +} + + +class TdmsUploadService: + """ + Service to upload TDMS files. 
+ """ + + _csv_upload_service: CsvUploadService + + def __init__(self, rest_conf: SiftRestConfig): + self._csv_upload_service = CsvUploadService(rest_conf) + + def upload( + self, + path: Union[str, Path], + asset_name: str, + group_into_components: bool = False, + ignore_errors: bool = False, + run_name: Optional[str] = None, + run_id: Optional[str] = None, + ) -> DataImportService: + """ + Uploads the TDMS file pointed to by `path` to the specified asset. + + Set `group_into_components` to True if you want to upload each TDMS group as + a Sift Component. + + If `ignore_errors` is True, channels without timing information are skipped. + + Override `run_name` to specify the name of the run to create for this data. Default is None. + Override `run_id` to specify the id of the run to add this data to. Default is None. + """ + posix_path = Path(path) if isinstance(path, str) else path + + if not posix_path.is_file(): + raise Exception(f"Provided path, '{path}', does not point to a regular file.") + + with NamedTemporaryFile(mode="w", suffix=".csv") as temp_file: + valid_channels = self._convert_to_csv(path, temp_file.name, ignore_errors) + csv_config = self._create_csv_config( + valid_channels, asset_name, group_into_components, run_name, run_id + ) + return self._csv_upload_service.upload(temp_file.name, csv_config) + + def _convert_to_csv( + self, src_path: Union[str, Path], dst_path: Union[str, Path], ignore_errors: bool + ) -> List[TdmsChannel]: + """Converts the TDMS file to a temporary CSV on disk that we will upload. + + Returns the valid channels after parsing the TDMS file. Valid channels contain + timing information. 
+ """ + + def contains_timing(channel: TdmsChannel) -> bool: + """Returns true if the TDMS Channel contains timing information.""" + return all( + [ + "wf_increment" in channel.properties, + "wf_start_time" in channel.properties, + "wf_start_offset" in channel.properties, + ] + ) + + def normalize_channel_name(channel_name: str) -> str: + """Normalize channel names by replacing invalid characters with spaces.""" + return " ".join(channel_name.replace("/", " ").split()) + + src_file = TdmsFile(src_path) + + original_groups = src_file.groups() + valid_channels: List[TdmsChannel] = [] + for group in original_groups: + for channel in group.channels(): + if contains_timing(channel): + valid_channels.append(channel) + else: + if ignore_errors: + print( + f"{group.name}:{channel.name} does not contain timing information. Skipping." + ) + else: + raise Exception( + f"{group.name}:{channel.name} does not contain timing information. " + "Set `ignore_errors` to True to skip channels without timing information." + ) + + # Write out the new TDMS file with invalid channels removed, then convert to csv. 
+ with NamedTemporaryFile(mode="w") as f: + with TdmsWriter(f.name) as tdms_writer: + root_object = RootObject(src_file.properties) + tdms_writer.write_segment([root_object] + original_groups + valid_channels) + + filtered_tdms_file = TdmsFile.read(f.name) + df = filtered_tdms_file.as_dataframe(time_index=True, absolute_time=True) + + updated_names = { + original_name: normalize_channel_name(original_name) for original_name in df.keys() + } + df.rename(updated_names, axis=1, inplace=True) + df.to_csv(dst_path, encoding="utf-8") + + return valid_channels + + def _create_csv_config( + self, + channels: List[TdmsChannel], + asset_name: str, + group_into_components: bool, + run_name: Optional[str] = None, + run_id: Optional[str] = None, + ) -> CsvConfig: + """Construct a CsvConfig based on metadata within the TDMS file.""" + data_config: Dict[int, DataColumn] = {} + # Data columns start in column 2 (1-indexed) + first_data_column = 2 + for i, channel in enumerate(channels): + try: + data_type = TDMS_TO_SIFT_TYPES[channel.data_type].as_human_str(api_format=True) + except KeyError: + data_type = None + + if data_type is None: + raise Exception(f"{channel.name} data type not supported: {channel.data_type}") + + extra_info = "" + for k, v in channel.properties.items(): + # Skip these since the csv config has dedicated fields for them. 
+ if k in ["description", "unit_string"]: + continue + # Must convert datetime to a string + elif k == "wf_start_time": + v = str(v) + extra_info += f"{k}: {v}\n" + + channel_config = DataColumn( + name=channel.name, + data_type=data_type, + description=f"{channel.properties.get('description')}\n{extra_info}", + units=channel.properties.get("unit_string") or "", + ) + if group_into_components and channel.group_name: + channel_config.component = channel.group_name + + data_config[first_data_column + i] = channel_config + + config_info = { + "asset_name": asset_name, + "first_data_row": first_data_column, + "time_column": TimeColumn( + format=TimeFormatType.ABSOLUTE_DATETIME, + column_number=1, + ), + "data_columns": data_config, + } + + if run_name is not None: + config_info["run_name"] = run_name + + if run_id is not None: + config_info["run_id"] = run_id + + return CsvConfig(config_info) diff --git a/python/pyproject.toml b/python/pyproject.toml index affa91b6..097be2f1 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -22,6 +22,7 @@ keywords = [ ] dependencies = [ "grpcio~=1.64", + "npTDMS~=1.9", "PyYAML~=6.0", "pandas~=2.0", "protobuf~=5.26",