diff --git a/fhir.ipynb b/fhir.ipynb index ae51861..a572f1d 100644 --- a/fhir.ipynb +++ b/fhir.ipynb @@ -33,7 +33,8 @@ "outputs": [], "source": [ "import ehrapy as ep" - ] + ], + "id": "8df675df4777689a" }, { "cell_type": "markdown", @@ -43,145 +44,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "4b27e7ec", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-12-18 13:57:47-- https://synthetichealth.github.io/synthea-sample-data/downloads/latest/synthea_sample_data_fhir_latest.zip\n", - "Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'\n", - "Resolving synthetichealth.github.io (synthetichealth.github.io)... 185.199.110.153, 185.199.108.153, 185.199.109.153, ...\n", - "Connecting to synthetichealth.github.io (synthetichealth.github.io)|185.199.110.153|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 24187011 (23M) [application/zip]\n", - "Saving to: ‘synthea_sample_data_fhir_latest.zip’\n", - "\n", - "synthea_sample_data 100%[===================>] 23.07M 11.8MB/s in 2.0s \n", - "\n", - "2023-12-18 13:57:49 (11.8 MB/s) - ‘synthea_sample_data_fhir_latest.zip’ saved [24187011/24187011]\n", - "\n", - "mkdir: cannot create directory ‘fhir_dataset’: File exists\n", - "Archive: synthea_sample_data_fhir_latest.zip\n", - " inflating: fhir_dataset/practitionerInformation1701791555719.json \n", - " inflating: fhir_dataset/Seth414_Stroman228_4bdb0674-f282-0681-98b4-ef790851bae1.json \n", - " inflating: fhir_dataset/Manuela585_Silvia880_Meza847_54c52c94-f607-417c-4432-cf1c62fc43e0.json \n", - " inflating: fhir_dataset/Paul232_Cole117_2bfcda1c-5415-4cfb-d676-f72b9fd4d326.json \n", - " inflating: fhir_dataset/Mellisa81_Kaila152_Friesen796_71082608-8ce2-8ecd-0566-5489237e689f.json \n", - " inflating: fhir_dataset/Gertude950_Koelpin146_1ebbc1de-b1f3-f3d8-bce0-39273a48c3a3.json \n", - " inflating: 
fhir_dataset/Elizebeth108_Wilkinson796_af882637-ecdc-9e0f-cfe2-ce1d24dcc309.json \n", - " inflating: fhir_dataset/Alfonso758_Bins636_e80d4c62-149a-a6a6-4b39-9d4aa3e07ba7.json \n", - " inflating: fhir_dataset/Luther918_Fay398_1ee80f20-8062-ec1f-f186-6deabf4d0742.json \n", - " inflating: fhir_dataset/Julieta402_Wuckert783_84057763-f99f-2988-240c-a0112804d0bc.json \n", - " inflating: fhir_dataset/Crissy767_Bulah832_Mante251_22ffb415-5e35-eb15-4fc8-f00f796539c0.json \n", - " inflating: fhir_dataset/Lyle846_Orn563_3641ad5c-4997-af36-ceda-1d72578a3bc4.json \n", - " inflating: fhir_dataset/Art115_Marks830_01cc5d2a-fdc1-1aa6-5df3-ab783a443edd.json \n", - " inflating: fhir_dataset/Elijah719_Walsh511_cd39cf2b-5c83-061c-8c70-3b9436b9abd9.json \n", - " inflating: fhir_dataset/America446_Juliann385_Doyle959_ec756e7b-193a-2dec-b17e-0481a349378b.json \n", - " inflating: fhir_dataset/Alix578_Kaitlin600_Klocko335_025df78a-98cb-3c2b-2aad-8cd6136580af.json \n", - " inflating: fhir_dataset/Glenn0_Hermiston71_5468222f-3080-71ee-08c5-4bd1f59497ab.json \n", - " inflating: fhir_dataset/Bradford382_Pacocha935_fe0d100a-13d2-b6c5-26e6-9f894d51a52d.json \n", - " inflating: fhir_dataset/Gerard367_Goodwin327_6e2fd722-b2b8-4cd7-d512-cb457f133c1a.json \n", - " inflating: fhir_dataset/Jeniffer557_Harris789_25db6afd-b081-7962-5fbc-a44836bbd5e5.json \n", - " inflating: fhir_dataset/Cammy883_Upton904_7eac2ca1-8c55-c7a0-c4d4-eabd7452e7ba.json \n", - " inflating: fhir_dataset/Samuel331_Juárez383_7a35825c-2b9c-715a-2e23-d7776ee6787f.json \n", - " inflating: fhir_dataset/Avery919_Velia837_Stokes453_b6e8e336-8a8b-48d4-3d6b-f8c782761a9a.json \n", - " inflating: fhir_dataset/Shannan727_Bosco882_76eaedd6-a422-750a-374c-d038e9b60f75.json \n", - " inflating: fhir_dataset/Franklyn361_McClure239_1f64b270-e512-4354-9604-25c6e7a6999f.json \n", - " inflating: fhir_dataset/Lanny564_Olson653_e9bfee1d-34a1-62ee-fca9-3180f2ecf51f.json \n", - " inflating: 
fhir_dataset/Shantae970_Kihn564_6a10849d-3940-0cd0-4b06-4b5375cbda3d.json \n", - " inflating: fhir_dataset/Celena734_Cordie578_Kulas532_81be7866-85eb-8dc9-ee04-645b33eb729d.json \n", - " inflating: fhir_dataset/Marylouise236_Dacia646_Heidenreich818_14f7b660-3445-6ace-7a36-7408778e4c39.json \n", - " inflating: fhir_dataset/Rea30_Gutkowski940_f89018e9-c460-e5ae-8818-ec075239a328.json \n", - " inflating: fhir_dataset/Whitley172_Sallie654_Crona259_88461643-2ebf-47bf-c3d5-b5bc77aaca94.json \n", - " inflating: fhir_dataset/Cyndi533_Heaney114_f7811cda-62e4-ac6a-08c1-731a93d8043d.json \n", - " inflating: fhir_dataset/Alfredo17_Briones478_b048e4ad-666b-7f53-25d7-29cfa102fd57.json \n", - " inflating: fhir_dataset/Jerald662_Harvey63_8c205099-f0d1-442d-fc56-262fc6b04469.json \n", - " inflating: fhir_dataset/Sammy219_Morar593_dc7c913b-f564-976e-6952-a4e9e2953c29.json \n", - " inflating: fhir_dataset/Lissette621_Shea825_Hermann103_1436618f-0475-2e7a-a577-3e9fcd71997a.json \n", - " inflating: fhir_dataset/Stanford577_Brown30_0e3fbc54-9408-cb1d-1704-0682a9d2904e.json \n", - " inflating: fhir_dataset/Ivory697_O'Kon634_58500c3f-9654-8400-7c10-f355ef166284.json \n", - " inflating: fhir_dataset/Martín25_Flórez858_c6653303-4914-c48f-0dbc-c89836d10e4b.json \n", - " inflating: fhir_dataset/José_Emilio366_Marrero674_36332912-19da-58c8-d269-77da327ac839.json \n", - " inflating: fhir_dataset/Odessa199_Hodkiewicz467_e9c6a5d9-e100-4b99-25e7-22c8219ee617.json \n", - " inflating: fhir_dataset/hospitalInformation1701791555719.json \n", - " inflating: fhir_dataset/Porter490_Gerlach374_d32b8ba4-4b24-bd16-8204-8202da355ba5.json \n", - " inflating: fhir_dataset/Valda518_Sharilyn202_Thompson596_957a9250-aea9-c7fb-fdad-6ee2fde26b23.json \n", - " inflating: fhir_dataset/Caitlin552_Johns824_a6ea2a8d-1b9d-27e3-02f9-2f689e149dcf.json \n", - " inflating: fhir_dataset/Alton320_Rippin620_23a0ff1e-9ae3-35ba-00e6-b02831a54cb5.json \n", - " inflating: 
fhir_dataset/Beau391_McLaughlin530_643c7d41-cca1-1dbc-c193-8215fc0fed4e.json \n", - " inflating: fhir_dataset/Will178_Hyatt152_de56ed62-0fd3-9bd6-0f65-8fc78199af95.json \n", - " inflating: fhir_dataset/Hipolito984_Gerhold939_23c133ce-777a-5b3d-7e46-f0445905c840.json \n", - " inflating: fhir_dataset/Corie618_Muoi890_Metz686_9516e413-e607-f110-bf9e-adeb7796c1f6.json \n", - " inflating: fhir_dataset/Alfonzo975_Wolf938_9149cde5-d230-0c39-2050-5a89907fd14b.json \n", - " inflating: fhir_dataset/Natisha940_Rodriguez71_2168afe9-5491-06ff-c545-ff573fec742d.json \n", - " inflating: fhir_dataset/Derrick232_Ebert178_4ddc5428-1bc6-f561-394d-0f9a9e18cf8d.json \n", - " inflating: fhir_dataset/Nam74_Pamella8_Conn188_c419d088-0c10-6666-235f-15f31c23d398.json \n", - " inflating: fhir_dataset/Krysta658_Abshire638_5d5ffcf3-196a-1ff9-9ff1-5b267285b7e6.json \n", - " inflating: fhir_dataset/Melina208_Carter549_b1ff4045-06ad-8617-488d-fc9dcbd53f6b.json \n", - " inflating: fhir_dataset/Deja232_Luciana251_Altenwerth646_1a8b9c69-ce6d-cd0b-35d8-9b3f10c708b8.json \n", - " inflating: fhir_dataset/Delmer311_Williamson769_6eed8667-cc62-85d6-a4c2-3d5c1042de8d.json \n", - " inflating: fhir_dataset/Cathleen724_Erika442_Steuber698_d1c9a984-6855-1dcb-cc96-3d92a36b29f8.json \n", - " inflating: fhir_dataset/Patrina117_Tabetha269_Trantow673_142df2dc-fb4c-964b-341a-fbacad0c5a3e.json \n", - " inflating: fhir_dataset/Darnell564_Susann104_Toy286_1c31ecd5-ec13-c307-2221-d5f7e9f498b2.json \n", - " inflating: fhir_dataset/Misty404_Langworth352_b6933489-46df-f18e-cb3a-31cc9ac588b7.json \n", - " inflating: fhir_dataset/Janett802_Kuvalis369_4136191d-9d9b-6f80-46fd-719b7b06a4d2.json \n", - " inflating: fhir_dataset/Lida218_Tammera223_Kohler843_a48fb728-e502-3920-ccc5-20c501b10730.json \n", - " inflating: fhir_dataset/Tonisha838_Chere867_Sauer652_a85db32c-12ba-1eaf-a867-69d693edbeac.json \n", - " inflating: fhir_dataset/Noe500_Parisian75_ae0561aa-940d-ac17-ab7b-e4ae027e566c.json \n", - " inflating: 
fhir_dataset/Leif534_Lynch190_0bc42dc4-6e9a-5258-6ad9-72810496642b.json \n", - " inflating: fhir_dataset/Kelly223_Upton904_1cdc2f83-0149-5cb7-0506-5d6dd15b75c1.json \n", - " inflating: fhir_dataset/Clare187_Watsica258_b4565047-d157-1663-de65-b65ed36833f6.json \n", - " inflating: fhir_dataset/Bernie827_Rodriguez71_b9aadf20-1afe-9de5-eeb8-811764221ba0.json \n", - " inflating: fhir_dataset/Linda558_Imelda608_Hessel84_b0690cbf-45d3-9405-c960-3dfe2be06b36.json \n", - " inflating: fhir_dataset/Gianna370_Fahey393_44294240-83ab-027a-2eab-271f07c5af24.json \n", - " inflating: fhir_dataset/Vicente970_Vandervort697_29ebffcd-a61a-5232-81db-ae1c97116e37.json \n", - " inflating: fhir_dataset/Silva841_Anastasia959_Lesch175_c004800b-da09-9c09-9195-98ab7357734a.json \n", - " inflating: fhir_dataset/Kenny207_Balistreri607_858f1b23-b20b-15b2-62e6-f81f7b638fa5.json \n", - " inflating: fhir_dataset/Mohammad578_Beahan375_f2dd49fb-1738-d88f-69b6-fc940716888e.json \n", - " inflating: fhir_dataset/Tommie457_Bayer639_8ac182b7-21fb-7820-ff95-75ecb613f05d.json \n", - " inflating: fhir_dataset/Maricela194_Keebler762_de06bde4-1cc2-9fa4-97b1-68afddef2726.json \n", - " inflating: fhir_dataset/Niki25_Dorathy429_Simonis280_9f28c283-de47-1615-2cf1-07787a604949.json \n", - " inflating: fhir_dataset/Lucius907_Dickinson688_a6726752-f995-1259-fdce-7bba2df7a563.json \n", - " inflating: fhir_dataset/Arron144_Romaguera67_d3eb63b8-6d96-57b2-0b40-315714e57358.json \n", - " inflating: fhir_dataset/Zora492_Jacobson885_355fc930-5934-ab15-8585-ce8542e9b4cd.json \n", - " inflating: fhir_dataset/Aurelio227_Balistreri607_147be7d2-0c12-219b-6098-091b3350ee0e.json \n", - " inflating: fhir_dataset/Arlette667_Kohler843_5731a0ac-ee4a-ad9b-d03a-947ab9287979.json \n", - " inflating: fhir_dataset/Sammie902_Aufderhar910_a4cb13fa-e6ce-82a3-2411-4df8f06a451f.json \n", - " inflating: fhir_dataset/Tod265_Barrows492_aeccf979-8347-f0f5-36ad-9296bb9eddf9.json \n", - " inflating: 
fhir_dataset/Laurence43_Hilpert278_6c15f057-eacd-bbf0-8bd1-f68872707395.json \n", - " inflating: fhir_dataset/Liana375_Clarita196_Ward668_ddf7cb33-5e5e-4462-1a5c-84caa10091da.json \n", - " inflating: fhir_dataset/Marvella276_Keeley419_Johnson679_a80a1513-9447-5622-1ddb-b926b9f0d467.json \n", - " inflating: fhir_dataset/Isabela97_Huerta329_f734a23f-fa93-93ee-f317-1bbd5de5c727.json \n", - " inflating: fhir_dataset/Leif534_O'Connell601_2f97d434-20b1-9f6c-107b-00416d57866f.json \n", - " inflating: fhir_dataset/Fredric73_McDermott739_ca4ac6d3-759c-bc0b-a425-d5672d078681.json \n", - " inflating: fhir_dataset/Tory770_Toy286_6d649102-c1b9-3614-7839-d7962c73835d.json \n", - " inflating: fhir_dataset/Elijah719_Runolfsson901_4a6bf168-4c64-5fa1-35ca-80a408163d59.json \n", - " inflating: fhir_dataset/Tracee120_Leffler128_07ea83e0-f968-6c01-f666-7f776e018da0.json \n", - " inflating: fhir_dataset/Teodoro374_Navarro863_6fa89a40-70f4-9024-2b01-9fc6d077f0cf.json \n", - " inflating: fhir_dataset/Timoteo39_Mora209_7b27d942-8bf7-1623-edbd-5774ab2919e9.json \n", - " inflating: fhir_dataset/Wilfred787_Wolf938_7dd8181b-996c-9d3b-5d99-fbabcb501264.json \n", - " inflating: fhir_dataset/Willetta882_Price929_85c996d6-f2db-20c0-39c5-1f046e6f1348.json \n", - " inflating: fhir_dataset/Ivette731_Friesen796_4523a036-90d1-89f6-5971-1723c51e962d.json \n", - " inflating: fhir_dataset/Beatriz277_Tempie784_Nienow652_6872a8c3-67aa-cd13-b55f-b794da87fae8.json \n", - " inflating: fhir_dataset/Ismael683_Rau926_541d4e96-c76d-af62-0c8e-26497f133335.json \n", - " inflating: fhir_dataset/Lakeisha206_Schulist381_d6c4fd02-f7ef-7c60-9ba4-23ea5979db72.json \n", - " inflating: fhir_dataset/Lynelle567_Digna973_Hermiston71_2da077b9-c84a-583f-81a7-258c84f232ed.json \n", - " inflating: fhir_dataset/Carmelo33_Lehner980_66adee84-6a77-ccdd-4055-b27be33c40ac.json \n", - " inflating: fhir_dataset/Ray930_Lueilwitz711_de289f79-7d99-8fff-4d7d-0c3c14eb1e27.json \n", - " inflating: 
fhir_dataset/Jerold208_Dare640_bf53227d-3529-42d9-0e43-ff3f76ca50c6.json \n", - " inflating: fhir_dataset/Suzy993_Bettye671_Okuneva707_2d5fb1b6-ba60-198e-7e24-65a40ab499d6.json \n", - " inflating: fhir_dataset/Shayne60_Howe413_79410e31-97d8-ddc0-112f-9e0fc096ed39.json \n", - " inflating: fhir_dataset/Samatha845_Mariann762_Collier206_ff6ddbc4-ec84-48e2-dc91-73a7fa28ebae.json \n", - " inflating: fhir_dataset/Adan632_Cassin499_076f046c-b448-ea99-b034-4d4baed17ef4.json \n", - " inflating: fhir_dataset/Rubén780_Alonso270_c7167dce-1e25-6c19-72f8-e7ebc1763698.json \n" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:32:11.608323Z", + "start_time": "2024-04-20T14:32:00.915480Z" } - ], + }, + "outputs": [], "source": [ - "%% capture\n", + "%%capture\n", "!wget https://synthetichealth.github.io/synthea-sample-data/downloads/latest/synthea_sample_data_fhir_latest.zip\n", "!mkdir fhir_dataset\n", "!unzip synthea_sample_data_fhir_latest.zip -d fhir_dataset" @@ -203,9 +76,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "id": "b036b8e1", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:32:31.229130Z", + "start_time": "2024-04-20T14:32:11.609110Z" + } + }, "outputs": [], "source": [ "df = ep.io.read_fhir(\"fhir_dataset\", return_df=True)\n", @@ -214,9 +92,14 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "id": "40d14b21", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:32:42.165701Z", + "start_time": "2024-04-20T14:32:41.387360Z" + } + }, "outputs": [], "source": [ "# Option 3: We're dropping any columns that contain lists or dictionaries and all columns that only contain NA values\n", @@ -226,26 +109,20 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "ab1920a0", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:32:43.842021Z", + "start_time": 
"2024-04-20T14:32:43.779379Z" + } + }, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35m2023-12-18 14:16:58,727\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Transformed passed DataFrame into an AnnData object with n_obs x n_vars = `1000` x `65`.\u001b[0m\n" - ] - }, { "data": { - "text/plain": [ - "AnnData object with n_obs × n_vars = 1000 × 65\n", - " uns: 'numerical_columns', 'non_numerical_columns'\n", - " layers: 'original'" - ] + "text/plain": "AnnData object with n_obs × n_vars = 1000 × 69\n var: 'ehrapy_column_type'\n layers: 'original'" }, - "execution_count": 8, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -257,824 +134,542 @@ }, { "cell_type": "code", - "execution_count": 9, - "id": "f74bfe98", - "metadata": {}, + "execution_count": 14, "outputs": [ - { - "data": { - "text/html": [ - "
Quality control metrics missing. Calculating...\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mQuality control metrics missing. Calculating\u001b[0m\u001b[1;33m...\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
ValueError! Setting quality control metrics to nan. Did you encode your data?\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mValueError! Setting quality control metrics to nan. Did you encode your data?\u001b[0m\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[1;35m2023-12-18 14:16:58,901\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Added the calculated metrics to AnnData's `obs` and `var`.\u001b[0m\n" + "\u001B[1;35m2024-04-20 16:32:52,875\u001B[0m - \u001B[1;34mroot\u001B[0m \u001B[1;37mINFO - Feature types have been inferred and stored in adata.var['feature_type']. PLEASE CHECK and adjust if necessary using adata.var['feature_type']['feature1']='corrected_type'.\u001B[0m\n" ] }, { "data": { - "text/html": [ - "
Feature resource.issued had more than 40.10% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.issued \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m40.10\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1m Detected feature types for AnnData object with 1000 obs and 69 vars\u001B[0m\n╠══ 📅\u001B[1m Date features\u001B[0m\n╠══ 📐\u001B[1m Numerical features\u001B[0m\n║ ╠══ resource.payment.amount.value\n║ ╠══ resource.suppliedItem.quantity.value\n║ ╠══ resource.total.value\n║ ╚══ resource.valueQuantity.value\n╚══ 🗂️\u001B[1m Categorical features\u001B[0m\n ╠══ fullUrl (1000 categories)\n ╠══ patientId (5 categories)\n ╠══ request.method (1 categories)\n ╠══ request.url (16 categories)\n ╠══ resource.abatementDateTime (21 categories)\n ╠══ resource.authoredOn (23 categories)\n ╠══ resource.billablePeriod.end (84 categories)\n ╠══ resource.billablePeriod.start (86 categories)\n ╠══ resource.birthDate (2 categories)\n ╠══ resource.claim.reference (69 categories)\n ╠══ resource.class.code (2 categories)\n ╠══ resource.class.system (1 categories)\n ╠══ resource.code.text (142 categories)\n ╠══ resource.context.period.end (43 categories)\n ╠══ resource.context.period.start (43 categories)\n ╠══ resource.created (43 categories)\n ╠══ resource.custodian.reference (6 categories)\n ╠══ resource.date (43 categories)\n ╠══ resource.deceasedDateTime (1 categories)\n ╠══ resource.distinctIdentifier (5 categories)\n ╠══ resource.effectiveDateTime (85 categories)\n ╠══ resource.encounter.reference (44 categories)\n ╠══ resource.expirationDate (3 categories)\n ╠══ resource.facility.reference (6 categories)\n ╠══ resource.gender (1 categories)\n ╠══ resource.intent (1 categories)\n ╠══ resource.issued (85 categories)\n ╠══ resource.location.reference (5 categories)\n ╠══ resource.lotNumber (5 categories)\n ╠══ resource.manufactureDate (3 categories)\n ╠══ resource.maritalStatus.text (1 categories)\n ╠══ 
resource.medicationCodeableConcept.text (10 categories)\n ╠══ resource.multipleBirthBoolean (1 categories)\n ╠══ resource.occurrenceDateTime (18 categories)\n ╠══ resource.onsetDateTime (37 categories)\n ╠══ resource.outcome (1 categories)\n ╠══ resource.patient.reference (2 categories)\n ╠══ resource.payment.amount.currency (1 categories)\n ╠══ resource.performedPeriod.end (73 categories)\n ╠══ resource.performedPeriod.start (66 categories)\n ╠══ resource.period.end (44 categories)\n ╠══ resource.period.start (44 categories)\n ╠══ resource.prescription.reference (26 categories)\n ╠══ resource.primarySource (1 categories)\n ╠══ resource.provider.reference (12 categories)\n ╠══ resource.recorded (1 categories)\n ╠══ resource.recordedDate (37 categories)\n ╠══ resource.referral.reference (1 categories)\n ╠══ resource.requester.reference (4 categories)\n ╠══ resource.resourceType (16 categories)\n ╠══ resource.serialNumber (5 categories)\n ╠══ resource.serviceProvider.reference (6 categories)\n ╠══ resource.status (7 categories)\n ╠══ resource.subject.reference (2 categories)\n ╠══ resource.suppliedItem.itemCodeableConcept.text (2 categories)\n ╠══ resource.text.status (1 categories)\n ╠══ resource.total.currency (1 categories)\n ╠══ resource.type.text (4 categories)\n ╠══ resource.use (1 categories)\n ╠══ resource.vaccineCode.text (6 categories)\n ╠══ resource.valueCodeableConcept.text (19 categories)\n ╠══ resource.valueQuantity.code (24 categories)\n ╠══ resource.valueQuantity.system (1 categories)\n ╠══ resource.valueQuantity.unit (24 categories)\n ╚══ resource.valueString (1 categories)\n", + "text/html": "
 Detected feature types for AnnData object with 1000 obs and 69 vars\n╠══ 📅 Date features\n╠══ 📐 Numerical features\n║   ╠══ resource.payment.amount.value\n║   ╠══ resource.suppliedItem.quantity.value\n║   ╠══ resource.total.value\n║   ╚══ resource.valueQuantity.value\n╚══ 🗂️ Categorical features\n    ╠══ fullUrl (1000 categories)\n    ╠══ patientId (5 categories)\n    ╠══ request.method (1 categories)\n    ╠══ request.url (16 categories)\n    ╠══ resource.abatementDateTime (21 categories)\n    ╠══ resource.authoredOn (23 categories)\n    ╠══ resource.billablePeriod.end (84 categories)\n    ╠══ resource.billablePeriod.start (86 categories)\n    ╠══ resource.birthDate (2 categories)\n    ╠══ resource.claim.reference (69 categories)\n    ╠══ resource.class.code (2 categories)\n    ╠══ resource.class.system (1 categories)\n    ╠══ resource.code.text (142 categories)\n    ╠══ resource.context.period.end (43 categories)\n    ╠══ resource.context.period.start (43 categories)\n    ╠══ resource.created (43 categories)\n    ╠══ resource.custodian.reference (6 categories)\n    ╠══ resource.date (43 categories)\n    ╠══ resource.deceasedDateTime (1 categories)\n    ╠══ resource.distinctIdentifier (5 categories)\n    ╠══ resource.effectiveDateTime (85 categories)\n    ╠══ resource.encounter.reference (44 categories)\n    ╠══ resource.expirationDate (3 categories)\n    ╠══ resource.facility.reference (6 categories)\n    ╠══ resource.gender (1 categories)\n    ╠══ resource.intent (1 categories)\n    ╠══ resource.issued (85 categories)\n    ╠══ resource.location.reference (5 categories)\n    ╠══ resource.lotNumber (5 categories)\n    ╠══ resource.manufactureDate (3 categories)\n    ╠══ resource.maritalStatus.text (1 categories)\n    ╠══ resource.medicationCodeableConcept.text (10 categories)\n    ╠══ resource.multipleBirthBoolean (1 categories)\n    ╠══ resource.occurrenceDateTime (18 categories)\n    ╠══ resource.onsetDateTime (37 categories)\n    ╠══ resource.outcome (1 
categories)\n    ╠══ resource.patient.reference (2 categories)\n    ╠══ resource.payment.amount.currency (1 categories)\n    ╠══ resource.performedPeriod.end (73 categories)\n    ╠══ resource.performedPeriod.start (66 categories)\n    ╠══ resource.period.end (44 categories)\n    ╠══ resource.period.start (44 categories)\n    ╠══ resource.prescription.reference (26 categories)\n    ╠══ resource.primarySource (1 categories)\n    ╠══ resource.provider.reference (12 categories)\n    ╠══ resource.recorded (1 categories)\n    ╠══ resource.recordedDate (37 categories)\n    ╠══ resource.referral.reference (1 categories)\n    ╠══ resource.requester.reference (4 categories)\n    ╠══ resource.resourceType (16 categories)\n    ╠══ resource.serialNumber (5 categories)\n    ╠══ resource.serviceProvider.reference (6 categories)\n    ╠══ resource.status (7 categories)\n    ╠══ resource.subject.reference (2 categories)\n    ╠══ resource.suppliedItem.itemCodeableConcept.text (2 categories)\n    ╠══ resource.text.status (1 categories)\n    ╠══ resource.total.currency (1 categories)\n    ╠══ resource.type.text (4 categories)\n    ╠══ resource.use (1 categories)\n    ╠══ resource.vaccineCode.text (6 categories)\n    ╠══ resource.valueCodeableConcept.text (19 categories)\n    ╠══ resource.valueQuantity.code (24 categories)\n    ╠══ resource.valueQuantity.system (1 categories)\n    ╠══ resource.valueQuantity.unit (24 categories)\n    ╚══ resource.valueString (1 categories)\n
\n" }, "metadata": {}, "output_type": "display_data" - }, + } + ], + "source": [ + "ep.ad.infer_feature_types(adata)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-04-20T14:32:52.914050Z", + "start_time": "2024-04-20T14:32:52.855166Z" + } + }, + "id": "f4b42a0963eddc92" + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [], + "source": [ + "adata.var[\"feature_type\"][\"resource.abatementDateTime\"] = \"date\"\n", + "adata.var[\"feature_type\"][\"resource.authoredOn\"] = \"date\"\n", + "adata.var[\"feature_type\"][\"resource.billablePeriod.end\"] = \"date\"\n", + "adata.var[\"feature_type\"][\"resource.billablePeriod.start\"] = \"date\"\n", + "adata.var[\"feature_type\"][\"resource.birthDate\"] = \"date\"\n", + "adata.var[\"feature_type\"][\"resource.context.period.end\"] = \"date\"\n", + "adata.var[\"feature_type\"][\"resource.context.period.start\"] = \"date\"\n", + "adata.var[\"feature_type\"][\"resource.date\"] = \"date\"" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-04-20T14:44:05.763949Z", + "start_time": "2024-04-20T14:44:05.755016Z" + } + }, + "id": "745e434402e4f676" + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f74bfe98", + "metadata": { + "is_executing": true, + "ExecuteTime": { + "start_time": "2024-04-20T14:44:07.503009Z" + } + }, + "outputs": [ { "data": { - "text/html": [ - "
Feature resource.performedPeriod.start had more than 93.80% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.performedPeriod.start \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m93.80\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mQuality control metrics missing. Calculating\u001B[0m\u001B[1;33m...\u001B[0m\n", + "text/html": "
Quality control metrics missing. Calculating...\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.started had more than 99.80% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.started \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.80\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.period.start \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m94.80\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.period.start had more than 94.80% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.abatementDateTime had more than 98.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.abatementDateTime \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m98.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.medicationCodeableConcept.text \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m97.40\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.medicationCodeableConcept.text had more than 97.40% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.date had more than 95.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.date \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m95.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.distinctIdentifier \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.50\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.distinctIdentifier had more than 99.50% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.payment.amount.value had more than 92.50% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.payment.amount.value \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m92.50\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.type.text \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.50\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.type.text had more than 99.50% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.provider.reference had more than 85.00% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.provider.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m85.00\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.text.status \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.40\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.text.status had more than 99.40% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.custodian.reference had more than 95.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.custodian.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m95.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.facility.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m88.80\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.facility.reference had more than 88.80% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.valueQuantity.unit had more than 56.20% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.valueQuantity.unit \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m56.20\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.abatementDateTime \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m97.70\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.abatementDateTime had more than 97.70% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.claim.reference had more than 92.50% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.claim.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m92.50\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.vaccineCode.text \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m98.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.vaccineCode.text had more than 98.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.prescription.reference had more than 97.10% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.prescription.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m97.10\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.referral.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m93.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.referral.reference had more than 93.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.valueCodeableConcept.text had more than 98.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.valueCodeableConcept.text \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m98.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.lotNumber \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.50\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.lotNumber had more than 99.50% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.numberOfInstances had more than 99.80% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.numberOfInstances \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.80\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.outcome \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m93.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.outcome had more than 93.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.maritalStatus.text had more than 99.90% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.maritalStatus.text \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.90\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.primarySource \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m98.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.primarySource had more than 98.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.context.period.end had more than 95.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.context.period.end \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m95.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.location.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m90.80\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.location.reference had more than 90.80% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.class.code had more than 95.30% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.class.code \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m95.30\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.class.code \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.60\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.class.code had more than 95.60% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.intent had more than 96.30% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.intent \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m96.30\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.created \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m86.20\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.created had more than 86.20% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.numberOfSeries had more than 99.80% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.numberOfSeries \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.80\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.context.period.start \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.70\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.context.period.start had more than 95.70% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.authoredOn had more than 97.10% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.authoredOn \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m97.10\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.custodian.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.70\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.custodian.reference had more than 95.70% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.multipleBirthBoolean had more than 99.90% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.multipleBirthBoolean \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.90\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.patient.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m82.40\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.patient.reference had more than 82.40% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.requester.reference had more than 97.10% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.requester.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m97.10\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.authoredOn \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m97.40\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.authoredOn had more than 97.40% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.context.period.start had more than 95.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.context.period.start \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m95.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.claim.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m93.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.claim.reference had more than 93.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.performedPeriod.end had more than 93.80% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.performedPeriod.end \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m93.80\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.valueCodeableConcept.text \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m96.90\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.valueCodeableConcept.text had more than 96.90% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.birthDate had more than 99.90% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.birthDate \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.90\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.valueString \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.90\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.valueString had more than 99.90% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.payment.amount.currency had more than 92.50% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.payment.amount.currency \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m92.50\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.context.period.end \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.70\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.context.period.end had more than 95.70% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.period.end had more than 94.90% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.period.end \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m94.90\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.suppliedItem.itemCodeableConcept.text \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m98.60\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.suppliedItem.itemCodeableConcept.text had more than 98.60% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.medicationReference.reference had more than 99.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.medicationReference.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.total.currency \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m93.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.total.currency had more than 93.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.primarySource had more than 99.00% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.primarySource \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.00\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.payment.amount.value \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m93.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.payment.amount.value had more than 93.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.onsetDateTime had more than 97.70% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.onsetDateTime \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m97.70\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.deceasedDateTime \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.90\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.deceasedDateTime had more than 99.90% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.patient.reference had more than 83.70% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.patient.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m83.70\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.total.value \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m93.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.total.value had more than 93.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.serviceProvider.reference had more than 95.30% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.serviceProvider.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m95.30\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.serialNumber \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.50\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.serialNumber had more than 99.50% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.gender had more than 99.90% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.gender \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.90\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.requester.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m97.40\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.requester.reference had more than 97.40% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.deceasedDateTime had more than 99.90% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.deceasedDateTime \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.90\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.recordedDate \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.90\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.recordedDate had more than 95.90% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.created had more than 85.00% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.created \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m85.00\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.payment.amount.currency \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m93.10\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.payment.amount.currency had more than 93.10% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.vaccineCode.text had more than 99.00% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.vaccineCode.text \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.00\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.performedPeriod.start \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m92.70\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.performedPeriod.start had more than 92.70% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.valueQuantity.value had more than 56.20% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.valueQuantity.value \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m56.20\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.gender \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.80\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.gender had more than 99.80% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.occurrenceDateTime had more than 99.00% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.occurrenceDateTime \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.00\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.onsetDateTime \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.90\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.onsetDateTime had more than 95.90% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.total.currency had more than 92.50% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.total.currency \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m92.50\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.date \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.70\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.date had more than 95.70% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.use had more than 85.00% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.use \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m85.00\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.provider.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m86.20\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.provider.reference had more than 86.20% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.valueQuantity.system had more than 56.20% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.valueQuantity.system \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m56.20\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.recorded \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.90\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.recorded had more than 99.90% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.total.value had more than 92.50% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.total.value \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m92.50\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.manufactureDate \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.50\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.manufactureDate had more than 99.50% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.effectiveDateTime had more than 39.50% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.effectiveDateTime \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m39.50\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.billablePeriod.start \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m86.20\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.billablePeriod.start had more than 86.20% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.recordedDate had more than 97.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.recordedDate \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m97.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.class.system \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.60\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.class.system had more than 95.60% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.billablePeriod.start had more than 85.00% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.billablePeriod.start \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m85.00\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.maritalStatus.text \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.80\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.maritalStatus.text had more than 99.80% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.valueQuantity.code had more than 56.20% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.valueQuantity.code \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m56.20\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.multipleBirthBoolean \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.80\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.multipleBirthBoolean had more than 99.80% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.context.reference had more than 99.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.context.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.serviceProvider.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.60\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.serviceProvider.reference had more than 95.60% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.location.reference had more than 92.60% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.location.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m92.60\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.use \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m86.20\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.use had more than 86.20% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.medicationCodeableConcept.text had more than 97.10% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.medicationCodeableConcept.text \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m97.10\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.performedPeriod.end \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m92.70\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.performedPeriod.end had more than 92.70% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.outcome had more than 92.50% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.outcome \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m92.50\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.period.end \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m95.60\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.period.end had more than 95.60% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.criticality had more than 99.70% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.criticality \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.70\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.expirationDate \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.50\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.expirationDate had more than 99.50% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.text.status had more than 99.10% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.text.status \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m99.10\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.birthDate \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m99.80\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.birthDate had more than 99.80% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.referral.reference had more than 92.50% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.referral.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m92.50\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.occurrenceDateTime \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m96.70\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.occurrenceDateTime had more than 96.70% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.facility.reference had more than 87.90% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.facility.reference \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m87.90\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.intent \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m97.00\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.intent had more than 97.00% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.period.start had more than 93.70% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.period.start \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m93.70\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.billablePeriod.end \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m86.20\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.billablePeriod.end had more than 86.20% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.class.system had more than 95.30% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.class.system \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m95.30\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.suppliedItem.quantity.value \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m98.60\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.suppliedItem.quantity.value had more than 98.60% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.code.text had more than 35.40% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.code.text \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m35.40\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mFeature \u001B[0m\u001B[1;34mresource.prescription.reference \u001B[0m\u001B[1;33mhad more than \u001B[0m\u001B[1;34m97.40\u001B[0m\u001B[1;34m% \u001B[0m\u001B[1;33mmissing values!\u001B[0m\n", + "text/html": "
Feature resource.prescription.reference had more than 97.40% missing values!\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { - "text/html": [ - "
Feature resource.billablePeriod.end had more than 85.00% missing values!\n",
-       "
\n" - ], - "text/plain": [ - "\u001b[1;33mFeature \u001b[0m\u001b[1;34mresource.billablePeriod.end \u001b[0m\u001b[1;33mhad more than \u001b[0m\u001b[1;34m85.00\u001b[0m\u001b[1;34m% \u001b[0m\u001b[1;33mmissing values!\u001b[0m\n" - ] + "text/plain": "\u001B[1;33mscikit-learn-intelex is not available. Install via \u001B[0m\u001B[1;34mpip install scikit-learn-intelex \u001B[0m\u001B[1;33m for faster imputations.\u001B[0m\n", + "text/html": "
scikit-learn-intelex is not available. Install via pip install scikit-learn-intelex  for faster imputations.\n
\n" }, "metadata": {}, "output_type": "display_data" }, { "data": { + "text/plain": "Output()", "application/vnd.jupyter.widget-view+json": { - "model_id": "4d9f0ea6b6ed40978d412c31ed93d818", "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] + "version_minor": 0, + "model_id": "38e945dd973e47f0bc4d6233a4c59773" + } }, "metadata": {}, "output_type": "display_data" }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)\n" - ] - }, { "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
+      "text/plain": "",
+      "text/html": "
\n"
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] + "text/plain": "\n", + "text/html": "
\n
\n" }, "metadata": {}, "output_type": "display_data" @@ -1086,27 +681,30 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 26, "id": "d73b7085", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:44:36.874001Z", + "start_time": "2024-04-20T14:44:36.540997Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[1;35m2023-12-18 14:17:26,540\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - The original categorical values `['resource.performedPeriod.start', 'resource.started', 'resource.date', 'resource.custodian.reference', 'resource.maritalStatus.text', 'fullUrl', 'resource.class.code', 'resource.intent', 'resource.authoredOn', 'resource.requester.reference', 'resource.birthDate', 'resource.payment.amount.currency', 'resource.primarySource', 'resource.onsetDateTime', 'resource.gender', 'resource.deceasedDateTime', 'resource.created', 'resource.effectiveDateTime', 'resource.recordedDate', 'resource.valueQuantity.code', 'resource.location.reference', 'resource.medicationCodeableConcept.text', 'resource.outcome', 'resource.subject.reference', 'resource.context.period.start', 'resource.billablePeriod.end', 'resource.issued', 'resource.abatementDateTime', 'resource.provider.reference', 'resource.valueQuantity.unit', 'resource.claim.reference', 'resource.prescription.reference', 'resource.valueCodeableConcept.text', 'resource.context.period.end', 'resource.status', 'resource.multipleBirthBoolean', 'resource.performedPeriod.end', 'patientId', 'resource.period.end', 'resource.medicationReference.reference', 'request.url', 'resource.patient.reference', 'resource.serviceProvider.reference', 'resource.vaccineCode.text', 'resource.occurrenceDateTime', 'resource.total.currency', 'resource.use', 'resource.valueQuantity.system', 'resource.billablePeriod.start', 'resource.encounter.reference', 'resource.context.reference', 'request.method', 'resource.criticality', 'resource.resourceType', 
'resource.text.status', 'resource.referral.reference', 'resource.facility.reference', 'resource.period.start', 'resource.class.system', 'resource.code.text']` were added to uns.\u001b[0m\n" + "\u001B[1;35m2024-04-20 16:44:36,559\u001B[0m - \u001B[1;34mroot\u001B[0m \u001B[1;37mINFO - The original categorical values `['fullUrl', 'resource.resourceType', 'resource.text.status', 'resource.gender', 'resource.birthDate', 'resource.maritalStatus.text', 'resource.multipleBirthBoolean', 'request.method', 'request.url', 'resource.status', 'resource.class.system', 'resource.class.code', 'resource.subject.reference', 'resource.period.start', 'resource.period.end', 'resource.serviceProvider.reference', 'resource.code.text', 'resource.encounter.reference', 'resource.onsetDateTime', 'resource.recordedDate', 'resource.effectiveDateTime', 'resource.issued', 'resource.date', 'resource.custodian.reference', 'resource.context.period.start', 'resource.context.period.end', 'resource.use', 'resource.patient.reference', 'resource.billablePeriod.start', 'resource.billablePeriod.end', 'resource.created', 'resource.provider.reference', 'resource.facility.reference', 'resource.total.currency', 'resource.referral.reference', 'resource.claim.reference', 'resource.outcome', 'resource.payment.amount.currency', 'resource.abatementDateTime', 'resource.distinctIdentifier', 'resource.manufactureDate', 'resource.expirationDate', 'resource.lotNumber', 'resource.serialNumber', 'resource.type.text', 'resource.valueQuantity.unit', 'resource.valueQuantity.system', 'resource.valueQuantity.code', 'resource.valueCodeableConcept.text', 'resource.performedPeriod.start', 'resource.performedPeriod.end', 'resource.location.reference', 'resource.vaccineCode.text', 'resource.occurrenceDateTime', 'resource.primarySource', 'resource.valueString', 'resource.intent', 'resource.medicationCodeableConcept.text', 'resource.authoredOn', 'resource.requester.reference', 'resource.prescription.reference', 'resource.recorded', 
'patientId', 'resource.deceasedDateTime', 'resource.suppliedItem.itemCodeableConcept.text']` were added to uns.\u001B[0m\n" ] }, { "data": { + "text/plain": "Output()", "application/vnd.jupyter.widget-view+json": { - "model_id": "db6f642183724de88a17a52275af46c5", "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Output()" - ] + "version_minor": 0, + "model_id": "0480af17d59640328d3bbb509c2ba869" + } }, "metadata": {}, "output_type": "display_data" @@ -1115,45 +713,32 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[1;35m2023-12-18 14:17:29,908\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Encoding strings in X to save to .h5ad. Loading the file will reverse the encoding.\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35m2023-12-18 14:17:29,996\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Updated the original layer after encoding.\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35m2023-12-18 14:17:32,026\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - The original categorical values `['resource.performedPeriod.start', 'resource.started', 'resource.date', 'resource.custodian.reference', 'resource.maritalStatus.text', 'fullUrl', 'resource.class.code', 'resource.intent', 'resource.authoredOn', 'resource.requester.reference', 'resource.birthDate', 'resource.payment.amount.currency', 'resource.primarySource', 'resource.onsetDateTime', 'resource.gender', 'resource.deceasedDateTime', 'resource.created', 'resource.effectiveDateTime', 'resource.recordedDate', 'resource.valueQuantity.code', 'resource.location.reference', 'resource.medicationCodeableConcept.text', 'resource.outcome', 'resource.subject.reference', 'resource.context.period.start', 'resource.billablePeriod.end', 'resource.issued', 'resource.abatementDateTime', 'resource.provider.reference', 'resource.valueQuantity.unit', 'resource.claim.reference', 
'resource.prescription.reference', 'resource.valueCodeableConcept.text', 'resource.context.period.end', 'resource.status', 'resource.multipleBirthBoolean', 'resource.performedPeriod.end', 'patientId', 'resource.period.end', 'resource.medicationReference.reference', 'request.url', 'resource.patient.reference', 'resource.serviceProvider.reference', 'resource.vaccineCode.text', 'resource.occurrenceDateTime', 'resource.total.currency', 'resource.use', 'resource.valueQuantity.system', 'resource.billablePeriod.start', 'resource.encounter.reference', 'resource.context.reference', 'request.method', 'resource.criticality', 'resource.resourceType', 'resource.text.status', 'resource.referral.reference', 'resource.facility.reference', 'resource.period.start', 'resource.class.system', 'resource.code.text']` were added to obs.\u001b[0m\n" + "\u001B[1;35m2024-04-20 16:44:36,740\u001B[0m - \u001B[1;34mroot\u001B[0m \u001B[1;37mINFO - Updated the original layer after encoding.\u001B[0m\n" ] }, { "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
+      "text/plain": "",
+      "text/html": "
\n"
      },
      "metadata": {},
      "output_type": "display_data"
     },
     {
      "data": {
-      "text/html": [
-       "
\n",
-       "
\n" - ], - "text/plain": [ - "\n" - ] + "text/plain": "\n", + "text/html": "
\n
\n" }, "metadata": {}, "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mamba/PycharmProjects/ehrapy/ehrapy/preprocessing/_encoding.py:282: DeprecationWarning: Converting `np.inexact` or `np.floating` to a dtype is deprecated. The current result is `float64` which is not strictly correct.\n", + " encoded_ann_data.X = encoded_ann_data.X.astype(np.number)\n" + ] } ], "source": [ diff --git a/mimic_2_fate.ipynb b/mimic_2_fate.ipynb index c4951df..5ec5b5b 100644 --- a/mimic_2_fate.ipynb +++ b/mimic_2_fate.ipynb @@ -90,20 +90,15 @@ "cell_type": "code", "execution_count": 2, "id": "fa8b61d4", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/zeth/miniconda3/envs/ehrapy/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:18:40.718206Z", + "start_time": "2024-04-20T14:18:40.715649Z" } - ], + }, + "outputs": [], "source": [ "import ehrapy as ep\n", - "from matplotlib import rcParams\n", "import cellrank as cr\n", "import numpy as np" ] @@ -120,7 +115,12 @@ "cell_type": "code", "execution_count": 3, "id": "4a0e8284", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:18:40.723074Z", + "start_time": "2024-04-20T14:18:40.718327Z" + } + }, "outputs": [], "source": [ "import warnings\n", @@ -156,23 +156,24 @@ "cell_type": "code", "execution_count": 4, "id": "a44104d0", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:18:47.657492Z", + "start_time": "2024-04-20T14:18:47.557851Z" + } + }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35m2023-12-19 11:30:31,347\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO 
- Transformed passed DataFrame into an AnnData object with n_obs x n_vars = `1776` x `46`.\u001b[0m\n", - "\u001b[1;35m2023-12-19 11:30:31,350\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - The original categorical values `['day_icu_intime', 'service_unit']` were added to uns.\u001b[0m\n", - "\u001b[1;35m2023-12-19 11:30:31,375\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Encoding strings in X to save to .h5ad. Loading the file will reverse the encoding.\u001b[0m\n", - "\u001b[1;35m2023-12-19 11:30:31,381\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Updated the original layer after encoding.\u001b[0m\n", - "\u001b[1;35m2023-12-19 11:30:31,410\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - The original categorical values `['day_icu_intime', 'service_unit']` were added to obs.\u001b[0m\n" - ] + "data": { + "text/plain": "AnnData object with n_obs × n_vars = 1776 × 46\n var: 'ehrapy_column_type'\n layers: 'original'" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "%%capture\n", - "adata = ep.dt.mimic_2(encoded=True)\n", + "adata = ep.dt.mimic_2(encoded=False)\n", "adata" ] }, @@ -189,25 +190,73 @@ { "cell_type": "code", "execution_count": 5, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[1;35m2024-04-20 16:18:52,518\u001B[0m - \u001B[1;34mroot\u001B[0m \u001B[1;37mINFO - Feature types have been inferred and stored in adata.var['feature_type']. 
PLEASE CHECK and adjust if necessary using adata.var['feature_type']['feature1']='corrected_type'.\u001B[0m\n" + ] + }, + { + "data": { + "text/plain": "\u001B[1m Detected feature types for AnnData object with 1776 obs and 46 vars\u001B[0m\n╠══ 📅\u001B[1m Date features\u001B[0m\n╠══ 📐\u001B[1m Numerical features\u001B[0m\n║ ╠══ abg_count\n║ ╠══ age\n║ ╠══ bmi\n║ ╠══ bun_first\n║ ╠══ chloride_first\n║ ╠══ creatinine_first\n║ ╠══ hgb_first\n║ ╠══ hospital_los_day\n║ ╠══ hr_1st\n║ ╠══ icu_los_day\n║ ╠══ iv_day_1\n║ ╠══ map_1st\n║ ╠══ mort_day_censored\n║ ╠══ pco2_first\n║ ╠══ platelet_first\n║ ╠══ po2_first\n║ ╠══ potassium_first\n║ ╠══ sapsi_first\n║ ╠══ sodium_first\n║ ╠══ sofa_first\n║ ╠══ spo2_1st\n║ ╠══ tco2_first\n║ ╠══ temp_1st\n║ ╠══ wbc_first\n║ ╚══ weight_first\n╚══ 🗂️\u001B[1m Categorical features\u001B[0m\n ╠══ afib_flg (2 categories)\n ╠══ aline_flg (2 categories)\n ╠══ cad_flg (2 categories)\n ╠══ censor_flg (2 categories)\n ╠══ chf_flg (2 categories)\n ╠══ copd_flg (2 categories)\n ╠══ day_28_flg (2 categories)\n ╠══ day_icu_intime (7 categories)\n ╠══ day_icu_intime_num (7 categories)\n ╠══ gender_num (2 categories)\n ╠══ hosp_exp_flg (2 categories)\n ╠══ hour_icu_intime (24 categories)\n ╠══ icu_exp_flg (2 categories)\n ╠══ liver_flg (2 categories)\n ╠══ mal_flg (2 categories)\n ╠══ renal_flg (2 categories)\n ╠══ resp_flg (2 categories)\n ╠══ sepsis_flg (1 categories)\n ╠══ service_num (2 categories)\n ╠══ service_unit (3 categories)\n ╚══ stroke_flg (2 categories)\n", + "text/html": "
 Detected feature types for AnnData object with 1776 obs and 46 vars\n╠══ 📅 Date features\n╠══ 📐 Numerical features\n║   ╠══ abg_count\n║   ╠══ age\n║   ╠══ bmi\n║   ╠══ bun_first\n║   ╠══ chloride_first\n║   ╠══ creatinine_first\n║   ╠══ hgb_first\n║   ╠══ hospital_los_day\n║   ╠══ hr_1st\n║   ╠══ icu_los_day\n║   ╠══ iv_day_1\n║   ╠══ map_1st\n║   ╠══ mort_day_censored\n║   ╠══ pco2_first\n║   ╠══ platelet_first\n║   ╠══ po2_first\n║   ╠══ potassium_first\n║   ╠══ sapsi_first\n║   ╠══ sodium_first\n║   ╠══ sofa_first\n║   ╠══ spo2_1st\n║   ╠══ tco2_first\n║   ╠══ temp_1st\n║   ╠══ wbc_first\n║   ╚══ weight_first\n╚══ 🗂️ Categorical features\n    ╠══ afib_flg (2 categories)\n    ╠══ aline_flg (2 categories)\n    ╠══ cad_flg (2 categories)\n    ╠══ censor_flg (2 categories)\n    ╠══ chf_flg (2 categories)\n    ╠══ copd_flg (2 categories)\n    ╠══ day_28_flg (2 categories)\n    ╠══ day_icu_intime (7 categories)\n    ╠══ day_icu_intime_num (7 categories)\n    ╠══ gender_num (2 categories)\n    ╠══ hosp_exp_flg (2 categories)\n    ╠══ hour_icu_intime (24 categories)\n    ╠══ icu_exp_flg (2 categories)\n    ╠══ liver_flg (2 categories)\n    ╠══ mal_flg (2 categories)\n    ╠══ renal_flg (2 categories)\n    ╠══ resp_flg (2 categories)\n    ╠══ sepsis_flg (1 categories)\n    ╠══ service_num (2 categories)\n    ╠══ service_unit (3 categories)\n    ╚══ stroke_flg (2 categories)\n
\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ep.ad.infer_feature_types(adata, output=\"print\")" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-04-20T14:18:52.532063Z", + "start_time": "2024-04-20T14:18:52.486431Z" + } + }, + "id": "88151a13ed556af2" + }, + { + "cell_type": "code", + "execution_count": 6, "id": "f055282a-0df0-4c08-9f3d-0e5cb0c25c78", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-20T14:19:01.632120Z", + "start_time": "2024-04-20T14:18:55.593397Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[1;35m2023-12-19 11:30:33,160\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Values in columns ['iv_day_1', 'po2_first'] were replaced by [[5.28320373 7.71059732]\n", + "\u001B[1;35m2024-04-20 16:18:55,591\u001B[0m - \u001B[1;34mroot\u001B[0m \u001B[1;37mINFO - The original categorical values `['service_unit', 'day_icu_intime']` were added to uns.\u001B[0m\n", + "\u001B[1;35m2024-04-20 16:18:55,606\u001B[0m - \u001B[1;34mroot\u001B[0m \u001B[1;37mINFO - Updated the original layer after encoding.\u001B[0m\n", + "\u001B[1;35m2024-04-20 16:18:55,615\u001B[0m - \u001B[1;34mroot\u001B[0m \u001B[1;37mINFO - The original categorical values `['service_unit', 'day_icu_intime']` were added to obs.\u001B[0m\n", + "\u001B[1;35m2024-04-20 16:18:55,870\u001B[0m - \u001B[1;34mroot\u001B[0m \u001B[1;37mINFO - Values in columns ['iv_day_1', 'po2_first'] were replaced by [[5.28320373 7.71059732]\n", " [5.19739145 6.39859493]\n", " [5.70044357 7.6438663 ]\n", " ...\n", " [5.75700724 5.06575459]\n", " [4.44265126 3.1254439 ]\n", - " [6.35437004 8.38228943]].\u001b[0m\n" + " [6.35437004 8.38228943]].\u001B[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.\n" ] } ], "source": [ "%%capture\n", + "adata = 
ep.pp.encode(adata, autodetect=True)\n", "ep.pp.knn_impute(adata, n_neighbours=5)\n", "ep.pp.log_norm(adata, vars=[\"iv_day_1\", \"po2_first\"], offset=1)\n", "ep.pp.pca(adata)\n", @@ -1152,7 +1201,7 @@ "\n" ], "text/plain": [ - "\u001b[1;33mUnable to fetch versions for one or more dependencies.\u001b[0m\n" + "\u001B[1;33mUnable to fetch versions for one or more dependencies.\u001B[0m\n" ] }, "metadata": {}, diff --git a/mimic_2_introduction.ipynb b/mimic_2_introduction.ipynb index 6ec60f8..a9010ff 100644 --- a/mimic_2_introduction.ipynb +++ b/mimic_2_introduction.ipynb @@ -88,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "ea172c3a-e8aa-469a-ba7c-5d4730fee1e0", "metadata": { "tags": [] @@ -124,26 +124,19 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "2b467275", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35m2023-12-18 16:18:17,206\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Transformed passed DataFrame into an AnnData object with n_obs x n_vars = `1776` x `46`.\u001b[0m\n" - ] - }, { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 1776 × 46\n", - " uns: 'numerical_columns', 'non_numerical_columns'\n", + " var: 'ehrapy_column_type'\n", " layers: 'original'" ] }, - "execution_count": 4, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -184,7 +177,146 @@ "id": "8923e25b", "metadata": {}, "source": [ - "The dataset contains 46 features as previously mentioned. However, not all features are numerical as some are categorical. Such categorical features need an encoding. We first identify the categorical (non-numerical) features in our dataset." + "The dataset contains 46 features, as previously mentioned. We distinguish between numerical, categorical, and date features. First, we need to clarify which features fall into what category. 
ehrapy provides the `infer_feature_types` function for this, which infers the type of each feature. Always check that the inferred types are correct, and correct them if they are not." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2faa2b71-e769-486f-930c-88456cf9ea2e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1;35m2024-04-20 15:47:14,929\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Feature types have been inferred and stored in adata.var['feature_type']. PLEASE CHECK and adjust if necessary using adata.var['feature_type']['feature1']='corrected_type'.\u001b[0m\n" + ] + }, + { + "data": { + "text/html": [ + "
 Detected feature types for AnnData object with 1776 obs and 46 vars\n",
+       "╠══ 📅 Date features\n",
+       "╠══ 📐 Numerical features\n",
+       "║   ╠══ abg_count\n",
+       "║   ╠══ age\n",
+       "║   ╠══ bmi\n",
+       "║   ╠══ bun_first\n",
+       "║   ╠══ chloride_first\n",
+       "║   ╠══ creatinine_first\n",
+       "║   ╠══ hgb_first\n",
+       "║   ╠══ hospital_los_day\n",
+       "║   ╠══ hr_1st\n",
+       "║   ╠══ icu_los_day\n",
+       "║   ╠══ iv_day_1\n",
+       "║   ╠══ map_1st\n",
+       "║   ╠══ mort_day_censored\n",
+       "║   ╠══ pco2_first\n",
+       "║   ╠══ platelet_first\n",
+       "║   ╠══ po2_first\n",
+       "║   ╠══ potassium_first\n",
+       "║   ╠══ sapsi_first\n",
+       "║   ╠══ sodium_first\n",
+       "║   ╠══ sofa_first\n",
+       "║   ╠══ spo2_1st\n",
+       "║   ╠══ tco2_first\n",
+       "║   ╠══ temp_1st\n",
+       "║   ╠══ wbc_first\n",
+       "║   ╚══ weight_first\n",
+       "╚══ 🗂️ Categorical features\n",
+       "    ╠══ afib_flg (2 categories)\n",
+       "    ╠══ aline_flg (2 categories)\n",
+       "    ╠══ cad_flg (2 categories)\n",
+       "    ╠══ censor_flg (2 categories)\n",
+       "    ╠══ chf_flg (2 categories)\n",
+       "    ╠══ copd_flg (2 categories)\n",
+       "    ╠══ day_28_flg (2 categories)\n",
+       "    ╠══ day_icu_intime (7 categories)\n",
+       "    ╠══ day_icu_intime_num (7 categories)\n",
+       "    ╠══ gender_num (2 categories)\n",
+       "    ╠══ hosp_exp_flg (2 categories)\n",
+       "    ╠══ hour_icu_intime (24 categories)\n",
+       "    ╠══ icu_exp_flg (2 categories)\n",
+       "    ╠══ liver_flg (2 categories)\n",
+       "    ╠══ mal_flg (2 categories)\n",
+       "    ╠══ renal_flg (2 categories)\n",
+       "    ╠══ resp_flg (2 categories)\n",
+       "    ╠══ sepsis_flg (1 categories)\n",
+       "    ╠══ service_num (2 categories)\n",
+       "    ╠══ service_unit (3 categories)\n",
+       "    ╚══ stroke_flg (2 categories)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m Detected feature types for AnnData object with 1776 obs and 46 vars\u001b[0m\n", + "╠══ 📅\u001b[1m Date features\u001b[0m\n", + "╠══ 📐\u001b[1m Numerical features\u001b[0m\n", + "║ ╠══ abg_count\n", + "║ ╠══ age\n", + "║ ╠══ bmi\n", + "║ ╠══ bun_first\n", + "║ ╠══ chloride_first\n", + "║ ╠══ creatinine_first\n", + "║ ╠══ hgb_first\n", + "║ ╠══ hospital_los_day\n", + "║ ╠══ hr_1st\n", + "║ ╠══ icu_los_day\n", + "║ ╠══ iv_day_1\n", + "║ ╠══ map_1st\n", + "║ ╠══ mort_day_censored\n", + "║ ╠══ pco2_first\n", + "║ ╠══ platelet_first\n", + "║ ╠══ po2_first\n", + "║ ╠══ potassium_first\n", + "║ ╠══ sapsi_first\n", + "║ ╠══ sodium_first\n", + "║ ╠══ sofa_first\n", + "║ ╠══ spo2_1st\n", + "║ ╠══ tco2_first\n", + "║ ╠══ temp_1st\n", + "║ ╠══ wbc_first\n", + "║ ╚══ weight_first\n", + "╚══ 🗂️\u001b[1m Categorical features\u001b[0m\n", + " ╠══ afib_flg (2 categories)\n", + " ╠══ aline_flg (2 categories)\n", + " ╠══ cad_flg (2 categories)\n", + " ╠══ censor_flg (2 categories)\n", + " ╠══ chf_flg (2 categories)\n", + " ╠══ copd_flg (2 categories)\n", + " ╠══ day_28_flg (2 categories)\n", + " ╠══ day_icu_intime (7 categories)\n", + " ╠══ day_icu_intime_num (7 categories)\n", + " ╠══ gender_num (2 categories)\n", + " ╠══ hosp_exp_flg (2 categories)\n", + " ╠══ hour_icu_intime (24 categories)\n", + " ╠══ icu_exp_flg (2 categories)\n", + " ╠══ liver_flg (2 categories)\n", + " ╠══ mal_flg (2 categories)\n", + " ╠══ renal_flg (2 categories)\n", + " ╠══ resp_flg (2 categories)\n", + " ╠══ sepsis_flg (1 categories)\n", + " ╠══ service_num (2 categories)\n", + " ╠══ service_unit (3 categories)\n", + " ╚══ stroke_flg (2 categories)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ep.ad.infer_feature_types(adata)" + ] + }, + { + "cell_type": "markdown", + "id": "572738a8-063d-4b14-a9f9-611af4572483", + "metadata": {}, + "source": [ + "When checking the detected feature types above, we find that for this dataset, 
all features were detected correctly. If this is not the case for your dataset, you can correct a feature's type with `adata.var.loc['feature1', 'feature_type'] = 'correct_type'`.\n", + "\n", + "Categorical features may already be stored numerically (e.g., as `0`/`1` for flags) or as another type, such as strings. Categorical features that are stored non-numerically need to be encoded. We first identify the categorical features stored non-numerically in our dataset:" ] }, { @@ -195,8 +327,48 @@ "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ehrapy_column_typefeature_type
service_unitnon_numericcategorical
day_icu_intimenon_numericcategorical
\n", + "
" + ], "text/plain": [ - "['day_icu_intime', 'service_unit']" + " ehrapy_column_type feature_type\n", + "service_unit non_numeric categorical\n", + "day_icu_intime non_numeric categorical" ] }, "execution_count": 5, @@ -213,7 +385,7 @@ "id": "de9e47de", "metadata": {}, "source": [ - "We identified **service_unit** and **day_icu_intime** as categorical features. We will therefore encode them first with one-hot encoding. This ensures that no ordering is preserved for the respective features. ehrapy also offers other [encoding](https://ehrapy.readthedocs.io/en/latest/usage/usage.html#encoding) functions." + "We identified **service_unit** and **day_icu_intime** as categorical features stored non-numerically. We will therefore encode them first with one-hot encoding. This ensures that no ordering is preserved for the respective features. ehrapy also offers other [encoding](https://ehrapy.readthedocs.io/en/latest/usage/usage.html#encoding) functions." ] }, { @@ -228,13 +400,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[1;35m2023-12-18 16:18:23,495\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - The original categorical values `['service_unit', 'day_icu_intime']` were added to uns.\u001b[0m\n" + "\u001b[1;35m2024-04-20 15:47:33,242\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - The original categorical values `['service_unit', 'day_icu_intime']` were added to uns.\u001b[0m\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "764a7c1530eb4072a57a03e06ca1fe7e", + "model_id": "a7e18efe2d2844f2a62255ce6eaf0399", "version_major": 2, "version_minor": 0 }, @@ -272,9 +444,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[1;35m2023-12-18 16:18:23,647\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Updated the original layer after encoding.\u001b[0m\n", - "\u001b[1;35m2023-12-18 16:18:23,703\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Added `['service_unit', 'day_icu_intime']` 
columns to `X`.\u001b[0m\n", - "\u001b[1;35m2023-12-18 16:18:23,727\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - The original categorical values `['service_unit', 'day_icu_intime']` were added to obs.\u001b[0m\n" + "\u001b[1;35m2024-04-20 15:47:33,272\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - Updated the original layer after encoding.\u001b[0m\n", + "\u001b[1;35m2024-04-20 15:47:33,279\u001b[0m - \u001b[1;34mroot\u001b[0m \u001b[1;37mINFO - The original categorical values `['service_unit', 'day_icu_intime']` were added to obs.\u001b[0m\n" ] } ], @@ -295,7 +466,8 @@ "text/plain": [ "AnnData object with n_obs × n_vars = 1776 × 54\n", " obs: 'service_unit', 'day_icu_intime'\n", - " uns: 'numerical_columns', 'non_numerical_columns', 'encoding_to_var', 'original_values_categoricals', 'var_to_encoding', 'encoded_non_numerical_columns'\n", + " var: 'ehrapy_column_type', 'feature_type'\n", + " uns: 'encoding_to_var', 'original_values_categoricals', 'var_to_encoding'\n", " layers: 'original'" ] }, @@ -4239,7 +4411,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.10.13" } }, "nbformat": 4,