Skip to content

Commit

Permalink
Notebook updates
Browse files Browse the repository at this point in the history
  • Loading branch information
ianfore committed Jan 22, 2021
1 parent 3b787f8 commit 8b207a5
Show file tree
Hide file tree
Showing 4 changed files with 248 additions and 7 deletions.
16 changes: 13 additions & 3 deletions notebooks/FASPNotebook10.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<Response [404]>\n"
]
}
],
"source": [
"wesClient.GetRunLog('5baa8d0be4b0db63859e6843')"
]
},
{
"cell_type": "code",
Expand Down
6 changes: 3 additions & 3 deletions notebooks/GTEX_TCGA_Federated_Analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The following sets up two Clients to Search data. Note that at the moment one of these is a placeholder to search a local file. That file contains file ids downloaded as a manifest from the Gen3 Anvil portal. Note that DRS ids are prefixed with CURIEs (crdc for the Cancer Research Data Commons and anv for Anvil). This indicates which namespace the ids come from and allows the referenced file to be retrieved from the correct DRS server. "
"The following sets up two Clients to Search data. Note that at the moment one of these is a placeholder to search a local file. That file contains file ids downloaded as a manifest from the Gen3 Anvil portal. The list of files in that manifest had already been filtered to relevant samples. Note that the DRS ids are prefixed with CURIEs (crdc for the Cancer Research Data Commons and anv for Anvil). This indicates which namespace the ids come from and allows the referenced file to be retrieved from the correct DRS server. In the case of the gtex manifest file, the anv: prefix was added in an edited version of the file."
]
},
{
Expand Down Expand Up @@ -78,8 +78,8 @@
" limit 3\"\"\"\t\t\n",
"\n",
"\n",
"# Run both queriues and aggregate results\n",
"results = discoveryClients['anv'].runQuery(3) # Send the query\n",
"# Run both queries and aggregate results\n",
"results = discoveryClients['anv'].runQuery(3) # Send the query for the first three items\n",
"results += discoveryClients['crdc'].runQuery(crdcquery) \n",
"results\n"
]
Expand Down
228 changes: 228 additions & 0 deletions notebooks/registry/DemoForLifebit.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Scripts run live during a hackathon session to illustrate the use of Search and DRS, relevant to using those GA4GH APIs from within a Nextflow workflow.\n",
"\n",
"22 Jan 2021"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching the GA4GH registry for drs services\n"
]
},
{
"data": {
"text/plain": [
"{'timestamp': '2021-01-22T15:55:24Z',\n",
" 'status': 400,\n",
" 'error': 'Bad Request',\n",
" 'message': \"Could not instantiate ServiceType from 'type' string\",\n",
" 'path': '/v1/services'}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from fasp.loc import GA4GHRegistryClient\n",
"cl = GA4GHRegistryClient('https://registry.ga4gh.org/v1')\n",
"services = cl.getRegisteredServices(type='drs')\n",
"services"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "If using all scalar values, you must pass an index",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-4-c147e6c2fc51>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mservices\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mfrom_dict\u001b[0;34m(cls, data, orient, dtype, columns)\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"only recognize index or columns for orient\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1308\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1309\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1310\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1311\u001b[0m def to_numpy(\n",
"\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 466\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 467\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 468\u001b[0;31m \u001b[0mmgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minit_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 469\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMaskedArray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmrecords\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmrecords\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/internals/construction.py\u001b[0m in \u001b[0;36minit_dict\u001b[0;34m(data, index, columns, dtype)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0marr\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_datetime64tz_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0marr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0marrays\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m ]\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0marrays_to_mgr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marrays\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 284\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/internals/construction.py\u001b[0m in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, arr_names, index, columns, dtype, verify_integrity)\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0;31m# figure out the index, if necessary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextract_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marrays\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 79\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/internals/construction.py\u001b[0m in \u001b[0;36mextract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 385\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mindexes\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mraw_lengths\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 387\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"If using all scalar values, you must pass an index\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 388\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 389\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhave_series\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mValueError\u001b[0m: If using all scalar values, you must pass an index"
]
}
],
"source": [
"import pandas as pd\n",
"df = pd.DataFrame.from_dict(services)\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from fasp.loc import crdcDRSClient\n",
"cl = crdcDRSClient('~/.keys/crdc_credentials.json', access_id='gs')"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'access_methods': [{'access_id': 'gs',\n",
" 'access_url': {'url': 'gs://gdc-tcga-phs000178-controlled/00589653-5840-4c11-8572-5aa7d00a73f8/TCGA-A1-A0SD-01A-11D-A10Y-09_IlluminaGA-DNASeq_exome_gdc_realn.bam'},\n",
" 'region': '',\n",
" 'type': 'gs'},\n",
" {'access_id': 's3',\n",
" 'access_url': {'url': 's3://tcga-2-controlled/00589653-5840-4c11-8572-5aa7d00a73f8/TCGA-A1-A0SD-01A-11D-A10Y-09_IlluminaGA-DNASeq_exome_gdc_realn.bam'},\n",
" 'region': '',\n",
" 'type': 's3'}],\n",
" 'aliases': [],\n",
" 'checksums': [{'checksum': 'c9bd036984e77df13990b77ce1d33245',\n",
" 'type': 'md5'}],\n",
" 'contents': [],\n",
" 'created_time': '2018-06-26T12:58:10.724877',\n",
" 'description': None,\n",
" 'form': 'object',\n",
" 'id': '00589653-5840-4c11-8572-5aa7d00a73f8',\n",
" 'mime_type': 'application/json',\n",
" 'name': None,\n",
" 'self_uri': 'drs://nci-crdc.datacommons.io/00589653-5840-4c11-8572-5aa7d00a73f8',\n",
" 'size': 15897657241,\n",
" 'updated_time': '2018-06-26T12:58:10.724888',\n",
" 'version': '111e84ad'}"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cl.getObject('00589653-5840-4c11-8572-5aa7d00a73f8')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'https://storage.googleapis.com/gdc-tcga-phs000178-controlled/00589653-5840-4c11-8572-5aa7d00a73f8/TCGA-A1-A0SD-01A-11D-A10Y-09_IlluminaGA-DNASeq_exome_gdc_realn.bam?GoogleAccessId=forei-2417@dcf-prod.iam.gserviceaccount.com&Expires=1611336405&Signature=rO%2B0vJ09k5xyG5U6TbWS1E29zHKE2BMZHDztxPBxdZzPUYS%2BgPjt0uQ9R1SLrNYWRcwuIv%2BdajcseHeqaxDK%2BlYDY7LvHHXjfnLuw7CdlJdS0L48tfl8lHW4wvRj3LlvH8o1cPE1GXtH%2BXOjAEZCYNJW5QLAZVu%2BZzNZTdxxdm7hGCyY6E4vQGkKBM%2FJSfWM5YxYy2zSMPhnFFrZhTWyc8UDx%2FeBcsvRMO4cOd00SCkpTO%2BQcR%2B80BUCt0oSiw6nz3Q1dFBfl99xbTXRGc%2FbyzR3TPTYPKi9XeqXdDN1IDw54knW7M9lGvHEjX4orQ3P%2BDtYiAITeE1cN3D%2BmQpVEg=='"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gcpurl = cl.getAccessURL('00589653-5840-4c11-8572-5aa7d00a73f8')\n",
"gcpurl"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'https://tcga-2-controlled.s3.amazonaws.com/00589653-5840-4c11-8572-5aa7d00a73f8/TCGA-A1-A0SD-01A-11D-A10Y-09_IlluminaGA-DNASeq_exome_gdc_realn.bam?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAINBJ6QVTSWMR7UZQ%2F20210122%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20210122T162616Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&user_id=2417&username=forei&X-Amz-Signature=266218ddd4ce2cb64d0787318643dbdf1b73d8c547dfbbebff1d339f5bb2b2b4'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"url = cl.getAccessURL('00589653-5840-4c11-8572-5aa7d00a73f8',access_id='s3')\n",
"url"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Submit a workflow run to the DNAStack WES service.\n",
"# Pass the signed URL held in the gcpurl variable (set by the getAccessURL cell above),\n",
"# not the literal string 'gcpurl', which is not a usable file location.\n",
"from fasp.workflow import DNAStackWESClient\n",
"wesClient = DNAStackWESClient('~/.keys/dnastack_wes_credentials.json')\n",
"run_id = wesClient.runWorkflow(gcpurl, 'test.txt')"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'EXECUTOR_ERROR'"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wesClient.getTaskStatus(run_id)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
5 changes: 4 additions & 1 deletion notebooks/search/NCPI_FHIR_approaches.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"#TODO Do the same query directly via FHIR. Is it any easier to specify the query and to unpack the results?"
"#TODO Do the same query directly via FHIR. Is it any easier to specify the query and to unpack the results?\n",
"\n",
"Example query for DRS ids from Jim\n",
"https://ncpi-api-fhir-service-dev.kidsfirstdrc.org/DocumentReference?_format=json&_profile=http://fhir.ncpi-project-forge.io/StructureDefinition/ncpi-drs-document-reference&subject:Patient._has:Condition:patient:code=NCIT:C50715\n"
]
},
{
Expand Down

0 comments on commit 8b207a5

Please sign in to comment.