diff --git a/notebooks/FASPNotebook10.ipynb b/notebooks/FASPNotebook10.ipynb index eff4cb2..4eadf50 100644 --- a/notebooks/FASPNotebook10.ipynb +++ b/notebooks/FASPNotebook10.ipynb @@ -106,10 +106,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "wesClient.GetRunLog('5baa8d0be4b0db63859e6843')" + ] }, { "cell_type": "code", diff --git a/notebooks/GTEX_TCGA_Federated_Analysis.ipynb b/notebooks/GTEX_TCGA_Federated_Analysis.ipynb index 2f583d0..8faddbc 100644 --- a/notebooks/GTEX_TCGA_Federated_Analysis.ipynb +++ b/notebooks/GTEX_TCGA_Federated_Analysis.ipynb @@ -31,7 +31,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The following sets up two Clients to Search data. Note that at the moment one of these is a placeholder to search a local file. That file contains file ids downloaded as a manifest from the Gen3 Anvil portal. Note that DRS ids are prefixed with CURIEs (crdc for the Cancer Research Data Commons and anv for Anvil). This indicates which namespace the ids come from and allows the referenced file to be retrieved from the correct DRS server. " + "The following sets up two Clients to Search data. Note that at the moment one of these is a placeholder to search a local file. That file contains file ids downloaded as a manifest from the Gen3 Anvil portal. The list of files in that manifest had already been filtered to relevant samples. Note that the DRS ids are prefixed with CURIEs (crdc for the Cancer Research Data Commons and anv for Anvil). This indicates which namespace the ids come from and allows the referenced file to be retrieved from the correct DRS server. In the case of the GTEx manifest file, the anv: prefix was added in an edited version of the file." 
] }, { @@ -78,8 +78,8 @@ " limit 3\"\"\"\t\t\n", "\n", "\n", - "# Run both queriues and aggregate results\n", - "results = discoveryClients['anv'].runQuery(3) # Send the query\n", + "# Run both queries and aggregate results\n", + "results = discoveryClients['anv'].runQuery(3) # Send the query for the first three items\n", "results += discoveryClients['crdc'].runQuery(crdcquery) \n", "results\n" ] diff --git a/notebooks/registry/DemoForLifebit.ipynb b/notebooks/registry/DemoForLifebit.ipynb new file mode 100644 index 0000000..93d3c78 --- /dev/null +++ b/notebooks/registry/DemoForLifebit.ipynb @@ -0,0 +1,228 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Scripts run live during hackathon session to illustrate use of Search and DRS relevant to using those GA4GH APIs from within a Nextflow workflow.\n", + "\n", + "22 Jan 2021" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Searching the GA4GH registry for drs services\n" + ] + }, + { + "data": { + "text/plain": [ + "{'timestamp': '2021-01-22T15:55:24Z',\n", + " 'status': 400,\n", + " 'error': 'Bad Request',\n", + " 'message': \"Could not instantiate ServiceType from 'type' string\",\n", + " 'path': '/v1/services'}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from fasp.loc import GA4GHRegistryClient\n", + "cl = GA4GHRegistryClient('https://registry.ga4gh.org/v1')\n", + "services = cl.getRegisteredServices(type='drs')\n", + "services" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "If using all scalar values, you must pass an index", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback 
(most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mpandas\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mservices\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0mdf\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mfrom_dict\u001b[0;34m(cls, data, orient, dtype, columns)\u001b[0m\n\u001b[1;32m 1307\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"only recognize index or columns for orient\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1308\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1309\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mcls\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1310\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1311\u001b[0m def to_numpy(\n", + "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 466\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 
467\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 468\u001b[0;31m \u001b[0mmgr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minit_dict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 469\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMaskedArray\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mma\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmrecords\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mmrecords\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/internals/construction.py\u001b[0m in \u001b[0;36minit_dict\u001b[0;34m(data, index, columns, dtype)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0marr\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_datetime64tz_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0marr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0marr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0marrays\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m ]\n\u001b[0;32m--> 283\u001b[0;31m 
\u001b[0;32mreturn\u001b[0m \u001b[0marrays_to_mgr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marrays\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata_names\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 284\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 285\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/internals/construction.py\u001b[0m in \u001b[0;36marrays_to_mgr\u001b[0;34m(arrays, arr_names, index, columns, dtype, verify_integrity)\u001b[0m\n\u001b[1;32m 76\u001b[0m \u001b[0;31m# figure out the index, if necessary\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 78\u001b[0;31m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mextract_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marrays\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 79\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 80\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.7/site-packages/pandas/core/internals/construction.py\u001b[0m in \u001b[0;36mextract_index\u001b[0;34m(data)\u001b[0m\n\u001b[1;32m 385\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 386\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m 
\u001b[0mindexes\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mraw_lengths\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 387\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"If using all scalar values, you must pass an index\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 388\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 389\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhave_series\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: If using all scalar values, you must pass an index" + ] + } + ], + "source": [ + "import pandas as pd\n", + "df = pd.DataFrame.from_dict(services)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from fasp.loc import crdcDRSClient\n", + "cl = crdcDRSClient('~/.keys/crdc_credentials.json', access_id='gs')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'access_methods': [{'access_id': 'gs',\n", + " 'access_url': {'url': 'gs://gdc-tcga-phs000178-controlled/00589653-5840-4c11-8572-5aa7d00a73f8/TCGA-A1-A0SD-01A-11D-A10Y-09_IlluminaGA-DNASeq_exome_gdc_realn.bam'},\n", + " 'region': '',\n", + " 'type': 'gs'},\n", + " {'access_id': 's3',\n", + " 'access_url': {'url': 's3://tcga-2-controlled/00589653-5840-4c11-8572-5aa7d00a73f8/TCGA-A1-A0SD-01A-11D-A10Y-09_IlluminaGA-DNASeq_exome_gdc_realn.bam'},\n", + " 'region': '',\n", + " 'type': 's3'}],\n", + " 'aliases': [],\n", + " 'checksums': [{'checksum': 'c9bd036984e77df13990b77ce1d33245',\n", + " 'type': 'md5'}],\n", + " 'contents': [],\n", + " 'created_time': '2018-06-26T12:58:10.724877',\n", + " 'description': None,\n", + " 'form': 'object',\n", + " 'id': 
'00589653-5840-4c11-8572-5aa7d00a73f8',\n", + " 'mime_type': 'application/json',\n", + " 'name': None,\n", + " 'self_uri': 'drs://nci-crdc.datacommons.io/00589653-5840-4c11-8572-5aa7d00a73f8',\n", + " 'size': 15897657241,\n", + " 'updated_time': '2018-06-26T12:58:10.724888',\n", + " 'version': '111e84ad'}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cl.getObject('00589653-5840-4c11-8572-5aa7d00a73f8')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://storage.googleapis.com/gdc-tcga-phs000178-controlled/00589653-5840-4c11-8572-5aa7d00a73f8/TCGA-A1-A0SD-01A-11D-A10Y-09_IlluminaGA-DNASeq_exome_gdc_realn.bam?GoogleAccessId=forei-2417@dcf-prod.iam.gserviceaccount.com&Expires=1611336405&Signature=rO%2B0vJ09k5xyG5U6TbWS1E29zHKE2BMZHDztxPBxdZzPUYS%2BgPjt0uQ9R1SLrNYWRcwuIv%2BdajcseHeqaxDK%2BlYDY7LvHHXjfnLuw7CdlJdS0L48tfl8lHW4wvRj3LlvH8o1cPE1GXtH%2BXOjAEZCYNJW5QLAZVu%2BZzNZTdxxdm7hGCyY6E4vQGkKBM%2FJSfWM5YxYy2zSMPhnFFrZhTWyc8UDx%2FeBcsvRMO4cOd00SCkpTO%2BQcR%2B80BUCt0oSiw6nz3Q1dFBfl99xbTXRGc%2FbyzR3TPTYPKi9XeqXdDN1IDw54knW7M9lGvHEjX4orQ3P%2BDtYiAITeE1cN3D%2BmQpVEg=='" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gcpurl = cl.getAccessURL('00589653-5840-4c11-8572-5aa7d00a73f8')\n", + "gcpurl" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://tcga-2-controlled.s3.amazonaws.com/00589653-5840-4c11-8572-5aa7d00a73f8/TCGA-A1-A0SD-01A-11D-A10Y-09_IlluminaGA-DNASeq_exome_gdc_realn.bam?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAINBJ6QVTSWMR7UZQ%2F20210122%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20210122T162616Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&user_id=2417&username=forei&X-Amz-Signature=266218ddd4ce2cb64d0787318643dbdf1b73d8c547dfbbebff1d339f5bb2b2b4'" 
+ ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "url = cl.getAccessURL('00589653-5840-4c11-8572-5aa7d00a73f8',access_id='s3')\n", + "url" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from fasp.workflow import DNAStackWESClient\n", + "wesClient = DNAStackWESClient('~/.keys/dnastack_wes_credentials.json')\n", + "run_id = wesClient.runWorkflow('gcpurl', 'test.txt')" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'EXECUTOR_ERROR'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "wesClient.getTaskStatus(run_id)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/search/NCPI_FHIR_approaches.ipynb b/notebooks/search/NCPI_FHIR_approaches.ipynb index 880fbd8..95a857d 100644 --- a/notebooks/search/NCPI_FHIR_approaches.ipynb +++ b/notebooks/search/NCPI_FHIR_approaches.ipynb @@ -77,7 +77,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#TODO Do the same query directly via FHIR. Is it any easier to specify the query and to unpack the results?" + "#TODO Do the same query directly via FHIR. 
Is it any easier to specify the query and to unpack the results?\n", + "\n", + "Example query for DRS ids from Jim\n", + "https://ncpi-api-fhir-service-dev.kidsfirstdrc.org/DocumentReference?_format=json&_profile=http://fhir.ncpi-project-forge.io/StructureDefinition/ncpi-drs-document-reference&subject:Patient._has:Condition:patient:code=NCIT:C50715\n" ] }, {