diff --git a/sjng/config.json b/sjng/config.json new file mode 100644 index 0000000..2cbac9f --- /dev/null +++ b/sjng/config.json @@ -0,0 +1,66 @@ +{ + "version": "v0.1", + "templates_folder" : "templates", + "output_folder" : "output", + "defaults" : { + "values" : { + "SJNG_SCICAT_URL" : "https://staging.scicat.ess.eu", + "SJNT_INSTANCE_REQUESTER": "Anonymous", + "SJNT_INSTANCE_DESCRIPTION": "Jupyter Notebook auto-generated with SciCat Jupyter Notebook Generator" + }, + "mapping" : { + "scicat_url" : "SJNG_SCICAT_URL" + }, + "auto": { + "SJNT_INSTANCE_DATETIME": "<DATETIME>", + "SJNT_INSTANCE_FILE_NAME": "<UUID4>.ipynb", + "SJNT_INSTANCE_NAME": "SciCat generated Jupyter Notebook <UUID4>", + "SJNT_INSTANCE_ID": "<UUID4>" + } + }, + "templates": [ + { + "id" : "67ddc126-9a7f-11ee-a1ea-3f215a800ed6", + "name" : "Starter with datasets and files", + "file" : "sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb", + "parameters" : { + "title": "Parameters starter with datasets and files", + "description": "Parameters for starter template to retrieve specific files from selected datasets", + "type":"object", + "properties":{ + "files":{ + "description":"The list of files that needs to be loaded in the notebook", + "type":"array", + "items": { + "type": "array", + "minItems":2, + "maxItems":2, + "items":{ + "type":"string" + } + } + }, + "token" : { + "description":"Authorization token used to connect to SciCat", + "type":"string" + }, + "requester": { + "description": "username of the user requesting the Jupyter Notebook", + "type" : "string" + } + }, + "required":[ + "files", + "token" + ] + }, + "defaults" : { + "mapping" : { + "files" : "SJNG_DATASETS_FILES", + "token" : "SJNG_SCICAT_TOKEN", + "requester" : "SJNT_INSTANCE_REQUESTER" + } + } + } + ] +} diff --git a/sjng/notebook/sjng-draft-1.ipynb b/sjng/notebook/sjng-draft-1.ipynb new file mode 100644 index 0000000..aa29c1f --- /dev/null +++ b/sjng/notebook/sjng-draft-1.ipynb @@ -0,0 +1,762 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": 1, + "id": "e8e9aede-0ba0-44c6-9fb7-19a9266cac92", + "metadata": {}, + "outputs": [], + "source": [ + "import jsonschema" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a3e5b5c2-3b36-4d72-87e5-12cc83d9fb4b", + "metadata": {}, + "outputs": [], + "source": [ + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f071df20-cab6-4b80-9178-1acad76ae351", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "55012686-f1cb-4466-a350-cc80fbce2722", + "metadata": {}, + "outputs": [], + "source": [ + "import uuid" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bdd3db4b-33ae-4b29-ad15-9bed8035d538", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import nbformat as nbf" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4d5afb43-0f3e-49e2-a1d1-01ab67fc2027", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "763b8a01-9be9-4082-b06f-130d7c22e18e", + "metadata": {}, + "outputs": [], + "source": [ + "config_file=\"../config.json\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "4b5cb3cd-d77a-4e1d-bc77-b6bc3448993d", + "metadata": {}, + "outputs": [], + "source": [ + "example_input={\n", + " \"templateId\":\"67ddc126-9a7f-11ee-a1ea-3f215a800ed6\",\n", + " \"parameters\" : {\n", + " \"files\" : [\n", + " [\"20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4\", \"60395-2022-02-28_2215.nxs\"],\n", + " [\"20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4\", \"60394-2022-02-28_2215.nxs\"],\n", + " [\"20.500.12269/74467d52-d70b-489a-ad0b-4e1d4bc04bb2\", \"60390-2022-02-28_2215.nxs\"],\n", + " ],\n", + " \"token\" : 
\"REDACTED_SCICAT_TOKEN\"  # placeholder: never commit a real SciCat token, paste one at run time\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a8efcff5-0385-45f4-9c83-efe37b697721", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "with open(config_file,\"r\") as fh:\n", + " config = json.load(fh) " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "bb5dfb2e-5833-4487-bcb4-833c12aee5d0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'version': 'v0.1',\n", + " 'templates_folder': 'templates',\n", + " 'output_folder': 'output',\n", + " 'defaults': {'values': {'SJNG_SCICAT_URL': 'https://staging.scicat.ess.eu',\n", + " 'SJNT_INSTANCE_REQUESTER': 'Anonymous',\n", + " 'SJNT_INSTANCE_DESCRIPTION': 'Jupyter Notebook auto-generated with SciCat Jupyter Notebook Generator'},\n", + " 'mapping': {'scicat_url': 'SJNG_SCICAT_URL'},\n", + " 'auto': {'SJNT_INSTANCE_DATETIME': '',\n", + " 'SJNT_INSTANCE_FILE_NAME': '.ipynb',\n", + " 'SJNT_INSTANCE_NAME': 'SciCat generated Jupyter Notebook ',\n", + " 'SJNT_INSTANCE_ID': ''}},\n", + " 'templates': [{'id': '67ddc126-9a7f-11ee-a1ea-3f215a800ed6',\n", + " 'name': 'Starter with datasets and files',\n", + " 'file': 'sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb',\n", + " 'parameters': {'title': 'Parameters starter with datasets and files',\n", + " 'description': 'Parameters for starter template to retrieve specific files from selected datasets',\n", + " 'type': 'object',\n", + " 'properties': {'files': {'description': 
'The list of files that needs to be loaded in the notebook',\n", + " 'type': 'array',\n", + " 'items': {'type': 'array',\n", + " 'items': {'type': 'string', 'minItems': 2, 'maxItems': 2}}},\n", + " 'token': {'description': 'Authorization token used to connect to SciCat',\n", + " 'type': 'string'},\n", + " 'requester': {'description': 'username of the user requesting the Jupyter Notebook',\n", + " 'type': 'string'}},\n", + " 'required': ['files', 'token']},\n", + " 'defaults': {'mapping': {'files': 'SJNG_DATASETS_FILES',\n", + " 'token': 'SJNG_SCICAT_TOKEN',\n", + " 'requester': 'SJNG_INSTANCE_REQUESTER'}}}]}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "750f2b3b-c32b-4209-bf2f-b0521905dcea", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "config[\"templates\"] = {\n", + " template[\"id\"] : template\n", + " for template \n", + " in config[\"templates\"]\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ecaafc0f-5f73-4bf5-a170-1924bcbc3f0e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'version': 'v0.1',\n", + " 'templates_folder': 'templates',\n", + " 'output_folder': 'output',\n", + " 'defaults': {'values': {'SJNG_SCICAT_URL': 'https://staging.scicat.ess.eu',\n", + " 'SJNT_INSTANCE_REQUESTER': 'Anonymous',\n", + " 'SJNT_INSTANCE_DESCRIPTION': 'Jupyter Notebook auto-generated with SciCat Jupyter Notebook Generator'},\n", + " 'mapping': {'scicat_url': 'SJNG_SCICAT_URL'},\n", + " 'auto': {'SJNT_INSTANCE_DATETIME': '',\n", + " 'SJNT_INSTANCE_FILE_NAME': '.ipynb',\n", + " 'SJNT_INSTANCE_NAME': 'SciCat generated Jupyter Notebook ',\n", + " 'SJNT_INSTANCE_ID': ''}},\n", + " 'templates': 
{'67ddc126-9a7f-11ee-a1ea-3f215a800ed6': {'id': '67ddc126-9a7f-11ee-a1ea-3f215a800ed6',\n", + " 'name': 'Starter with datasets and files',\n", + " 'file': 'sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb',\n", + " 'parameters': {'title': 'Parameters starter with datasets and files',\n", + " 'description': 'Parameters for starter template to retrieve specific files from selected datasets',\n", + " 'type': 'object',\n", + " 'properties': {'files': {'description': 'The list of files that needs to be loaded in the notebook',\n", + " 'type': 'array',\n", + " 'items': {'type': 'array',\n", + " 'items': {'type': 'string', 'minItems': 2, 'maxItems': 2}}},\n", + " 'token': {'description': 'Authorization token used to connect to SciCat',\n", + " 'type': 'string'},\n", + " 'requester': {'description': 'username of the user requesting the Jupyter Notebook',\n", + " 'type': 'string'}},\n", + " 'required': ['files', 'token']},\n", + " 'defaults': {'mapping': {'files': 'SJNG_DATASETS_FILES',\n", + " 'token': 'SJNG_SCICAT_TOKEN',\n", + " 'requester': 'SJNG_INSTANCE_REQUESTER'}}}}}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "65c23844-101f-463f-991f-e36c10748a14", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'files': [['20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4',\n", + " '60395-2022-02-28_2215.nxs'],\n", + " ['20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4',\n", + " '60394-2022-02-28_2215.nxs'],\n", + " ['20.500.12269/74467d52-d70b-489a-ad0b-4e1d4bc04bb2',\n", + " '60390-2022-02-28_2215.nxs']],\n", + " 'token': 
'REDACTED_SCICAT_TOKEN'}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parameters = example_input[\"parameters\"]\n", + "parameters" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "bdd8883a-2783-4cf9-88f8-320e7a793a23", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "selected_template = config[\"templates\"][example_input[\"templateId\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "7a2127c5-d5f2-4e1f-866c-8151882a3cd5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "jsonschema.validate(instance=parameters,schema=selected_template[\"parameters\"])" + ] + }, + { + "cell_type": "markdown", + "id": "c979c69c-37b2-4b7b-ab51-80349ac72d5a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Check this post for handling the error: \n", + "http://donofden.com/blog/2020/03/15/How-to-Validate-JSON-Schema-using-Python" + ] + }, + { + "cell_type": "markdown", + "id": "2f2af90a-830f-4d18-b436-361b758c3dab", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "Load notebook. 
\n", + "https://gist.github.com/fperez/9716279" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "f5e397f0-7f62-4544-8071-9955a9c803c4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "parameters_mappings = config[\"defaults\"][\"mapping\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a8f4de4b-f879-4ca9-8bc9-d57a98437024", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "parameters_values = config[\"defaults\"][\"values\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "29c35edc-20c2-4680-a5fc-6eb8f15116db", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "if \"defaults\" in selected_template.keys():\n", + " if \"mapping\" in selected_template[\"defaults\"].keys():\n", + " parameters_mappings = {**parameters_mappings, **selected_template[\"defaults\"][\"mapping\"]}\n", + " if \"values\" in selected_template[\"defaults\"].keys():\n", + " parameters_values = {**parameters_values, **selected_template[\"defaults\"][\"values\"]}" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f388aead-46ea-4b57-8aaf-89774079e951", + "metadata": {}, + "outputs": [], + "source": [ + "auto_values = {\n", + " \"DATETIME\" : datetime.datetime.isoformat(datetime.datetime.now()),\n", + " \"UUID4\" : str(uuid.uuid4())\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "b57e7117-54e5-4466-9810-8a079f68983c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'DATETIME': '2024-01-04T15:25:47.376916',\n", + " 'UUID4': 'f027c955-73fb-4c80-9439-4f8a2d735432'}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "auto_values" + ] + }, + { + "cell_type": "code", + 
"execution_count": 21, + "id": "dcb0c3a6-ebc4-4f81-b731-f3e2781c28f9", + "metadata": {}, + "outputs": [], + "source": [ + "default_auto_values = json.dumps(config[\"defaults\"][\"auto\"])\n", + "for key in auto_values.keys(): \n", + " default_auto_values = default_auto_values.replace(\n", + " \"<\" + key + \">\",\n", + " auto_values[key]\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "3325cdb6-6e76-4f6e-8abc-e2d6eac213cb", + "metadata": {}, + "outputs": [], + "source": [ + "parameters_values = {**parameters_values, **json.loads(default_auto_values)}" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "5bb5359e-351b-431b-9d0a-d3536caa648f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'SJNG_SCICAT_URL': 'https://staging.scicat.ess.eu',\n", + " 'SJNT_INSTANCE_REQUESTER': 'Anonymous',\n", + " 'SJNT_INSTANCE_DESCRIPTION': 'Jupyter Notebook auto-generated with SciCat Jupyter Notebook Generator',\n", + " 'SJNT_INSTANCE_DATETIME': '2024-01-04T15:25:47.376916',\n", + " 'SJNT_INSTANCE_FILE_NAME': 'f027c955-73fb-4c80-9439-4f8a2d735432.ipynb',\n", + " 'SJNT_INSTANCE_NAME': 'SciCat generated Jupyter Notebook f027c955-73fb-4c80-9439-4f8a2d735432',\n", + " 'SJNT_INSTANCE_ID': 'f027c955-73fb-4c80-9439-4f8a2d735432'}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parameters_values" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "2ecabddc-b0f9-49e3-a627-47801bb135aa", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'scicat_url': 'SJNG_SCICAT_URL',\n", + " 'files': 'SJNG_DATASETS_FILES',\n", + " 'token': 'SJNG_SCICAT_TOKEN',\n", + " 'requester': 'SJNG_INSTANCE_REQUESTER'}" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "parameters_mappings" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "6a1a9c91-e7c4-44f0-bb2e-2ea7a680959c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "for key in parameters.keys():\n", + " parameters_values[\n", + " parameters_mappings[key] if key in parameters_mappings.keys() else key\n", + " ] = parameters[key]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "904c8191-8ee3-4821-a92a-18a298a229c1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'SJNG_SCICAT_URL': 'https://staging.scicat.ess.eu',\n", + " 'SJNT_INSTANCE_REQUESTER': 'Anonymous',\n", + " 'SJNT_INSTANCE_DESCRIPTION': 'Jupyter Notebook auto-generated with SciCat Jupyter Notebook Generator',\n", + " 'SJNT_INSTANCE_DATETIME': '2024-01-04T15:25:47.376916',\n", + " 'SJNT_INSTANCE_FILE_NAME': 'f027c955-73fb-4c80-9439-4f8a2d735432.ipynb',\n", + " 'SJNT_INSTANCE_NAME': 'SciCat generated Jupyter Notebook f027c955-73fb-4c80-9439-4f8a2d735432',\n", + " 'SJNT_INSTANCE_ID': 'f027c955-73fb-4c80-9439-4f8a2d735432',\n", + " 'SJNG_DATASETS_FILES': [['20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4',\n", + " '60395-2022-02-28_2215.nxs'],\n", + " ['20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4',\n", + " '60394-2022-02-28_2215.nxs'],\n", + " ['20.500.12269/74467d52-d70b-489a-ad0b-4e1d4bc04bb2',\n", + " '60390-2022-02-28_2215.nxs']],\n", + " 'SJNG_SCICAT_TOKEN': 'REDACTED_SCICAT_TOKEN'}" + ] + }, + 
"execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "parameters_values" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "032f978c-c7c0-46c6-99db-f0f1226b8026", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/nitrosx/repos/scicat-jupyter-notebook-generator/templates/sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb'" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "notebook_template_fullpath = os.path.abspath(os.path.join(\n", + " \"..\",\n", + " config[\"templates_folder\"],\n", + " selected_template[\"file\"]\n", + "))\n", + "notebook_template_fullpath" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "f28340e4-9864-4a60-a212-27844661e60f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "with open(notebook_template_fullpath,'r') as fh:\n", + " nb = nbf.read(fh,as_version=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "176fcca2-738f-45d6-9157-013aa630e2f8", + "metadata": {}, + "outputs": [], + "source": [ + "def inject_parameters(cell, parameters_values):  # fill the angle-bracket placeholders of a tagged cell and return the cell\n", + "    if cell[\"cell_type\"] in [\"code\",\"markdown\"] and \"tags\" in cell[\"metadata\"].keys():\n", + "        source = cell[\"source\"]\n", + "        tags = [tag for tag in cell[\"metadata\"][\"tags\"] if tag.startswith(\"SJNG_\") or tag.startswith(\"SJNT_\")]  # only generator tags name placeholders\n", + "        for key in list(set(tags) & set(parameters_values.keys())):\n", + "            source = source.replace(\n", + "                \"<\" + key + \">\",\n", + "                (\n", + "                    json.dumps(parameters_values[key])  # containers are injected as JSON literals\n", + "                    if isinstance(parameters_values[key],(dict, list))\n", + "                    else str(parameters_values[key])  # str.replace only accepts str; a number or boolean would raise TypeError\n", + "                )\n", + "            )\n", + "        cell[\"source\"] = source\n", + "    return cell" + ] + }, + { + "cell_type": "code", + "execution_count": 
30, + "id": "a415707c-4555-47a7-b17a-f8e40b446cca", + "metadata": {}, + "outputs": [], + "source": [ + "nb['cells'] = [inject_parameters(cell,parameters_values) for cell in nb['cells']]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "cf8d5041-a086-4b5f-bddc-1e4c0aab6dd7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'cell_type': 'markdown',\n", + " 'id': '5740b29e-c32a-4181-8065-8c3e5fe3df36',\n", + " 'metadata': {'editable': True,\n", + " 'slideshow': {'slide_type': ''},\n", + " 'tags': ['SJNT_INSTANCE_FILE_NAME',\n", + " 'SJNT_INSTANCE_REQUESTER',\n", + " 'SJNT_INSTANCE_DATETIME',\n", + " 'SJNT_INSTANCE_NAME',\n", + " 'SJNT_INSTANCE_DESCRIPTION']},\n", + " 'source': '## Instance Info\\n- File name: f027c955-73fb-4c80-9439-4f8a2d735432.ipynb \\n- Requester: Anonymous \\n- Created: 2024-01-04T15:25:47.376916 \\n- Name: SciCat generated Jupyter Notebook f027c955-73fb-4c80-9439-4f8a2d735432 \\n- Description: \\n Jupyter Notebook auto-generated with SciCat Jupyter Notebook Generator '}" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nb['cells'][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "50934482-da8b-40c7-91a4-6fca2be1e7d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/home/nitrosx/repos/scicat-jupyter-notebook-generator/output/f027c955-73fb-4c80-9439-4f8a2d735432.ipynb'" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output_notebook_fullpath = os.path.abspath(os.path.join(\n", + " \"..\",\n", + " config[\"output_folder\"],\n", + " parameters_values[\"SJNT_INSTANCE_FILE_NAME\"]\n", + "))\n", + "output_notebook_fullpath" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "d050bb4d-fdb0-441a-a1af-85d21f09ac49", + "metadata": {}, + "outputs": [], + "source": [ + "with open(output_notebook_fullpath,\"w\") as fh:\n", + " 
nbf.write(nb,fh)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f671b831-b85f-405a-a4fe-acb48d219a02", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sjng/output/f027c955-73fb-4c80-9439-4f8a2d735432.ipynb b/sjng/output/f027c955-73fb-4c80-9439-4f8a2d735432.ipynb new file mode 100644 index 0000000..bb4f6cf --- /dev/null +++ b/sjng/output/f027c955-73fb-4c80-9439-4f8a2d735432.ipynb @@ -0,0 +1,411 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c31d316a-c228-4a42-a427-73fc2e6b1fc5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNT_INSTANCE_NAME" + ] + }, + "source": [ + "# SciCat generated Jupyter Notebook f027c955-73fb-4c80-9439-4f8a2d735432" + ] + }, + { + "cell_type": "markdown", + "id": "5740b29e-c32a-4181-8065-8c3e5fe3df36", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNT_INSTANCE_FILE_NAME", + "SJNT_INSTANCE_REQUESTER", + "SJNT_INSTANCE_DATETIME", + "SJNT_INSTANCE_NAME", + "SJNT_INSTANCE_DESCRIPTION" + ] + }, + "source": [ + "## Instance Info\n", + "- File name: f027c955-73fb-4c80-9439-4f8a2d735432.ipynb \n", + "- Requester: Anonymous \n", + "- Created: 2024-01-04T15:25:47.376916 \n", + "- Name: SciCat generated Jupyter Notebook f027c955-73fb-4c80-9439-4f8a2d735432 \n", + "- Description: \n", + " Jupyter Notebook auto-generated with SciCat Jupyter Notebook Generator " + ] + }, + { + "cell_type": "markdown", + "id": "42f9d024-7dc2-4c5e-bc32-d73bd768ddf4", + "metadata": { + "editable": true, 
+ "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Template Info\n", + "- Template Id: 67ddc126-9a7f-11ee-a1ea-3f215a800ed6 \n", + "- File name: sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb \n", + "- Author: Max Novelli\n", + "- Email: max.novelli@ess.eu\n", + "- Institution: European Spallation Source\n", + "- Version: 0.1\n", + "- Name: Starter with datasets and files\n", + "- Description: \n", + " This template will produce a notebook that loads a list of SciCat datasets and downloads the requested files. \n", + " Please be aware that the file download might take a considerable length of time as the file size is not known.\n", + " This notebook assumes that you have access to the ESS sftp server." + ] + }, + { + "cell_type": "markdown", + "id": "437199de-d72c-46c6-a6d9-e31498c97282", + "metadata": {}, + "source": [ + "### Variables defined by the selections made in SciCat" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c8220471-2cff-44a4-8116-8746c5e5c061", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNG_SCICAT_URL" + ] + }, + "outputs": [], + "source": [ + "scicat_url=\"https://staging.scicat.ess.eu\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "06be562f-84eb-4b3b-87c7-6fc009e10794", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNG_SCICAT_TOKEN" + ] + }, + "outputs": [], + "source": [ + "scicat_token=\"REDACTED_SCICAT_TOKEN\"  # real token removed: generated notebooks holding credentials must not be committed" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "0fcb3058-438c-4710-975f-fae581eab838", + 
"metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sftp_server=\"login.esss.dk\"" + ] + }, + { + "cell_type": "markdown", + "id": "e2e3116e-c726-4c8d-8299-fc084425c53e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This variable contains the list of each individual file and the pid of the dataset that it belongs to.\n", + "Its format should be:\n", + "```json\n", + "[\n", + " {pid1, file1}\n", + " ...\n", + "]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "9ecc86db-81ae-46a5-8866-22a0112fccfa", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNG_DATASETS_FILES" + ] + }, + "outputs": [], + "source": [ + "files_to_be_downloaded=[[\"20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4\", \"60395-2022-02-28_2215.nxs\"], [\"20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4\", \"60394-2022-02-28_2215.nxs\"], [\"20.500.12269/74467d52-d70b-489a-ad0b-4e1d4bc04bb2\", \"60390-2022-02-28_2215.nxs\"]]" + ] + }, + { + "cell_type": "markdown", + "id": "ea52b7a9-920b-4518-9d46-b0ced0207a49", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### User defined variables" + ] + }, + { + "cell_type": "markdown", + "id": "c542c0b6-4d67-484f-8dd9-cd8b6a6ffee3", + "metadata": {}, + "source": [ + "Please change the local data folder to your liking. \n", + "This is the folder where the selected files will be downloaded." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0c92323f-1505-42ff-a78b-b70f8a8f2e03", + "metadata": {}, + "outputs": [], + "source": [ + "local_data_folder=\"downloads\"" + ] + }, + { + "cell_type": "markdown", + "id": "94b1ba99-1705-47ca-8abf-fb0edd3dbd0b", + "metadata": {}, + "source": [ + "Creates a list of unique datasets' pids." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "48d3c47d-c1af-482d-b459-a2f55be8d744", + "metadata": {}, + "outputs": [], + "source": [ + "datasets_pids = list(set([\n", + " item[0]\n", + " for item \n", + " in files_to_be_downloaded\n", + "]))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "9551fb69-c4f2-49ac-84df-523cf0679571", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['20.500.12269/74467d52-d70b-489a-ad0b-4e1d4bc04bb2',\n", + " '20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datasets_pids" + ] + }, + { + "cell_type": "markdown", + "id": "198c9f14-3597-40ca-88f1-07aa3f4c02d0", + "metadata": {}, + "source": [ + "Loading the libraries needed to interface with SciCat" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a9297336-cf6c-4d1c-80c6-9c6714f858e6", + "metadata": {}, + "outputs": [], + "source": [ + "from scitacean import Client\n", + "from scitacean.transfer.sftp import SFTPFileTransfer" + ] + }, + { + "cell_type": "markdown", + "id": "682e2fba-cc28-4987-9ee6-e8d888f1253c", + "metadata": {}, + "source": [ + "Instantiate scitacean client" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ff450938-214c-463f-bdcd-a31827587d53", + "metadata": {}, + "outputs": [], + "source": [ + "sc_client = Client.from_token(\n", + " url=scicat_url,\n", + " token=scicat_token,\n", + " file_transfer=SFTPFileTransfer(\n", + " host=sftp_server\n", + " ))" + ] + }, + { + "cell_type": "markdown", + "id": "051541b0-cdd4-4a43-932a-1eec02f6bc99", + "metadata": {}, + "source": [ + "Load all datasets from SciCat" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "312cd197-1e21-4903-9941-2b8281e36662", + "metadata": {}, + "outputs": [ + { + "ename": "JSONDecodeError", + "evalue": "Expecting value: line 1 column 1 (char 0)", + "output_type": 
"error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/micromamba/envs/sjng/lib/python3.11/site-packages/requests/models.py:971\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 970\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcomplexjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtext\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 972\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 973\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n", + "File \u001b[0;32m~/micromamba/envs/sjng/lib/python3.11/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 344\u001b[0m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m 
\u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m 345\u001b[0m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m~/micromamba/envs/sjng/lib/python3.11/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;124;03mcontaining a JSON document).\u001b[39;00m\n\u001b[1;32m 335\u001b[0m \n\u001b[1;32m 336\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraw_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_w\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 338\u001b[0m end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n", + "File 
\u001b[0;32m~/micromamba/envs/sjng/lib/python3.11/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpecting value\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, err\u001b[38;5;241m.\u001b[39mvalue) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n", + "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mJSONDecodeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m datasets \u001b[38;5;241m=\u001b[39m \u001b[43msc_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_dataset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/micromamba/envs/sjng/lib/python3.11/site-packages/scitacean/client.py:192\u001b[0m, in \u001b[0;36mClient.get_dataset\u001b[0;34m(self, pid, strict_validation, attachments)\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Download a dataset from SciCat.\u001b[39;00m\n\u001b[1;32m 168\u001b[0m \n\u001b[1;32m 169\u001b[0m \u001b[38;5;124;03mDoes not download any files.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 189\u001b[0m \u001b[38;5;124;03m A new dataset.\u001b[39;00m\n\u001b[1;32m 190\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 191\u001b[0m pid \u001b[38;5;241m=\u001b[39m 
PID\u001b[38;5;241m.\u001b[39mparse(pid)\n\u001b[0;32m--> 192\u001b[0m dataset \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mscicat\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_dataset_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 193\u001b[0m \u001b[43m \u001b[49m\u001b[43mpid\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstrict_validation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstrict_validation\u001b[49m\n\u001b[1;32m 194\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 196\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 197\u001b[0m orig_datablocks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscicat\u001b[38;5;241m.\u001b[39mget_orig_datablocks(\n\u001b[1;32m 198\u001b[0m pid, strict_validation\u001b[38;5;241m=\u001b[39mstrict_validation\n\u001b[1;32m 199\u001b[0m )\n", + "File \u001b[0;32m~/micromamba/envs/sjng/lib/python3.11/site-packages/scitacean/client.py:636\u001b[0m, in \u001b[0;36mScicatClient.get_dataset_model\u001b[0;34m(self, pid, strict_validation)\u001b[0m\n\u001b[1;32m 610\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_dataset_model\u001b[39m(\n\u001b[1;32m 611\u001b[0m \u001b[38;5;28mself\u001b[39m, pid: PID, strict_validation: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 612\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m model\u001b[38;5;241m.\u001b[39mDownloadDataset:\n\u001b[1;32m 613\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Fetch a dataset from SciCat.\u001b[39;00m\n\u001b[1;32m 614\u001b[0m \n\u001b[1;32m 615\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 634\u001b[0m \u001b[38;5;124;03m If the dataset does not exist or communication fails for some other reason.\u001b[39;00m\n\u001b[1;32m 635\u001b[0m 
\u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 636\u001b[0m dset_json \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_endpoint\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 637\u001b[0m \u001b[43m \u001b[49m\u001b[43mcmd\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mget\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 638\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdatasets/\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mquote_plus\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpid\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 639\u001b[0m \u001b[43m \u001b[49m\u001b[43moperation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mget_dataset_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 640\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 641\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m dset_json:\n\u001b[1;32m 642\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ScicatCommError(\n\u001b[1;32m 643\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot get dataset with \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpid\u001b[38;5;132;01m=}\u001b[39;00m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 644\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mno such dataset in SciCat at 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_base_url\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 645\u001b[0m )\n", + "File \u001b[0;32m~/micromamba/envs/sjng/lib/python3.11/site-packages/scitacean/client.py:935\u001b[0m, in \u001b[0;36mScicatClient._call_endpoint\u001b[0;34m(self, cmd, url, data, operation)\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ScicatCommError(\n\u001b[1;32m 930\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError in operation \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00moperation\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 931\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mreason\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mtext\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 932\u001b[0m )\n\u001b[1;32m 933\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAPI call successful for operation \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m\"\u001b[39m, operation)\n\u001b[0;32m--> 935\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m response\u001b[38;5;241m.\u001b[39mtext \u001b[38;5;28;01melse\u001b[39;00m 
\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjson\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/micromamba/envs/sjng/lib/python3.11/site-packages/requests/models.py:975\u001b[0m, in \u001b[0;36mResponse.json\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 971\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m complexjson\u001b[38;5;241m.\u001b[39mloads(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtext, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 972\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m JSONDecodeError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 973\u001b[0m \u001b[38;5;66;03m# Catch JSON-related errors and raise as requests.JSONDecodeError\u001b[39;00m\n\u001b[1;32m 974\u001b[0m \u001b[38;5;66;03m# This aliases json.JSONDecodeError and simplejson.JSONDecodeError\u001b[39;00m\n\u001b[0;32m--> 975\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m RequestsJSONDecodeError(e\u001b[38;5;241m.\u001b[39mmsg, e\u001b[38;5;241m.\u001b[39mdoc, e\u001b[38;5;241m.\u001b[39mpos)\n", + "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)" + ] + } + ], + "source": [ + "datasets = sc_client.get_dataset(\"20.500.12269/761fd17f-e0a8-4bd4-9e70-67ff8647b3f4\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "980a5aa7-cb8b-461e-a26f-5fb08b8fe0a8", + "metadata": {}, + "outputs": [], + "source": [ + "datasets = {\n", + " pid: sc_client.get_dataset(pid)\n", + " for pid\n", + " in datasets_pids\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "1d1b84d1-8ede-4e9c-ade6-cdbe6545d8dd", + "metadata": {}, + "source": [ + "Now loops through the files list and download each individual file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e56027e9-46c6-45d5-b8cf-d0bd4980b2a5", + "metadata": {}, + "outputs": [], + "source": [ + "for item in files_to_be_downloaded:\n", + " \n", + " pid = item[0]\n", + " 
file = item[1]\n", + " \n", + " datasets[pid] = sc_client.download_files(\n", + " datasets[pid],\n", + " target=local_data_folder,\n", + " select=file\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "id": "87591591-d499-4839-af0a-3664a4ed9dc5", + "metadata": {}, + "source": [ + "## Analysis\n", + "\n", + "Now datasets' metadata and associated data files are available locally. \n", + "You can start with your analysis or data exploration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8be0479c-8cd0-4398-be20-e00ddf06b7aa", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sjng/templates/sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb b/sjng/templates/sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb new file mode 100644 index 0000000..69ab087 --- /dev/null +++ b/sjng/templates/sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c31d316a-c228-4a42-a427-73fc2e6b1fc5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNT_INSTANCE_NAME" + ] + }, + "source": [ + "# " + ] + }, + { + "cell_type": "markdown", + "id": "5740b29e-c32a-4181-8065-8c3e5fe3df36", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNT_INSTANCE_FILE_NAME", + "SJNT_INSTANCE_REQUESTER", + "SJNT_INSTANCE_DATETIME", + "SJNT_INSTANCE_NAME", + "SJNT_INSTANCE_DESCRIPTION" + ] + }, + "source": [ + "## Instance Info\n", + "- File name: \n", + "- Requester: \n", + "- Created: \n", + "- 
Name: \n", + "- Description: \n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "42f9d024-7dc2-4c5e-bc32-d73bd768ddf4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Template Info\n", + "- Template Id: 67ddc126-9a7f-11ee-a1ea-3f215a800ed6 \n", + "- File name: sjnt_67ddc126_9a7f_11ee_a1ea_3f215a800ed6.ipynb \n", + "- Author: Max Novelli\n", + "- Email: max.novelli@ess.eu\n", + "- Institution: European Spallation Source\n", + "- Version: 0.1\n", + "- Name: Starter with datasets and files\n", + "- Description: \n", + " This template will produce a notebook that loads a list of SciCat datasets and downloads the requested files. \n", + " Please be aware that the file download might take a considerable length of time as the file size is not known.\n", + " This notebook assumes that you have access to the ESS sftp server." + ] + }, + { + "cell_type": "markdown", + "id": "437199de-d72c-46c6-a6d9-e31498c97282", + "metadata": {}, + "source": [ + "### Variables defined by the selections made in SciCat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8220471-2cff-44a4-8116-8746c5e5c061", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNG_SCICAT_URL" + ] + }, + "outputs": [], + "source": [ + "scicat_url=\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06be562f-84eb-4b3b-87c7-6fc009e10794", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNG_SCICAT_TOKEN" + ] + }, + "outputs": [], + "source": [ + "scicat_token=\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0fcb3058-438c-4710-975f-fae581eab838", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "sftp_server=\"login.esss.dk\"" + ] + }, + { + "cell_type": "markdown", + "id": 
"e2e3116e-c726-4c8d-8299-fc084425c53e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "This variable contains the list of each individual file and the pid of the dataset that it belongs to.\n", + "Its format should be:\n", + "```json\n", + "[\n", + " [pid1, file1]\n", + " ...\n", + "]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ecc86db-81ae-46a5-8866-22a0112fccfa", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "SJNG_DATASETS_FILES" + ] + }, + "outputs": [], + "source": [ + "files_to_be_downloaded=" + ] + }, + { + "cell_type": "markdown", + "id": "ea52b7a9-920b-4518-9d46-b0ced0207a49", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### User defined variables" + ] + }, + { + "cell_type": "markdown", + "id": "c542c0b6-4d67-484f-8dd9-cd8b6a6ffee3", + "metadata": {}, + "source": [ + "Please change the local data folder to your liking. \n", + "This is the folder where the selected files will be downloaded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c92323f-1505-42ff-a78b-b70f8a8f2e03", + "metadata": {}, + "outputs": [], + "source": [ + "local_data_folder=\"downloads\"" + ] + }, + { + "cell_type": "markdown", + "id": "94b1ba99-1705-47ca-8abf-fb0edd3dbd0b", + "metadata": {}, + "source": [ + "Creates a list of unique datasets' pids." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48d3c47d-c1af-482d-b459-a2f55be8d744", + "metadata": {}, + "outputs": [], + "source": [ + "datasets_pids = list(set([\n", + " item[0]\n", + " for item \n", + " in files_to_be_downloaded\n", + "]))" + ] + }, + { + "cell_type": "markdown", + "id": "198c9f14-3597-40ca-88f1-07aa3f4c02d0", + "metadata": {}, + "source": [ + "Loading the libraries needed to interface with SciCat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9297336-cf6c-4d1c-80c6-9c6714f858e6", + "metadata": {}, + "outputs": [], + "source": [ + "from scitacean import Client\n", + "from scitacean.transfer.sftp import SFTPFileTransfer" + ] + }, + { + "cell_type": "markdown", + "id": "682e2fba-cc28-4987-9ee6-e8d888f1253c", + "metadata": {}, + "source": [ + "Instantiate scitacean client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff450938-214c-463f-bdcd-a31827587d53", + "metadata": {}, + "outputs": [], + "source": [ + "sc_client = Client.from_token(\n", + " url=scicat_url,\n", + " token=scicat_token,\n", + " file_transfer=SFTPFileTransfer(\n", + " host=sftp_server\n", + " ))" + ] + }, + { + "cell_type": "markdown", + "id": "051541b0-cdd4-4a43-932a-1eec02f6bc99", + "metadata": {}, + "source": [ + "Load all datasets from SciCat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "980a5aa7-cb8b-461e-a26f-5fb08b8fe0a8", + "metadata": {}, + "outputs": [], + "source": [ + "datasets = {\n", + " pid: sc_client.get_dataset(pid)\n", + " for pid\n", + " in datasets_pids\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "1d1b84d1-8ede-4e9c-ade6-cdbe6545d8dd", + "metadata": {}, + "source": [ + "Now loop through the files list and download each individual file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e56027e9-46c6-45d5-b8cf-d0bd4980b2a5", + "metadata": {}, + "outputs": [], + "source": [ + "for item in files_to_be_downloaded:\n", + " 
\n", + " pid = item[0]\n", + " file = item[1]\n", + " \n", + " datasets[pid] = sc_client.download_files(\n", + " datasets[pid],\n", + " target=local_data_folder,\n", + " select=file\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "id": "87591591-d499-4839-af0a-3664a4ed9dc5", + "metadata": {}, + "source": [ + "## Analysis\n", + "\n", + "Now datasets' metadata and associated data files are available locally. \n", + "You can start with your analysis or data exploration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8be0479c-8cd0-4398-be20-e00ddf06b7aa", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}