From 591692fba26d6b849210e55730b04e50d7d0ee44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Fri, 14 Apr 2023 16:19:20 -0300 Subject: [PATCH 1/3] chore(docs): add infodengue API documentation --- docs/source/Infodengue.ipynb | 438 +++++++++++++++++++++++++++++++++++ 1 file changed, 438 insertions(+) create mode 100644 docs/source/Infodengue.ipynb diff --git a/docs/source/Infodengue.ipynb b/docs/source/Infodengue.ipynb new file mode 100644 index 00000000..45c0e291 --- /dev/null +++ b/docs/source/Infodengue.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ce6b8e3e-8640-4fe0-9156-c5aac8ae3bab", + "metadata": {}, + "source": [ + "# Working with Infodengue datasets\n", + "[InfoDengue](https://info.dengue.mat.br/) is an alert system designed to track arboviruses using a unique hybrid data approach that integrates social web data with climatic and epidemiological data. In this tutorial, we will walk through the process of using InfoDengue's API with Python to fetch up-to-date arbovirus data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4936cdc7-ab6b-4659-b7a3-497c80b97e17", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from pysus.online_data.Infodengue import search_string, download" + ] + }, + { + "cell_type": "markdown", + "id": "44fb4cec-c0f9-4754-9fb0-57463df46410", + "metadata": {}, + "source": [ + "InfoDengue is a nationwide system; use the `search_string` method to check how a city is listed in the API:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "56af134b-fc1f-4b7b-aaee-46aa7ba436ff", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Algodão de Jandaíra': 2500577,\n", + " 'Arroio do Meio': 4301008,\n", + " 'Brejo de Areia': 2102150,\n", + " 'Granjeiro': 2304806,\n", + " 'Jardim de Angicos': 2405504,\n", + " 'Piquet Carneiro': 2310902,\n", + " 'Primeiro de Maio': 4120507,\n", + " 'Rafael Jambeiro': 2925956,\n", + " 'Rio Claro': 3304409,\n", + " 'Rio Grande da Serra': 3544103,\n", + " 'Rio Largo': 2707701,\n", + " 'Rio Manso': 3155306,\n", + " 'Rio Negrinho': 4215000,\n", + " 'Rio Negro': 4122305,\n", + " 'Rio Pardo': 4315701,\n", + " 'Rio da Conceição': 1718659,\n", + " 'Rio de Janeiro': 3304557,\n", + " 'Rio do Antônio': 2926806,\n", + " 'Rio do Pires': 2926905,\n", + " 'Rio dos Cedros': 4214706,\n", + " 'Rodeiro': 3156304,\n", + " 'Três de Maio': 4321808}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "search_string('Rio de janeiro')" + ] + }, + { + "cell_type": "markdown", + "id": "7a38a112-5229-4ec7-b57b-af1e93d58305", + "metadata": {}, + "source": [ + "The `download` method extracts data for a specified range of Epidemiological Weeks (EW; *SE*, for *Semana Epidemiológica*, in Portuguese) in the format `YYYYWW`. The output is a Pandas DataFrame containing all the EWs within this range." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7be2747e-d54d-464a-aec1-7e4f2f353a00", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "df = download('dengue', 202301, 202304, 'Rio de Janeiro')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "3eefdfe6-9c28-4614-8ee5-46a345480cc5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SE202304202303202302202301
data_iniSE2023-01-222023-01-152023-01-082023-01-01
casos_est295.0236.0211.0228.0
casos_est_min295236211228
casos_est_max295236211228
casos305236211228
p_rt10.9993560.9459770.9832910.999999
p_inc100k4.3717863.4974283.1269383.378871
Localidade_id0000
nivel2222
id330455720230419461330455720230319461330455720230219461330455720230119461
versao_modelo2023-04-142023-04-142023-04-142023-04-14
tweet0.00.00.00.0
Rt1.01.01.02.0
pop6747815.06747815.06747815.06747815.0
tempmin25.14285726.71428623.42857122.428571
umidmax82.14379377.15708489.98082992.587399
receptivo1111
transmissao0000
nivel_inc0000
umidmed82.14379377.15708482.59239578.188093
umidmin82.14379377.15708469.33168263.034302
tempmed25.14285726.71428625.07142924.976191
tempmax25.14285726.71428628.28571428.428571
casprovNaNNaNNaNNaN
casprov_estNaNNaNNaNNaN
casprov_est_minNaNNaNNaNNaN
casprov_est_maxNaNNaNNaNNaN
casconfNaNNaNNaNNaN
notif_accum_year980980980980
\n", + "
" + ], + "text/plain": [ + "SE 202304 ... 202301\n", + "data_iniSE 2023-01-22 ... 2023-01-01\n", + "casos_est 295.0 ... 228.0\n", + "casos_est_min 295 ... 228\n", + "casos_est_max 295 ... 228\n", + "casos 305 ... 228\n", + "p_rt1 0.999356 ... 0.999999\n", + "p_inc100k 4.371786 ... 3.378871\n", + "Localidade_id 0 ... 0\n", + "nivel 2 ... 2\n", + "id 330455720230419461 ... 330455720230119461\n", + "versao_modelo 2023-04-14 ... 2023-04-14\n", + "tweet 0.0 ... 0.0\n", + "Rt 1.0 ... 2.0\n", + "pop 6747815.0 ... 6747815.0\n", + "tempmin 25.142857 ... 22.428571\n", + "umidmax 82.143793 ... 92.587399\n", + "receptivo 1 ... 1\n", + "transmissao 0 ... 0\n", + "nivel_inc 0 ... 0\n", + "umidmed 82.143793 ... 78.188093\n", + "umidmin 82.143793 ... 63.034302\n", + "tempmed 25.142857 ... 24.976191\n", + "tempmax 25.142857 ... 28.428571\n", + "casprov NaN ... NaN\n", + "casprov_est NaN ... NaN\n", + "casprov_est_min NaN ... NaN\n", + "casprov_est_max NaN ... NaN\n", + "casconf NaN ... NaN\n", + "notif_accum_year 980 ... 
980\n", + "\n", + "[29 rows x 4 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "id": "08fd951a-26c0-4814-bf2e-e8b8b72cab3a", + "metadata": {}, + "source": [ + "You can save the dataframe in a CSV file" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "012e70f2-a204-46d9-983c-cd6b971999a7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "df.to_csv('rio_se01_04.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aab47913-cdf3-4f8d-b544-db8f3dd5e8d3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pysus", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "vscode": { + "interpreter": { + "hash": "2a96a5ccec8dfcba7d06b2e71f6eef3b5dac5716461bf5d73ea1bb7ee462cdaa" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From d7356a05250cdb7670b32fce743535ef414f1ee2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Fri, 14 Apr 2023 18:27:22 -0300 Subject: [PATCH 2/3] Downloading multiple csv files --- docs/source/Infodengue.ipynb | 166 ++++++++++++++++++++++++----------- 1 file changed, 114 insertions(+), 52 deletions(-) diff --git a/docs/source/Infodengue.ipynb b/docs/source/Infodengue.ipynb index 45c0e291..40044125 100644 --- a/docs/source/Infodengue.ipynb +++ b/docs/source/Infodengue.ipynb @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "id": "56af134b-fc1f-4b7b-aaee-46aa7ba436ff", "metadata": { "tags": [] @@ -40,37 +40,35 @@ { "data": { "text/plain": [ - "{'Algodão de Jandaíra': 2500577,\n", - " 'Arroio do 
Meio': 4301008,\n", - " 'Brejo de Areia': 2102150,\n", + "{'Arroio do Meio': 4301008,\n", " 'Granjeiro': 2304806,\n", - " 'Jardim de Angicos': 2405504,\n", - " 'Piquet Carneiro': 2310902,\n", - " 'Primeiro de Maio': 4120507,\n", - " 'Rafael Jambeiro': 2925956,\n", + " 'Jerônimo Monteiro': 3203106,\n", + " 'Minador do Negrão': 2705309,\n", + " 'Rio Branco': 5107206,\n", " 'Rio Claro': 3304409,\n", - " 'Rio Grande da Serra': 3544103,\n", + " 'Rio Grande': 4315602,\n", " 'Rio Largo': 2707701,\n", " 'Rio Manso': 3155306,\n", " 'Rio Negrinho': 4215000,\n", " 'Rio Negro': 4122305,\n", " 'Rio Pardo': 4315701,\n", " 'Rio da Conceição': 1718659,\n", + " 'Rio das Antas': 4214409,\n", " 'Rio de Janeiro': 3304557,\n", " 'Rio do Antônio': 2926806,\n", " 'Rio do Pires': 2926905,\n", " 'Rio dos Cedros': 4214706,\n", " 'Rodeiro': 3156304,\n", - " 'Três de Maio': 4321808}" + " 'Roteiro': 2707800}" ] }, - "execution_count": 4, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "search_string('Rio de janeiro')" + "search_string('Rio d janeiro')" ] }, { @@ -83,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "7be2747e-d54d-464a-aec1-7e4f2f353a00", "metadata": { "tags": [] @@ -95,7 +93,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "3eefdfe6-9c28-4614-8ee5-46a345480cc5", "metadata": { "tags": [] @@ -337,41 +335,70 @@ "" ], "text/plain": [ - "SE 202304 ... 202301\n", - "data_iniSE 2023-01-22 ... 2023-01-01\n", - "casos_est 295.0 ... 228.0\n", - "casos_est_min 295 ... 228\n", - "casos_est_max 295 ... 228\n", - "casos 305 ... 228\n", - "p_rt1 0.999356 ... 0.999999\n", - "p_inc100k 4.371786 ... 3.378871\n", - "Localidade_id 0 ... 0\n", - "nivel 2 ... 2\n", - "id 330455720230419461 ... 330455720230119461\n", - "versao_modelo 2023-04-14 ... 2023-04-14\n", - "tweet 0.0 ... 0.0\n", - "Rt 1.0 ... 2.0\n", - "pop 6747815.0 ... 6747815.0\n", - "tempmin 25.142857 ... 
22.428571\n", - "umidmax 82.143793 ... 92.587399\n", - "receptivo 1 ... 1\n", - "transmissao 0 ... 0\n", - "nivel_inc 0 ... 0\n", - "umidmed 82.143793 ... 78.188093\n", - "umidmin 82.143793 ... 63.034302\n", - "tempmed 25.142857 ... 24.976191\n", - "tempmax 25.142857 ... 28.428571\n", - "casprov NaN ... NaN\n", - "casprov_est NaN ... NaN\n", - "casprov_est_min NaN ... NaN\n", - "casprov_est_max NaN ... NaN\n", - "casconf NaN ... NaN\n", - "notif_accum_year 980 ... 980\n", + "SE 202304 202303 202302 \\\n", + "data_iniSE 2023-01-22 2023-01-15 2023-01-08 \n", + "casos_est 295.0 236.0 211.0 \n", + "casos_est_min 295 236 211 \n", + "casos_est_max 295 236 211 \n", + "casos 305 236 211 \n", + "p_rt1 0.999356 0.945977 0.983291 \n", + "p_inc100k 4.371786 3.497428 3.126938 \n", + "Localidade_id 0 0 0 \n", + "nivel 2 2 2 \n", + "id 330455720230419461 330455720230319461 330455720230219461 \n", + "versao_modelo 2023-04-14 2023-04-14 2023-04-14 \n", + "tweet 0.0 0.0 0.0 \n", + "Rt 1.0 1.0 1.0 \n", + "pop 6747815.0 6747815.0 6747815.0 \n", + "tempmin 25.142857 26.714286 23.428571 \n", + "umidmax 82.143793 77.157084 89.980829 \n", + "receptivo 1 1 1 \n", + "transmissao 0 0 0 \n", + "nivel_inc 0 0 0 \n", + "umidmed 82.143793 77.157084 82.592395 \n", + "umidmin 82.143793 77.157084 69.331682 \n", + "tempmed 25.142857 26.714286 25.071429 \n", + "tempmax 25.142857 26.714286 28.285714 \n", + "casprov NaN NaN NaN \n", + "casprov_est NaN NaN NaN \n", + "casprov_est_min NaN NaN NaN \n", + "casprov_est_max NaN NaN NaN \n", + "casconf NaN NaN NaN \n", + "notif_accum_year 980 980 980 \n", "\n", - "[29 rows x 4 columns]" + "SE 202301 \n", + "data_iniSE 2023-01-01 \n", + "casos_est 228.0 \n", + "casos_est_min 228 \n", + "casos_est_max 228 \n", + "casos 228 \n", + "p_rt1 0.999999 \n", + "p_inc100k 3.378871 \n", + "Localidade_id 0 \n", + "nivel 2 \n", + "id 330455720230119461 \n", + "versao_modelo 2023-04-14 \n", + "tweet 0.0 \n", + "Rt 2.0 \n", + "pop 6747815.0 \n", + "tempmin 22.428571 \n", + 
"umidmax 92.587399 \n", + "receptivo 1 \n", + "transmissao 0 \n", + "nivel_inc 0 \n", + "umidmed 78.188093 \n", + "umidmin 63.034302 \n", + "tempmed 24.976191 \n", + "tempmax 28.428571 \n", + "casprov NaN \n", + "casprov_est NaN \n", + "casprov_est_min NaN \n", + "casprov_est_max NaN \n", + "casconf NaN \n", + "notif_accum_year 980 " ] }, - "execution_count": 8, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -390,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "012e70f2-a204-46d9-983c-cd6b971999a7", "metadata": { "tags": [] @@ -401,17 +428,52 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "aab47913-cdf3-4f8d-b544-db8f3dd5e8d3", + "cell_type": "markdown", + "id": "01c98131-d49d-48e8-bc85-e6dbcdca2ecc", "metadata": {}, + "source": [ + "In order to fetch data with different parameters, it is possible to iterate over a list, for instance:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b4ddf6e9-ad86-438a-add0-8b7068307b98", + "metadata": { + "tags": [] + }, "outputs": [], - "source": [] + "source": [ + "from itertools import product\n", + "\n", + "diseases = ['dengue', 'zika']\n", + "cities = ['Rio de Janeiro', 'Rio do Antônio', 'Rio do Pires']\n", + "\n", + "for disease, city in product(diseases, cities):\n", + " df = download(disease, 202301, 202304, city)\n", + " df.to_csv(f'{disease}_{city.lower().replace(\" \", \"_\")}_se01_04.csv')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a84fc3e6-ee12-4dac-907e-3a33bf8f9509", + "metadata": {}, + "source": [ + "Expected files:\n", + "\n", + "- dengue_rio_de_janeiro_se01_04.csv\n", + "- dengue_rio_do_antônio_se01_04.csv\n", + "- dengue_rio_do_pires_se01_04.csv\n", + "- zika_rio_de_janeiro_se01_04.csv\n", + "- zika_rio_do_antônio_se01_04.csv\n", + "- zika_rio_do_pires_se01_04.csv" + ] } ], "metadata": { "kernelspec": { - "display_name": "pysus", + "display_name": "Python 3 (ipykernel)", 
"language": "python", "name": "python3" }, From 51d7189f55c5d1486697ae0580f3e4f2753cf4b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=A3=20Bida=20Vacaro?= Date: Tue, 18 Apr 2023 15:05:19 -0300 Subject: [PATCH 3/3] Add infodengue to index --- docs/source/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/index.rst b/docs/source/index.rst index aa7788f1..971b617b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -25,6 +25,7 @@ Contents: Analyzing Zika data Downloading COVID data from ESUS Downloading Infogripe data + Downloading Infodengue data Getting Official Statistics