Merge pull request #42 from fccoelho/vaccine_data

Vaccine data
AlertaDengue · May 31, 2021 · 031cb8d · 031cb8d
2 parents 9ddcd73 + c218df1
commit 031cb8d
Showing 1 changed file with 95 additions and 0 deletions.
diff --git a/pysus/online_data/vaccine.py b/pysus/online_data/vaccine.py
@@ -0,0 +1,95 @@
+"""
+Download of vaccination data.
+
+This module contains functions to download data from specific campaigns:
+
+- COVID-19 in 2020-2021 Downloaded as described [here](http://opendatasus.saude.gov.br/dataset/b772ee55-07cd-44d8-958f-b12edd004e0b/resource/5916b3a4-81e7-4ad5-adb6-b884ff198dc1/download/manual_api_vacina_covid-19.pdf)
+"""
+import pandas as pd
+from pysus.online_data import CACHEPATH
+from elasticsearch import Elasticsearch
+import elasticsearch.helpers
+import requests
+from requests.auth import HTTPBasicAuth
+import os
+import time
+from json import JSONDecodeError
+import json
+from datetime import date
+
+
+def download_covid(uf=None):
+    """
+    Download covid vaccination data for a give UF
+    :param uf: 'RJ' | 'SP', etc.
+    :return: dataframe iterator as returned by pandas `read_csv('Vaccine_temp_<uf>.csv.gz', chunksize=5000)`
+    """
+    user = 'imunizacao_public'
+    pwd = 'qlto5t&7r_@+#Tlstigi'
+    index = "desc-imunizacao"
+    url = f"https://imunizacao-es.saude.gov.br/_search?scroll=1m"
+    if uf is None:
+        query = {"query": {"match_all": {}},
+                 "size": 10000}
+    else:
+        UF = uf.upper()
+        query = {"query": {"match": {"paciente_endereco_uf": UF}},
+                 "size": 10000
+                 }
+    # es = Elasticsearch([url], send_get_body_as='POST', headers=)
+    auth = HTTPBasicAuth(user, pwd)
+    data_gen = elasticsearch_fetch(url, auth, query)
+    tempfile = os.path.join(CACHEPATH, f'Vaccine_temp_{UF}.csv.gz')
+    h = 1
+    for dt in data_gen:
+        df = pd.DataFrame(dt)
+        if h:
+            df.to_csv(tempfile)
+            h = 0
+        else:
+            df.to_csv(tempfile, mode='a', header=False)
+    df = pd.read_csv(tempfile, chunk_size=5000)
+    return df
+
+
+def elasticsearch_fetch(uri, auth, json_body={}):
+    headers = {
+        'Content-Type': 'application/json',
+    }
+
+    scroll_id = ''
+    total = 0
+    while True:
+        if scroll_id:
+            uri = "https://imunizacao-es.saude.gov.br/_search/scroll"
+            json_body['scroll_id'] = scroll_id
+            json_body['scroll'] = '1m'
+            if 'query' in json_body:
+                del json_body['query'] # for the continuation of the download, query parameter is not allowed
+                del json_body['size']
+        try:
+            response = requests.post(uri, auth=auth, headers=headers, json=json_body)
+            text = response.text
+            try:
+                resp = json.loads(text)
+            except JSONDecodeError:
+                resp = text
+        except Exception as error:
+            print('\nelasticsearch_fetch() error:', error)
+            raise error
+        try:
+            if resp['hits']['hits'] == []:
+                break
+        except KeyError:
+            print(resp)
+        total += len(resp['hits']['hits'])
+        print(f"Downloaded {total} records\r", end='')
+        # print(resp)
+        # print(uri)
+        yield [h['_source'] for h in resp['hits']['hits']]
+        if '_scroll_id' in resp:
+            scroll_id = resp['_scroll_id']
+
+
+if __name__ == "__main__":
+    print(download_covid('ba'))