Skip to content

Commit

Permalink
Implement get_available_years for all online_data databases
Browse files Browse the repository at this point in the history
  • Loading branch information
luabida committed Dec 11, 2023
1 parent e031c50 commit 368c22c
Show file tree
Hide file tree
Showing 12 changed files with 170 additions and 52 deletions.
10 changes: 7 additions & 3 deletions pysus/ftp/databases/cnes.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,13 @@ def load(
self.__loaded__.add(directory.name)
return self

def describe(self, file: File):
def describe(self, file: File) -> dict:
if not isinstance(file, File):
return file
return {}

if file.name == "GMufAAmm":
# Leftover
return {}

if file.extension.upper() in [".DBC", ".DBF"]:
group, _uf, year, month = self.format(file)
Expand All @@ -92,7 +96,7 @@ def describe(self, file: File):
}

return description
return file
return {}

def format(self, file: File) -> tuple:
group, _uf = file.name[:2].upper(), file.name[2:4].upper()
Expand Down
6 changes: 3 additions & 3 deletions pysus/ftp/databases/pni.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Union, Optional
from typing import List, Union, Optional, Literal

from pysus.ftp import Database, Directory, File
from pysus.ftp.utils import zfill_year, to_list, parse_UFs, UFs
Expand Down Expand Up @@ -65,15 +65,15 @@ def format(self, file: File) -> tuple:

def get_files(
self,
group: Union[List[str], str],
group: Union[list, Literal["CNPI", "DPNI"]],
uf: Optional[Union[List[str], str]] = None,
year: Optional[Union[list, str, int]] = None,
) -> List[File]:
files = list(filter(
lambda f: f.extension.upper() in [".DBC", ".DBF"], self.files
))

groups = list(self.groups)
groups = [gr.upper() for gr in to_list(group)]

if not all(gr in list(self.groups) for gr in groups):
raise ValueError(
Expand Down
2 changes: 1 addition & 1 deletion pysus/ftp/databases/sia.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def get_files(

if not all(gr in list(self.groups) for gr in groups):
raise ValueError(
"Unknown SIH Group(s): "
"Unknown SIA Group(s): "
f"{set(groups).difference(list(self.groups))}"
)

Expand Down
4 changes: 2 additions & 2 deletions pysus/ftp/databases/sinan.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ def get_files(

if codes and not all(code in self.diseases for code in codes):
raise ValueError(
f"Unknown disease(s): {set(
codes).difference(set(self.diseases))}"
"Unknown disease(s): "
f"{set(codes).difference(set(self.diseases))}"
)

files = list(filter(lambda f: self.format(f)[0] in codes, files))
Expand Down
31 changes: 23 additions & 8 deletions pysus/online_data/CIHA.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,26 @@
"""
from typing import Union

from pysus.online_data import FTP_Downloader
from pysus.ftp.databases.ciha import CIHA
from pysus.ftp import CACHEPATH

ciha = CIHA().load()


def get_available_years(
    states: Union[list, str] = None,
    months: Union[str, int, list] = None,
) -> list:
    """
    Fetch the years that have CIHA files for the `states` and/or `months`.

    Both filters are forwarded unchanged to `ciha.get_files`.

    :param states: UF code or list of codes. E.g: "SP" or ["SP", "RJ"]
    :param months: month or list of months. E.g.: 1 or [1, 2]
    :return: sorted list of the distinct "year" values reported by
        `ciha.describe` for the matching files
    """
    files = ciha.get_files(uf=states, month=months)
    # A set comprehension de-duplicates the years before sorting.
    return sorted({ciha.describe(f)["year"] for f in files})


def download(
states: Union[str, list],
Expand All @@ -19,14 +36,12 @@ def download(
data_dir: str = CACHEPATH,
) -> list:
"""
Download CIHA records for state, year and month and returns dataframe
Download CIHA records for state, year and month and return the Parquet
files as a list of ParquetData
:param months: 1 to 12, can be a list
:param states: 2 letter state code,
:param years: 4 digit integer
"""
return FTP_Downloader('CIHA').download(
UFs=states,
years=years,
months=months,
local_dir=data_dir,
)

files = ciha.get_files(uf=states, year=years, month=months)
return ciha.download(files, local_dir=data_dir)
32 changes: 32 additions & 0 deletions pysus/online_data/CNES.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from pysus.ftp.databases.cnes import CNES
from pysus.ftp import CACHEPATH


cnes = CNES().load()


group_dict = {
'LT': ['Leitos - A partir de Out/2005', 10, 2005],
'ST': ['Estabelecimentos - A partir de Ago/2005', 8, 2005],
Expand All @@ -22,6 +24,36 @@
}


def get_available_years(
    group: str,
    states: Union[str, list] = None,
    months: Union[str, list, int] = None,
) -> list:
    """
    Get the years that have CNES files for a group and/or state and/or month.

    :param group:
        LT - Leitos (beds) - from Oct/2005
        ST - Estabelecimentos (establishments) - from Aug/2005
        DC - Dados Complementares (complementary data) - from Aug/2005
        EQ - Equipamentos (equipment) - from Aug/2005
        SR - Serviço Especializado (specialized service) - from Aug/2005
        HB - Habilitação (accreditation) - from Mar/2007
        PF - Profissional (professional) - from Aug/2005
        EP - Equipes (teams) - from Apr/2007
        IN - Incentivos (incentives) - from Nov/2007
        RC - Regra Contratual (contractual rule) - from Mar/2007
        EE - Estabelecimento de Ensino (teaching establishment) - from Mar/2007
        EF - Estabelecimento Filantrópico (philanthropic establishment)
             - from Mar/2007
        GM - Gestão e Metas (management and goals) - from Jun/2007
    :param states: 2 letter state code, can be a list of UFs
    :param months: 1 to 12, can be a list of months
    :return: sorted list of the distinct years found
    """
    # The group's directory has to be loaded before its files can be listed.
    cnes.load(group)
    files = cnes.get_files(group, uf=states, month=months)
    descriptions = (cnes.describe(f) for f in files)
    # `CNES.describe` returns an empty dict for leftover or unsupported
    # files; skip those instead of failing with a KeyError on "year".
    return sorted({info["year"] for info in descriptions if "year" in info})


def download(
group: str,
states: Union[str, list],
Expand Down
44 changes: 23 additions & 21 deletions pysus/online_data/PNI.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,39 @@
"""
Download data from the national immunization program
"""
from typing import Union
from typing import Union, Literal

from pysus.online_data import FTP_Downloader, FTP_Inspect
from pysus.ftp.databases.pni import PNI
from pysus.ftp import CACHEPATH


pni = PNI().load()


def get_available_years(
    group: Union[str, list],
    states: Union[str, list],
) -> list:
    """
    Fetch available years for `group` and/or `states`.

    :param group: PNI group, options are "CPNI" or "DPNI"
    :param states: UF code, can be a list. E.g: "SP" or ["SP", "RJ"]
    :return: sorted list of available years
    """
    files = pni.get_files(group=group, uf=states)
    # A set comprehension de-duplicates the years before sorting.
    return sorted({pni.describe(f)["year"] for f in files})


def download(
group: Union[list, Literal["CNPI", "DPNI"]],
states: Union[str, list],
years: Union[str, list, int],
data_dir: str = CACHEPATH,
) -> list:
"""
Download immunization records for the given states and years.
:param state: uf two letter code, can be a list
:param year: year in 4 digits, can be a list
:param group: PNI group, options are "CPNI" or "DPNI"
:param states: uf two letter code, can be a list. E.g: "SP" or ["SP", "RJ"]
:param years: year in 4 digits, can be a list. E.g: 2019 or [2019, 2020]
:param data_dir: directory where data will be downloaded
:return: list of downloaded parquet paths
:return: list of downloaded ParquetData
"""
return FTP_Downloader('PNI').download(
PNI_group='CPNI', UFs=states, years=years, local_dir=data_dir
)


def get_available_years(state):
    """
    Ask the PNI FTP inspector which years are available for `state`.

    The lookup is always made against the "CPNI" group.

    :param state: uf code
    :return: the result of `FTP_Inspect.list_available_years`
        (described upstream as a list of filename strings)
    """
    inspector = FTP_Inspect('PNI')
    return inspector.list_available_years(UF=state, PNI_group='CPNI')


def available_docs():
    """Return what the PNI FTP inspector's `list_all` gives for "CPNI"."""
    inspector = FTP_Inspect('PNI')
    return inspector.list_all(PNI_group='CPNI')
files = pni.get_files(group, uf=states, year=years)
return pni.download(files, local_dir=data_dir)
30 changes: 29 additions & 1 deletion pysus/online_data/SIA.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,34 @@
}


def get_available_years(
    group: str,
    states: Union[str, list] = None,
    months: Union[str, list, int] = None,
) -> list:
    """
    Get the years that have SIA files for a group and/or state and/or month.

    :param group:
        PA: Produção Ambulatorial - outpatient production (7, 1994)
        BI: Boletim de Produção Ambulatorial individualizado -
            individualized outpatient production bulletin (1, 2008)
        AD: APAC de Laudos Diversos - miscellaneous reports (1, 2008)
        AM: APAC de Medicamentos - medications (1, 2008)
        AN: APAC de Nefrologia - nephrology (1, 2008)
        AQ: APAC de Quimioterapia - chemotherapy (1, 2008)
        AR: APAC de Radioterapia - radiotherapy (1, 2008)
        AB: APAC de Cirurgia Bariátrica - bariatric surgery (1, 2008)
        ACF: APAC de Confecção de Fístula - fistula creation (1, 2008)
        ATD: APAC de Tratamento Dialítico - dialysis treatment (1, 2008)
        AMP: APAC de Acompanhamento Multiprofissional -
            multiprofessional follow-up (1, 2008)
        SAD: RAAS de Atenção Domiciliar - home care (1, 2008)
        PS: RAAS Psicossocial - psychosocial (1, 2008)
    :param states: 2 letter state code, can be a list of UFs
    :param months: 1 to 12, can be a list of months
    :return: sorted list of the distinct years found
    """
    # NOTE(review): the "(m, yyyy)" pairs above look like the first
    # month/year available for each group - confirm against `group_dict`.
    files = sia.get_files(group, uf=states, month=months)
    return sorted({sia.describe(f)["year"] for f in files})


def show_datatypes():
    """Pretty-print the module-level `group_dict` of SIA groups."""
    pprint(group_dict)

Expand All @@ -50,7 +78,7 @@ def download(
:param months: 1 to 12, can be a list
:param data_dir: whether to cache files locally. default is True
:param group: SIA groups. For all groups, refer to `sia.groups`
:return: list of downloaded files
:return: list of downloaded ParquetData
"""
files = sia.get_files(
group=groups, uf=states, year=years, month=months
Expand Down
24 changes: 23 additions & 1 deletion pysus/online_data/SIH.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,28 @@
sih = SIH().load()


def get_available_years(
    group: str,
    states: Union[str, list] = None,
    months: Union[str, list, int] = None,
) -> list:
    """
    Get the years that have SIH files for a group and/or state and/or month.

    :param group:
        RD: AIH Reduzida (reduced AIH)
        RJ: AIH Rejeitada (rejected AIH)
        ER: AIH Rejeitada com erro (AIH rejected with error)
        SP: Serviços Profissionais (professional services)
        CH: Cadastro Hospitalar (hospital registry)
        CM: description pending (TODO)
    :param states: 2 letter uf code, can be a list. E.g: "SP" or ["SP", "RJ"]
    :param months: 1 to 12, can be a list of months. E.g.: 1 or [1, 2, 3]
    :return: sorted list of available years
    """
    files = sih.get_files(group, uf=states, month=months)
    # A set comprehension de-duplicates the years before sorting.
    return sorted({sih.describe(f)["year"] for f in files})


def download(
states: Union[str, list],
years: Union[str, list, int],
Expand All @@ -27,7 +49,7 @@ def download(
:param groups: the groups of datasets to be downloaded.
See `sih.groups`
:param data_dir: Directory where parquets will be downloaded.
:return: list with the downloaded files
:return: list with the downloaded files as ParquetData objects
"""
files = sih.get_files(
group=groups, uf=states, month=months, year=years
Expand Down
15 changes: 15 additions & 0 deletions pysus/online_data/SIM.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,21 @@
sim = SIM().load()


def get_available_years(
    group: str,
    states: Union[str, list] = None,
    months: Union[str, list, int] = None,
) -> list:
    """
    Get the years that have SIM files for a group and/or state.

    :param group: CID9 or CID10
    :param states: 2 letter uf code, can be a list. E.g: "SP" or ["SP", "RJ"]
    :param months: accepted but currently ignored; it is not forwarded to
        `sim.get_files`
    :return: sorted list of available years
    """
    # NOTE(review): `months` never reaches `sim.get_files`. Confirm whether
    # SIM files carry a month before wiring it in; the parameter is kept so
    # existing callers that pass it do not break.
    files = sim.get_files(group, uf=states)
    return sorted({sim.describe(f)["year"] for f in files})


def download(
groups: Union[str, list],
states: Union[str, list],
Expand Down
3 changes: 2 additions & 1 deletion pysus/online_data/SINAN.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def get_available_years(disease_code: str) -> list:
:param disease_code: Disease code. See `SINAN.list_diseases` for valid codes
:return: A sorted list of the years for which the disease has files in the FTP Server.
"""
return sinan.get_files(dis_code=disease_code)
files = sinan.get_files(dis_code=disease_code)
return sorted(list(set(sinan.describe(f)["year"] for f in files)))


def download(
Expand Down
21 changes: 10 additions & 11 deletions pysus/online_data/sinasc.py → pysus/online_data/SINASC.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@
sinasc = SINASC().load()


def get_available_years(states: Union[str, list]) -> list:
    """
    Get the years that have SINASC files for the given states.

    The lookup always covers both the "DN" and "DNR" groups.

    :param states: 2 letter UF code, can be a list. E.g: "SP" or ["SP", "RJ"]
    :return: sorted list of available years
    """
    files = sinasc.get_files(["DN", "DNR"], uf=states)
    # A set comprehension de-duplicates the years before sorting.
    return sorted({sinasc.describe(f)["year"] for f in files})


def download(
groups: Union[str, list],
states: Union[str, list],
Expand All @@ -28,14 +38,3 @@ def download(
"""
files = sinasc.get_files(groups, uf=states, year=years)
return sinasc.download(files, local_dir=data_dir)


def get_available_years(state):
    """
    Return the SINASC "DN"/"DNR" files for `state`, ordered by year.

    Every two-character name suffix found in `sinasc.files` is passed as
    the year filter. The result is a list of files, not of years.

    :param state: UF filter forwarded to `sinasc.get_files`
    :return: the distinct matching files, sorted by the integer value of
        the year given by `sinasc.format`
    """
    year_suffixes = list({entry.name[-2:] for entry in sinasc.files})
    matches = set(
        sinasc.get_files(["DN", "DNR"], uf=state, year=year_suffixes)
    )

    def year_of(file):
        # `sinasc.format` is unpacked as a pair whose second item is the year.
        _, year = sinasc.format(file)
        return int(year)

    return sorted(matches, key=year_of)

0 comments on commit 368c22c

Please sign in to comment.