diff --git a/pysus/ftp/__init__.py b/pysus/ftp/__init__.py index 71f50d0..93d0dd0 100644 --- a/pysus/ftp/__init__.py +++ b/pysus/ftp/__init__.py @@ -5,7 +5,19 @@ import pathlib from datetime import datetime from ftplib import FTP -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import ( + Any, + Dict, + Final, + List, + Optional, + Protocol, + Tuple, + TypedDict, + TypeVar, + Union, + runtime_checkable, +) import humanize from aioftp import Client @@ -14,330 +26,342 @@ from tqdm import tqdm from typing_extensions import Self -CACHEPATH = os.getenv( +# Type aliases +PathLike = Union[str, pathlib.Path] +FileContent = Dict[str, Union["Directory", "File"]] +T = TypeVar("T") + +# Constants +CACHEPATH: Final[str] = os.getenv( "PYSUS_CACHEPATH", os.path.join(str(pathlib.Path.home()), "pysus") ) - -__cachepath__ = pathlib.Path(CACHEPATH) +__cachepath__: Final[pathlib.Path] = pathlib.Path(CACHEPATH) __cachepath__.mkdir(exist_ok=True) -def to_list(ite: Any) -> list: +def to_list(item: Union[T, List[T], Tuple[T, ...], None]) -> List[T]: """Parse any builtin data type into a list""" - return ( - [ite] if type(ite) in [str, float, int, Directory, File] else list(ite) - ) + if item is None: + return [] + return [item] if not isinstance(item, (list, tuple)) else list(item) -class File: - """ - FTP File class. This class will contain methods for interacting with - files inside DataSUS FTP server. The databases will be responsible for - parsing the files found for each db into File classes, enabling the - databases' files to share state and its reusability. +# Cache storage +DIRECTORY_CACHE: Dict[str, "Directory"] = {} - Parameters - path [str]: entire directory path where the file is located - inside the FTP server - name [str]: basename of the file - info [dict]: a dict containing the keys [size, type, modify], which - are present in every FTP server. In PySUS, this info - is extract using `line_file_parser` with FTP LIST. - Methods - download(local_dir): extract the file to local_dir - async_download(local_dir): async extract the file to local_dir - """ +class FileInfo(TypedDict): + """File information dictionary type""" - name: str - extension: str - basename: str - path: str - # parent: Directory # TODO: This causes too much overhead - __info__: dict + size: Union[int, str] + type: str + modify: datetime - def __init__(self, path: str, name: str, info: dict) -> None: - name, extension = os.path.splitext(name) - self.name = name - self.extension = extension - self.basename = self.name + self.extension - self.path = ( - path + self.basename - if path.endswith("/") - else path + "/" + self.basename - ) - ppath = self.path.replace(self.basename, "") - self.parent_path = ppath[:-1] if ppath.endswith("/") else ppath - self.__info__ = info - def __str__(self) -> str: - return str(self.basename) +@runtime_checkable +class Downloadable(Protocol): + async def download(self, local_dir: str) -> Data: + """Protocol for downloadable objects""" + ... 
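The `Downloadable` protocol above is `runtime_checkable`, so conforming objects can be detected with `isinstance()` without inheriting from the protocol. A minimal, self-contained sketch of that behavior (the `FakeFile` class is hypothetical, not part of pysus):

```python
# Structural typing demo: isinstance() against a runtime_checkable Protocol
# only verifies that a method with the right *name* exists; signatures,
# return types, and sync-vs-async are not checked at runtime.
from typing import Protocol, runtime_checkable


@runtime_checkable
class Downloadable(Protocol):
    async def download(self, local_dir: str) -> str:
        ...


class FakeFile:  # hypothetical: unrelated to Downloadable by inheritance
    async def download(self, local_dir: str) -> str:
        return f"{local_dir}/fake.parquet"


assert isinstance(FakeFile(), Downloadable)    # structural match
assert not isinstance(object(), Downloadable)  # no download() method
```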
- def __repr__(self) -> str: - return str(self.basename) - def __hash__(self): - return hash(self.path) +class FTPSingleton: + """Singleton FTP client manager""" - def __eq__(self, other): - if isinstance(other, File): - return self.path == other.path - return False + _instance: Optional[FTP] = None - @property - def info(self): - """ - Parse File info to human format - """ - info = {} - info["size"] = humanize.naturalsize(self.__info__["size"]) - info["type"] = self.extension[1:].upper() + " file" - info["modify"] = self.__info__["modify"].strftime("%Y-%m-%d %I:%M%p") - return info - - def download(self, local_dir: str = CACHEPATH, _pbar=None) -> Data: - _dir = pathlib.Path(local_dir) - _dir.mkdir(exist_ok=True, parents=True) - filepath = _dir / self.basename - filesize = int(self.__info__["size"]) + @classmethod + def get_instance(cls) -> FTP: + """Get or create the singleton FTP instance""" + if cls._instance is None or not cls._instance.sock: + cls._instance = FTP("ftp.datasus.gov.br") + cls._instance.login() + return cls._instance - if _pbar: - _pbar.unit = "B" - _pbar.unit_scale = True - _pbar.reset(total=filesize) + @classmethod + def close(cls) -> None: + """Close the singleton FTP instance""" + if cls._instance and cls._instance.sock: + cls._instance.close() + cls._instance = None - _parquet = filepath.with_suffix(".parquet") - if _parquet.exists(): - if _pbar: - _pbar.update(filesize - _pbar.n) - return Data(str(_parquet), _pbar=_pbar) - _dbf = filepath.with_suffix(".dbf") - if _dbf.exists(): - if _pbar: - _pbar.update(filesize - _pbar.n) - return Data(str(_dbf), _pbar=_pbar) +class File: + """ + FTP File representation with improved type safety. + + This class provides methods for interacting with files on the DataSUS FTP + server. It includes functionality for downloading files synchronously and + asynchronously, as well as retrieving file information in a human-readable + format. + + Attributes: + name (str): The name of the file without the extension. + extension (str): The file extension. + basename (str): The full name of the file including the extension. + path (str): The full path to the file on the FTP server. + parent_path (str): The directory path where the file is located on the + FTP server. + __info (FileInfo): Metadata about the file, including size, type, and + modification date. + + Methods: + info() -> Dict[str, str]: + Returns a dictionary with human-readable file information, + including size, type, and modification date. + + download( + local_dir: str = CACHEPATH, _pbar: Optional[tqdm] = None + ) -> Data: + Downloads the file to the specified local directory. If a progress + bar (_pbar) is provided, it updates the progress bar during the + download. + + async_download(local_dir: str = CACHEPATH) -> Data: + Asynchronously downloads the file to the specified local directory. + + _line_parser(file_line: bytes) -> Tuple[str, Dict[str, Any]]: + Static method to parse a line from the FTP LIST command and + extract file information. 
+ """ + + def __init__(self, path: str, name: str, info: FileInfo) -> None: + self.name, self.extension = os.path.splitext(name) + self.basename: str = f"{self.name}{self.extension}" + self.path: str = ( + f"{path}/{self.basename}" + if not path.endswith("/") + else f"{path}{self.basename}" + ) + self.parent_path: str = os.path.dirname(self.path) + self.__info: FileInfo = info - if filepath.exists(): - if _pbar: - _pbar.update(filesize - _pbar.n) - return Data(str(filepath), _pbar=_pbar) + @property + def info(self) -> Dict[str, str]: + """Returns a dictionary with human-readable file information""" + return { + "size": humanize.naturalsize(self.__info["size"]), + "type": f"{self.extension[1:].upper()} file", + "modify": self.__info["modify"].strftime("%Y-%m-%d %I:%M%p"), + } + + def download( + self, local_dir: str = CACHEPATH, _pbar: Optional[tqdm] = None + ) -> Data: + """Downloads the file to the specified local directory""" + target_dir = pathlib.Path(local_dir) + target_dir.mkdir(exist_ok=True, parents=True) + + filepath = target_dir / self.basename + filesize = int(self.__info["size"]) + + # Check for existing files + for ext in (".parquet", ".dbf", ""): + existing = filepath.with_suffix(ext) + if existing.exists(): + if _pbar: + _pbar.update(filesize - _pbar.n) + return Data(str(existing), _pbar=_pbar) # type: ignore if _pbar: - _pbar.set_description(f"{self.basename}") + _pbar.unit = "B" + _pbar.unit_scale = True + _pbar.reset(total=filesize) + _pbar.set_description(self.basename) try: - ftp = FTP("ftp.datasus.gov.br") - ftp.login() - output = open(f"{filepath}", "wb") + ftp = FTPSingleton.get_instance() + with open(filepath, "wb") as output: - def callback(data): - output.write(data) - if _pbar: - _pbar.update(len(data)) + def callback(data: bytes) -> None: + output.write(data) + if _pbar: + _pbar.update(len(data)) + + ftp.retrbinary(f"RETR {self.path}", callback) - ftp.retrbinary( - f"RETR {self.path}", - callback, - ) except Exception as exc: + if filepath.exists(): + filepath.unlink() raise exc finally: - ftp.close() - output.close() + FTPSingleton.close() if _pbar: _pbar.update(filesize - _pbar.n) - return Data(str(filepath), _pbar=_pbar) + return Data(str(filepath), _pbar=_pbar) # type: ignore async def async_download(self, local_dir: str = CACHEPATH) -> Data: - # aioftp.Client.parse_list_line_custom - def line_file_parser(file_line): - line = file_line.decode("utf-8") - info = {} - if "" in line: - date, time, _, *name = str(line).strip().split() - info["size"] = 0 - info["type"] = "dir" - name = " ".join(name) - else: - date, time, size, name = str(line).strip().split() - info["size"] = size - info["type"] = "file" - - modify = datetime.strptime( - " ".join([date, time]), "%m-%d-%y %I:%M%p" - ) - info["modify"] = modify.strftime("%m/%d/%Y %I:%M%p") - - return name, info - - _dir = pathlib.Path(local_dir) - _dir.mkdir(exist_ok=True, parents=True) - filepath = _dir / self.basename - - output = ( - local_dir + str(self.basename) - if local_dir.endswith("/") - else local_dir + "/" + str(self.basename) - ) - - _parquet = filepath.with_suffix(".parquet") - if _parquet.exists(): - return Data(str(_parquet)) - - _dbf = filepath.with_suffix(".dbf") - if _dbf.exists(): - return Data(str(_dbf)) + """ + Asynchronously downloads the file to the specified local directory + """ + target_dir = pathlib.Path(local_dir) + target_dir.mkdir(exist_ok=True, parents=True) + filepath = target_dir / self.basename - if filepath.exists(): - return Data(output) + # Check existing files + for ext in 
(".parquet", ".dbf", ""): + existing = filepath.with_suffix(ext) + if existing.exists(): + return Data(str(existing)) # type: ignore async with Client.context( - host="ftp.datasus.gov.br", - parse_list_line_custom=line_file_parser, + host="ftp.datasus.gov.br", parse_list_line_custom=self._line_parser ) as client: await client.login() - await client.download(self.path, output, write_into=True) + await client.download(self.path, str(filepath), write_into=True) + + return Data(str(filepath)) # type: ignore + + @staticmethod + def _line_parser(file_line: bytes) -> Tuple[str, Dict[str, Any]]: + """Static method to parse a line from the FTP LIST command and extract + file information + """ + line = file_line.decode("utf-8") + if "" in line: + date, time, _, *name = line.strip().split() + info = {"size": 0, "type": "dir"} + name = " ".join(name) + else: + date, time, size, name = line.strip().split() + info = {"size": size, "type": "file"} + + modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p") + info["modify"] = modify.strftime("%m/%d/%Y %I:%M%p") + return name, info - return Data(output) + def __str__(self) -> str: + return str(self.basename) + def __repr__(self) -> str: + return str(self.basename) -CACHE: Dict = {} + def __hash__(self): + return hash(self.path) + + def __eq__(self, other): + if isinstance(other, File): + return self.path == other.path + return False class Directory: """ - FTP Directory class. The Directory does not load its content when called. - Instead, it will cache all the parents Directories until root "/". To load - the content, the attr content or the method load() should be called. When - firstly instantiated, it will CWD into the path provided and store self and - all parents in cache - - Parameters - path [str]: entire directory path where the directory is located - inside the FTP server - Attrs - name [str]: Directory name - path [str]: Directory path - parent [Directory]: parent Directory - loaded [bool]: True if content is loaded - content [dict[str:[File, Directory]]]: A dictionary with name and File - or Directory inside the Directory (e.g: "name": Directory("name")) + Directory class with caching and lazy loading. + + The Directory class represents a directory in a file system and includes + mechanisms for caching instances and lazy loading of directory content. + When a Directory instance is created, it normalizes the provided path + and caches the instance. The content of the directory is not loaded + immediately; instead, it is loaded when the `content` property or the + `load` method is accessed or called. + + Attributes: + path (str): The normalized path of the directory. + name (str): The name of the directory. + parent (Directory): The parent directory instance. + loaded (bool): Indicates whether the directory content has been loaded. + __content__ (Dict[str, Union[File, Directory]]): A dictionary + containing the directory's content, with names as keys and File or + Directory instances as values. + + Methods: + _normalize_path(path: str) -> str: Normalizes the given path. + _get_root_directory() -> Directory: Returns the root directory + instance, creating it if necessary. + _init_root_child(name: str) -> None: Initializes a root child + directory. + _init_regular(parent_path: str, name: str) -> None: Initializes a + regular directory. + content() -> List[Union[Directory, File]]: Returns the content of the + directory, loading it if necessary. + load() -> Self: Loads the content of the directory and marks it as + loaded. 
""" name: str path: str - parent: Directory - loaded: bool = False - __content__: Dict = {} - - def __new__(cls, path: str, _is_root_child=False) -> Directory: - ftp = FTP("ftp.datasus.gov.br") - path = f"/{path}" if not str(path).startswith("/") else path - path = path[:-1] if path.endswith("/") else path - - if not path: # if root ("/") - path = "/" - try: - directory = CACHE["/"] - except KeyError: - directory = object.__new__(cls) - directory.parent = directory - directory.name = "/" - directory.path = "/" - directory.loaded = False - directory.__content__ = {} - CACHE["/"] = directory - return directory - - parent_path, name = path.rsplit("/", maxsplit=1) + parent: "Directory" + loaded: bool + __content__: Dict[str, Union[File, "Directory"]] - if _is_root_child: - # WARNING: This parameter is for internal meanings, do not use - directory = object.__new__(cls) - directory.parent = CACHE["/"] - directory.name = name - CACHE[path] = directory - return directory + def __new__(cls, path: str, _is_root_child: bool = False) -> "Directory": + normalized_path = os.path.normpath(path) - try: - directory = CACHE[path] # Recursive and cached instantiation - except KeyError: - try: - ftp.connect() - ftp.login() - ftp.cwd(path) # Checks if parent dir exists on DATASUS - except Exception as exc: - if "cannot find the path" in str(exc): - logger.error(f"Not a directory {path}") - elif "access is denied" in str(exc).lower(): - # Forbidden access, exists in ftp but returns Forbidden - directory = object.__new__(cls) - directory.parent = Directory(parent_path) # Recursive - directory.name = name - directory.loaded = False - directory.__content__ = {} - CACHE[path] = directory - return directory - raise exc - finally: - ftp.close() - - directory = object.__new__(cls) - # TODO: In next step, all the parent directories will be generated, - # but it cwds into every parent, while its certain that they exist - # in ftp server. 
-        directory.parent = Directory(parent_path)  # Recursive
-        directory.name = name
-        directory.loaded = False
-        directory.__content__ = {}
-        CACHE[path] = directory
-        return directory
-
-    def __init__(self, path: str, _is_root_child=False) -> None:
-        path = f"/{path}" if not str(path).startswith("/") else path
-        path = path[:-1] if path.endswith("/") else path
-        if not path:
-            path = "/"
-        self.path = path
+        # Handle root directory case
+        if normalized_path == "/":
+            return cls._get_root_directory()
 
-    def __str__(self) -> str:
-        return self.path
+        # Return cached instance if exists
+        if normalized_path in DIRECTORY_CACHE:
+            return DIRECTORY_CACHE[normalized_path]
 
-    def __repr__(self) -> str:
-        return self.path
+        # Use os.path.split for reliable path splitting
+        parent_path, name = os.path.split(normalized_path)
 
-    def __hash__(self):
-        return hash(self.path)
+        # Handle empty parent path
+        if not parent_path:
+            parent_path = "/"
+        # Handle parent paths that don't start with /
+        elif not parent_path.startswith("/"):
+            parent_path = "/" + parent_path
 
-    def __eq__(self, other):
-        if isinstance(other, Directory):
-            return self.path == other.path
-        return False
+        # Create new instance
+        instance = super().__new__(cls)
+        instance.path = normalized_path
 
-    def __truediv__(self, path: str):
-        if isinstance(path, str):
-            path = f"/{path}" if not path.startswith("/") else path
-            path = path[:-1] if path.endswith("/") else path
-            return Directory(self.path + path)
-        raise ValueError("Unsupported division")
+        if _is_root_child:
+            instance._init_root_child(name)
+        else:
+            instance._init_regular(parent_path, name)
+
+        DIRECTORY_CACHE[normalized_path] = instance
+        return instance
+
+    @staticmethod
+    def _normalize_path(path: str) -> str:
+        """Normalizes the given path, keeping "/" for the root directory"""
+        path = f"/{path}" if not path.startswith("/") else path
+        return path.removesuffix("/") or "/"
+
+    @classmethod
+    def _get_root_directory(cls) -> Directory:
+        """Returns the root directory instance, creating it if necessary"""
+        if "/" not in DIRECTORY_CACHE:
+            root = super().__new__(cls)
+            root.parent = root
+            root.name = "/"
+            root.path = "/"
+            root.loaded = False
+            root.__content__ = {}
+            DIRECTORY_CACHE["/"] = root
+        return DIRECTORY_CACHE["/"]
+
+    def _init_root_child(self, name: str) -> None:
+        """Initializes a root child directory"""
+        self.parent = DIRECTORY_CACHE["/"]
+        self.name = name
+        self.loaded = False
+        self.__content__ = {}
+
+    def _init_regular(self, parent_path: str, name: str) -> None:
+        """Initializes a regular directory"""
+        self.parent = Directory(parent_path)
+        self.name = name
+        self.loaded = False
+        self.__content__ = {}
 
     @property
-    def content(self):
-        """
-        Returns a list of Files and Directories in the Directory, will load
-        if needed
-        """
+    def content(self) -> List[Union[Directory, File]]:
+        """Returns the content of the directory, loading it if necessary"""
         if not self.loaded:
             self.load()
         return list(self.__content__.values())
 
-    def load(self):
-        """
-        The content of a Directory must be explicitly loaded
-        """
-        self.__content__ |= load_path(self.path)
+    def load(self) -> Self:
+        """Loads the content of the directory and marks it as loaded"""
+        self.__content__ |= load_directory_content(self.path)
         self.loaded = True
         return self
@@ -348,83 +372,63 @@ def reload(self):
         self.loaded = False
         return self.load()
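Construction is now purely local: `__new__` normalizes the path, builds parent instances recursively, and caches every instance, while FTP traffic is deferred to `load()`. A sketch of that contract (the paths exist on the DATASUS server; no network is needed until `load()` or `.content` is used):

```python
from pysus.ftp import DIRECTORY_CACHE, Directory

a = Directory("/dissemin/publicos")
b = Directory("dissemin/publicos/")        # normalized to the same path
assert a is b                              # same cached instance
assert DIRECTORY_CACHE["/dissemin/publicos"] is a
assert a.parent is Directory("/dissemin")  # parents are built recursively
print(a.loaded)  # False in a fresh session, until load()/.content is used
```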
- """ - if self.path == "/": - return True - - target = other - while target.path != "/": + def __str__(self) -> str: + return self.path - if self.path == target.path: - return True + def __repr__(self) -> str: + return self.path - if isinstance(other, File): - # TODO: Implement parent logic on File (too much overhead) - target = Directory(other.parent_path) - else: - target = target.parent + def __hash__(self): + return hash(self.path) + def __eq__(self, other): + if isinstance(other, Directory): + return self.path == other.path return False -CACHE["/"] = Directory("/") - - -def load_path(path: str) -> Dict[str, Union[Directory, File]]: - """ - This method is responsible for listing all the FTP directory's. - Converts the items found within a valid DATASUS path into `File`s or - Directories, returning its content. - """ - path = str(path) - content = {} - ftp = FTP("ftp.datasus.gov.br") +def load_directory_content(path: str) -> FileContent: + """Directory content loading""" + content: FileContent = {} try: - ftp.connect() - ftp.login() + ftp = FTPSingleton.get_instance() ftp.cwd(path) + path = path.removesuffix("/") - def line_file_parser(file_line): - info = {} - if "" in file_line: - date, time, _, *name = str(file_line).strip().split() - info["size"] = 0 - info["type"] = "dir" - name = " ".join(name) + def line_parser(line: str): + if "" in line: + date, time, _, name = line.strip().split(maxsplit=3) modify = datetime.strptime( - " ".join([date, time]), "%m-%d-%y %I:%M%p" - ) - info["modify"] = modify - xpath = ( - path + name if path.endswith("/") else path + "/" + name + f"{date} {time}", "%m-%d-%y %I:%M%p" ) + info = {"size": 0, "type": "dir", "modify": modify} + xpath = f"{path}/{name}" content[name] = Directory(xpath) else: - date, time, size, name = str(file_line).strip().split() - info["size"] = size - info["type"] = "file" + date, time, size, name = line.strip().split(maxsplit=3) modify = datetime.strptime( - " ".join([date, time]), "%m-%d-%y %I:%M%p" + f"{date} {time}", "%m-%d-%y %I:%M%p" ) - info["modify"] = modify + info: FileInfo = { + "size": size, + "type": "file", + "modify": modify, + } content[name] = File(path, name, info) - ftp.retrlines("LIST", line_file_parser) + ftp.retrlines("LIST", line_parser) except Exception as exc: raise exc finally: - ftp.close() + FTPSingleton.close() - upper_names = [n.upper() for n in content] - to_remove = [] - for name in content: - if ".DBF" in name.upper(): - if name.upper().replace(".DBF", ".DBC") in upper_names: - to_remove.append(name) + to_remove = [ + name + for name in content + if name.upper().endswith(".DBF") + and name.upper().replace(".DBF", ".DBC") in content + ] for name in to_remove: del content[name] @@ -489,31 +493,29 @@ def files(self) -> List[File]: Lists Files inside content. To load a specific Directory inside content, just `load()` this directory and list files again. """ - return list(filter(lambda f: isinstance(f, File), self.content)) + return [f for f in self.content if isinstance(f, File)] def load( - self, directories: Optional[Union[Directory, List[Directory]]] = None + self, + directories: Optional[ + Union[Directory, List[Directory], Tuple[Directory, ...]] + ] = None, ) -> Database: """ Loads specific directories to Database content. Will aggregate the files found within Directories into Database.content. 
""" if not directories: - directories = self.paths - - directories = to_list(directories) + directories = list(self.paths) - for i, path in enumerate(directories): - if isinstance(path, str): - path = Directory(path) - directories[i] = path + directories_list = to_list(directories) - if not isinstance(path, Directory): - raise ValueError("path must a valid DATASUS directory") + for directory in directories_list: + if not isinstance(directory, Directory): + raise ValueError("Invalid directory provided.") - for directory in directories: directory.load() - self.__content__ |= directory.__content__ + self.__content__.update(directory.__content__) return self def describe(self, file: File) -> dict: diff --git a/pysus/online_data/ESUS.py b/pysus/online_data/ESUS.py index dbe9008..ef2b990 100644 --- a/pysus/online_data/ESUS.py +++ b/pysus/online_data/ESUS.py @@ -21,7 +21,7 @@ def download(uf, cache=True, checkmemory=True): today = date.today() dt = today.strftime("_%d_%m_%Y") base = f"desc-esus-notifica-estado-{uf}" # desc-notificacoes-esusve- - url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br" + url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br" # noqa: E231 out = f"ESUS_{uf}_{dt}.parquet" cachefile = os.path.join(CACHEPATH, out) @@ -36,7 +36,7 @@ def download(uf, cache=True, checkmemory=True): fname = fetch(base, uf, url) size = os.stat(fname).st_size if size > 50e6 and checkmemory: - print(f"Downloaded data is to large:{size / 1e6} MB compressed.") + print(f"Downloaded data is to large: {size / 1e6} MB compressed.") print( "Only loading the first 1000 rows. If your computer has enough" + " memory, set 'checkmemory' to False" diff --git a/pysus/online_data/IBGE.py b/pysus/online_data/IBGE.py index 3771b2e..33fba90 100644 --- a/pysus/online_data/IBGE.py +++ b/pysus/online_data/IBGE.py @@ -1,6 +1,7 @@ """ Helper functions to download official statistics from IBGE SIDRA """ + import ssl # Builtin from pathlib import Path from tempfile import TemporaryDirectory @@ -142,7 +143,7 @@ def get_sidra_table( print(f"Requesting data from {url}") try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: df = pd.DataFrame(response.json()) except HTTPError: response = requests.get(url) @@ -163,7 +164,7 @@ def list_agregados(**kwargs): url += "&".join([f"{k}={v}" for k, v in kwargs.items()]) print(f"Fetching Data groupings from {url}") try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: table = pd.DataFrame(response.json()) except requests.exceptions.SSLError as e: print(f"Failed fetching aggregates: {e}") @@ -183,7 +184,7 @@ def localidades_por_agregado(agregado: int, nivel: str): """ url = APIBASE + f"agregados/{agregado}/localidades/{nivel}" try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: table = pd.DataFrame(response.json()) except Exception as e: print(f"Could not download from {url}\n{e}") @@ -199,7 +200,7 @@ def metadados(agregado: int): """ url = APIBASE + f"agregados/{agregado}/metadados" try: - with (get_legacy_session() as s, s.get(url) as response): + with get_legacy_session() as s, s.get(url) as response: data = response.json() except Exception as e: print(f"Could not download from {url}\n{e}") @@ -215,7 +216,7 @@ def lista_periodos(agregado: int): """ url = APIBASE + f"agregados/{agregado}/periodos" try: - with (get_legacy_session() as s, s.get(url) as response): + 
@@ -309,10 +310,10 @@ def __init__(
     def _fetch_JSON(self):
         try:
             print(f"Fetching {self.url}")
-            with (get_legacy_session() as s, s.get(self.url) as response):
+            with get_legacy_session() as s, s.get(self.url) as response:
                 self.JSON = response.json()
         except Exception as e:
-            print(f"Couldn't download data:\n{e}")
+            print("Couldn't download data:", e, sep="\n")
 
     def to_dataframe(self):
         return pd.DataFrame(self.JSON)
@@ -389,7 +390,7 @@ def get_population(
     opts = ["ALF", "ESCA", "ESCB", "IDOSO", "RENDA"]
     if not censo_data or censo_data not in opts:
         raise ValueError(
-            f"Incorrect `censo_data` parameter. Options: {opts}"
+            f"Incorrect 'censo_data' parameter. Options: {opts}"
         )
         file = [f for f in files if censo_data in f.name][0].download()
     else:
diff --git a/pysus/online_data/Infogripe.py b/pysus/online_data/Infogripe.py
index a576219..bd496c7 100644
--- a/pysus/online_data/Infogripe.py
+++ b/pysus/online_data/Infogripe.py
@@ -18,6 +18,6 @@ def list_datasets():
 
 
 def download(dataset_name):
-    url = BASEURL + DATASETS[dataset_name]
+    url = BASEURL + DATASETS[dataset_name] + "?inline=false"
     df = pd.read_csv(url, delimiter=";", decimal=",")
     return df
diff --git a/pysus/preprocessing/ESUS.py b/pysus/preprocessing/ESUS.py
index 5552f58..110215c 100644
--- a/pysus/preprocessing/ESUS.py
+++ b/pysus/preprocessing/ESUS.py
@@ -26,8 +26,8 @@ def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"):
         inplace=True,
     )
     print(
-        f"Removed {old_size - len(df)} rows with missing dates of symptoms,"
-        " notification or testing"
+        f"Removed {old_size - len(df)} rows with missing dates of symptoms, "
+        "notification or testing"
     )
 
     # Desconsiderando os resultados negativos ou inconclusivos
@@ -52,7 +52,9 @@ def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"):
     ini = np.arange(0, 81, 5)
     fin = np.arange(5, 86, 5)
     fin[-1] = 120
-    faixa_etaria = {f"[{i},{f})": (i, f) for i, f in zip(ini, fin)}
+    faixa_etaria = {
+        f"[{i},{f})": (i, f) for i, f in zip(ini, fin)  # noqa: E231
+    }
     labels = list(faixa_etaria.keys())
 
     df["faixa_etaria"] = [
diff --git a/pysus/tests/test_ftp.py b/pysus/tests/test_ftp.py
index 19e1e01..9f78dfd 100644
--- a/pysus/tests/test_ftp.py
+++ b/pysus/tests/test_ftp.py
@@ -3,7 +3,7 @@
 import pandas as pd
 
 from pysus.data.local import ParquetSet
-from pysus.ftp import CACHE, Database, Directory, File
+from pysus.ftp import DIRECTORY_CACHE, Database, Directory, File
 from pysus.ftp.databases import (
     ciha,
     cnes,
@@ -64,7 +64,7 @@ def test_root_directory(self):
         self.assertTrue(self.root.parent == self.root)  # outermost parent
 
     def test_directory_cache(self):
-        self.assertTrue(CACHE["/"] == self.root)
+        self.assertTrue(DIRECTORY_CACHE["/"] == self.root)
 
     def test_sinan_file(self):
         file = Directory("/dissemin/publicos/SINAN/DADOS/FINAIS").content[0]
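On the Infogripe change: `BASEURL + DATASETS[dataset_name]` appears to point at files on a GitLab instance, where the `?inline=false` query parameter asks the raw endpoint to serve the file as a download (`Content-Disposition: attachment`) instead of rendering it inline. A sketch with placeholder values (the real `BASEURL`/`DATASETS` mapping is not shown in this patch):

```python
import pandas as pd

BASEURL = "https://gitlab.example.org/mave/repo/-/raw/master/"  # placeholder
DATASETS = {"Example": "Dados/InfoGripe/example.csv"}           # placeholder

url = BASEURL + DATASETS["Example"] + "?inline=false"
df = pd.read_csv(url, delimiter=";", decimal=",")
```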