diff --git a/pysus/ftp/__init__.py b/pysus/ftp/__init__.py
index 71f50d0..93d0dd0 100644
--- a/pysus/ftp/__init__.py
+++ b/pysus/ftp/__init__.py
@@ -5,7 +5,19 @@
import pathlib
from datetime import datetime
from ftplib import FTP
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import (
+ Any,
+ Dict,
+ Final,
+ List,
+ Optional,
+ Protocol,
+ Tuple,
+ TypedDict,
+ TypeVar,
+ Union,
+ runtime_checkable,
+)
import humanize
from aioftp import Client
@@ -14,330 +26,342 @@
from tqdm import tqdm
from typing_extensions import Self
-CACHEPATH = os.getenv(
+# Type aliases
+PathLike = Union[str, pathlib.Path]
+FileContent = Dict[str, Union["Directory", "File"]]
+T = TypeVar("T")
+
+# Constants
+CACHEPATH: Final[str] = os.getenv(
"PYSUS_CACHEPATH", os.path.join(str(pathlib.Path.home()), "pysus")
)
-
-__cachepath__ = pathlib.Path(CACHEPATH)
+__cachepath__: Final[pathlib.Path] = pathlib.Path(CACHEPATH)
__cachepath__.mkdir(exist_ok=True)
-def to_list(ite: Any) -> list:
+def to_list(item: Union[T, List[T], Tuple[T, ...], None]) -> List[T]:
"""Parse any builtin data type into a list"""
- return (
- [ite] if type(ite) in [str, float, int, Directory, File] else list(ite)
- )
+ if item is None:
+ return []
+ return [item] if not isinstance(item, (list, tuple)) else list(item)
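+
+
+# Usage sketch: to_list(None) == [], to_list("x") == ["x"],
+# to_list((1, 2)) == [1, 2]; scalars such as File/Directory get wrapped.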
-class File:
- """
- FTP File class. This class will contain methods for interacting with
- files inside DataSUS FTP server. The databases will be responsible for
- parsing the files found for each db into File classes, enabling the
- databases' files to share state and its reusability.
+# Cache storage
+DIRECTORY_CACHE: Dict[str, "Directory"] = {}
- Parameters
- path [str]: entire directory path where the file is located
- inside the FTP server
- name [str]: basename of the file
- info [dict]: a dict containing the keys [size, type, modify], which
- are present in every FTP server. In PySUS, this info
- is extract using `line_file_parser` with FTP LIST.
- Methods
- download(local_dir): extract the file to local_dir
- async_download(local_dir): async extract the file to local_dir
- """
+class FileInfo(TypedDict):
+ """File information dictionary type"""
- name: str
- extension: str
- basename: str
- path: str
- # parent: Directory # TODO: This causes too much overhead
- __info__: dict
+ size: Union[int, str]
+ type: str
+ modify: datetime
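+
+
+# e.g. (hypothetical values):
+#   FileInfo(size="1234", type="file", modify=datetime(2023, 4, 27, 9, 30))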
- def __init__(self, path: str, name: str, info: dict) -> None:
- name, extension = os.path.splitext(name)
- self.name = name
- self.extension = extension
- self.basename = self.name + self.extension
- self.path = (
- path + self.basename
- if path.endswith("/")
- else path + "/" + self.basename
- )
- ppath = self.path.replace(self.basename, "")
- self.parent_path = ppath[:-1] if ppath.endswith("/") else ppath
- self.__info__ = info
- def __str__(self) -> str:
- return str(self.basename)
+@runtime_checkable
+class Downloadable(Protocol):
+    """Protocol for objects that can be downloaded to a local directory"""
+
+    def download(self, local_dir: str) -> Data:
+        ...
- def __repr__(self) -> str:
- return str(self.basename)
- def __hash__(self):
- return hash(self.path)
+class FTPSingleton:
+ """Singleton FTP client manager"""
- def __eq__(self, other):
- if isinstance(other, File):
- return self.path == other.path
- return False
+ _instance: Optional[FTP] = None
- @property
- def info(self):
- """
- Parse File info to human format
- """
- info = {}
- info["size"] = humanize.naturalsize(self.__info__["size"])
- info["type"] = self.extension[1:].upper() + " file"
- info["modify"] = self.__info__["modify"].strftime("%Y-%m-%d %I:%M%p")
- return info
-
- def download(self, local_dir: str = CACHEPATH, _pbar=None) -> Data:
- _dir = pathlib.Path(local_dir)
- _dir.mkdir(exist_ok=True, parents=True)
- filepath = _dir / self.basename
- filesize = int(self.__info__["size"])
+ @classmethod
+ def get_instance(cls) -> FTP:
+ """Get or create the singleton FTP instance"""
+ if cls._instance is None or not cls._instance.sock:
+ cls._instance = FTP("ftp.datasus.gov.br")
+ cls._instance.login()
+ return cls._instance
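+
+    # Usage sketch (hypothetical path):
+    #   ftp = FTPSingleton.get_instance()
+    #   try:
+    #       ftp.cwd("/dissemin/publicos")
+    #   finally:
+    #       FTPSingleton.close()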
- if _pbar:
- _pbar.unit = "B"
- _pbar.unit_scale = True
- _pbar.reset(total=filesize)
+ @classmethod
+ def close(cls) -> None:
+ """Close the singleton FTP instance"""
+ if cls._instance and cls._instance.sock:
+ cls._instance.close()
+ cls._instance = None
- _parquet = filepath.with_suffix(".parquet")
- if _parquet.exists():
- if _pbar:
- _pbar.update(filesize - _pbar.n)
- return Data(str(_parquet), _pbar=_pbar)
- _dbf = filepath.with_suffix(".dbf")
- if _dbf.exists():
- if _pbar:
- _pbar.update(filesize - _pbar.n)
- return Data(str(_dbf), _pbar=_pbar)
+class File:
+ """
+ FTP File representation with improved type safety.
+
+ This class provides methods for interacting with files on the DataSUS FTP
+ server. It includes functionality for downloading files synchronously and
+ asynchronously, as well as retrieving file information in a human-readable
+ format.
+
+ Attributes:
+ name (str): The name of the file without the extension.
+ extension (str): The file extension.
+ basename (str): The full name of the file including the extension.
+ path (str): The full path to the file on the FTP server.
+ parent_path (str): The directory path where the file is located on the
+ FTP server.
+ __info (FileInfo): Metadata about the file, including size, type, and
+ modification date.
+
+ Methods:
+ info() -> Dict[str, str]:
+ Returns a dictionary with human-readable file information,
+ including size, type, and modification date.
+
+ download(
+ local_dir: str = CACHEPATH, _pbar: Optional[tqdm] = None
+ ) -> Data:
+ Downloads the file to the specified local directory. If a progress
+ bar (_pbar) is provided, it updates the progress bar during the
+ download.
+
+ async_download(local_dir: str = CACHEPATH) -> Data:
+ Asynchronously downloads the file to the specified local directory.
+
+ _line_parser(file_line: bytes) -> Tuple[str, Dict[str, Any]]:
+ Static method to parse a line from the FTP LIST command and
+ extract file information.
+ """
+
+ def __init__(self, path: str, name: str, info: FileInfo) -> None:
+ self.name, self.extension = os.path.splitext(name)
+ self.basename: str = f"{self.name}{self.extension}"
+ self.path: str = (
+ f"{path}/{self.basename}"
+ if not path.endswith("/")
+ else f"{path}{self.basename}"
+ )
+ self.parent_path: str = os.path.dirname(self.path)
+ self.__info: FileInfo = info
- if filepath.exists():
- if _pbar:
- _pbar.update(filesize - _pbar.n)
- return Data(str(filepath), _pbar=_pbar)
+ @property
+ def info(self) -> Dict[str, str]:
+ """Returns a dictionary with human-readable file information"""
+ return {
+ "size": humanize.naturalsize(self.__info["size"]),
+ "type": f"{self.extension[1:].upper()} file",
+ "modify": self.__info["modify"].strftime("%Y-%m-%d %I:%M%p"),
+ }
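+        # e.g. {"size": "1.2 MB", "type": "DBC file",
+        #       "modify": "2023-04-27 09:30AM"} (hypothetical values)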
+
+ def download(
+ self, local_dir: str = CACHEPATH, _pbar: Optional[tqdm] = None
+ ) -> Data:
+ """Downloads the file to the specified local directory"""
+ target_dir = pathlib.Path(local_dir)
+ target_dir.mkdir(exist_ok=True, parents=True)
+
+ filepath = target_dir / self.basename
+ filesize = int(self.__info["size"])
+
+        # Check for an existing parquet/dbf conversion or the raw file
+        for suffix in (".parquet", ".dbf", filepath.suffix):
+            existing = filepath.with_suffix(suffix)
+            if existing.exists():
+                if _pbar:
+                    _pbar.update(filesize - _pbar.n)
+                return Data(str(existing), _pbar=_pbar)  # type: ignore
if _pbar:
- _pbar.set_description(f"{self.basename}")
+ _pbar.unit = "B"
+ _pbar.unit_scale = True
+ _pbar.reset(total=filesize)
+ _pbar.set_description(self.basename)
try:
- ftp = FTP("ftp.datasus.gov.br")
- ftp.login()
- output = open(f"{filepath}", "wb")
+ ftp = FTPSingleton.get_instance()
+ with open(filepath, "wb") as output:
- def callback(data):
- output.write(data)
- if _pbar:
- _pbar.update(len(data))
+ def callback(data: bytes) -> None:
+ output.write(data)
+ if _pbar:
+ _pbar.update(len(data))
+
+ ftp.retrbinary(f"RETR {self.path}", callback)
- ftp.retrbinary(
- f"RETR {self.path}",
- callback,
- )
except Exception as exc:
+ if filepath.exists():
+ filepath.unlink()
raise exc
finally:
- ftp.close()
- output.close()
+ FTPSingleton.close()
if _pbar:
_pbar.update(filesize - _pbar.n)
- return Data(str(filepath), _pbar=_pbar)
+ return Data(str(filepath), _pbar=_pbar) # type: ignore
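+
+    # Usage sketch (hypothetical path):
+    #   file = Directory("/dissemin/publicos/SIM/CID10/DORES").content[0]
+    #   data = file.download()  # Data handle pointing at the local copy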
async def async_download(self, local_dir: str = CACHEPATH) -> Data:
- # aioftp.Client.parse_list_line_custom
- def line_file_parser(file_line):
- line = file_line.decode("utf-8")
- info = {}
- if "
" in line:
- date, time, _, *name = str(line).strip().split()
- info["size"] = 0
- info["type"] = "dir"
- name = " ".join(name)
- else:
- date, time, size, name = str(line).strip().split()
- info["size"] = size
- info["type"] = "file"
-
- modify = datetime.strptime(
- " ".join([date, time]), "%m-%d-%y %I:%M%p"
- )
- info["modify"] = modify.strftime("%m/%d/%Y %I:%M%p")
-
- return name, info
-
- _dir = pathlib.Path(local_dir)
- _dir.mkdir(exist_ok=True, parents=True)
- filepath = _dir / self.basename
-
- output = (
- local_dir + str(self.basename)
- if local_dir.endswith("/")
- else local_dir + "/" + str(self.basename)
- )
-
- _parquet = filepath.with_suffix(".parquet")
- if _parquet.exists():
- return Data(str(_parquet))
-
- _dbf = filepath.with_suffix(".dbf")
- if _dbf.exists():
- return Data(str(_dbf))
+ """
+ Asynchronously downloads the file to the specified local directory
+ """
+ target_dir = pathlib.Path(local_dir)
+ target_dir.mkdir(exist_ok=True, parents=True)
+ filepath = target_dir / self.basename
- if filepath.exists():
- return Data(output)
+        # Check for an existing parquet/dbf conversion or the raw file
+        for suffix in (".parquet", ".dbf", filepath.suffix):
+            existing = filepath.with_suffix(suffix)
+            if existing.exists():
+                return Data(str(existing))  # type: ignore
async with Client.context(
- host="ftp.datasus.gov.br",
- parse_list_line_custom=line_file_parser,
+ host="ftp.datasus.gov.br", parse_list_line_custom=self._line_parser
) as client:
await client.login()
- await client.download(self.path, output, write_into=True)
+ await client.download(self.path, str(filepath), write_into=True)
+
+ return Data(str(filepath)) # type: ignore
+
+ @staticmethod
+ def _line_parser(file_line: bytes) -> Tuple[str, Dict[str, Any]]:
+ """Static method to parse a line from the FTP LIST command and extract
+ file information
+ """
+ line = file_line.decode("utf-8")
+ if "" in line:
+ date, time, _, *name = line.strip().split()
+ info = {"size": 0, "type": "dir"}
+ name = " ".join(name)
+ else:
+ date, time, size, name = line.strip().split()
+ info = {"size": size, "type": "file"}
+
+ modify = datetime.strptime(f"{date} {time}", "%m-%d-%y %I:%M%p")
+ info["modify"] = modify.strftime("%m/%d/%Y %I:%M%p")
+ return name, info
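+
+    # e.g. a (hypothetical) LIST line such as
+    #   "04-27-23  09:30AM               1234 DOAC2301.dbc"
+    # parses to ("DOAC2301.dbc", {"size": "1234", "type": "file", ...})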
- return Data(output)
+ def __str__(self) -> str:
+ return str(self.basename)
+ def __repr__(self) -> str:
+ return str(self.basename)
-CACHE: Dict = {}
+ def __hash__(self):
+ return hash(self.path)
+
+ def __eq__(self, other):
+ if isinstance(other, File):
+ return self.path == other.path
+ return False
class Directory:
"""
- FTP Directory class. The Directory does not load its content when called.
- Instead, it will cache all the parents Directories until root "/". To load
- the content, the attr content or the method load() should be called. When
- firstly instantiated, it will CWD into the path provided and store self and
- all parents in cache
-
- Parameters
- path [str]: entire directory path where the directory is located
- inside the FTP server
- Attrs
- name [str]: Directory name
- path [str]: Directory path
- parent [Directory]: parent Directory
- loaded [bool]: True if content is loaded
- content [dict[str:[File, Directory]]]: A dictionary with name and File
- or Directory inside the Directory (e.g: "name": Directory("name"))
+ Directory class with caching and lazy loading.
+
+ The Directory class represents a directory in a file system and includes
+ mechanisms for caching instances and lazy loading of directory content.
+ When a Directory instance is created, it normalizes the provided path
+ and caches the instance. The content of the directory is not loaded
+ immediately; instead, it is loaded when the `content` property or the
+ `load` method is accessed or called.
+
+ Attributes:
+ path (str): The normalized path of the directory.
+ name (str): The name of the directory.
+ parent (Directory): The parent directory instance.
+ loaded (bool): Indicates whether the directory content has been loaded.
+ __content__ (Dict[str, Union[File, Directory]]): A dictionary
+ containing the directory's content, with names as keys and File or
+ Directory instances as values.
+
+ Methods:
+ _normalize_path(path: str) -> str: Normalizes the given path.
+ _get_root_directory() -> Directory: Returns the root directory
+ instance, creating it if necessary.
+ _init_root_child(name: str) -> None: Initializes a root child
+ directory.
+ _init_regular(parent_path: str, name: str) -> None: Initializes a
+ regular directory.
+ content() -> List[Union[Directory, File]]: Returns the content of the
+ directory, loading it if necessary.
+ load() -> Self: Loads the content of the directory and marks it as
+ loaded.
"""
name: str
path: str
- parent: Directory
- loaded: bool = False
- __content__: Dict = {}
-
- def __new__(cls, path: str, _is_root_child=False) -> Directory:
- ftp = FTP("ftp.datasus.gov.br")
- path = f"/{path}" if not str(path).startswith("/") else path
- path = path[:-1] if path.endswith("/") else path
-
- if not path: # if root ("/")
- path = "/"
- try:
- directory = CACHE["/"]
- except KeyError:
- directory = object.__new__(cls)
- directory.parent = directory
- directory.name = "/"
- directory.path = "/"
- directory.loaded = False
- directory.__content__ = {}
- CACHE["/"] = directory
- return directory
-
- parent_path, name = path.rsplit("/", maxsplit=1)
+ parent: "Directory"
+ loaded: bool
+ __content__: Dict[str, Union[File, "Directory"]]
- if _is_root_child:
- # WARNING: This parameter is for internal meanings, do not use
- directory = object.__new__(cls)
- directory.parent = CACHE["/"]
- directory.name = name
- CACHE[path] = directory
- return directory
+ def __new__(cls, path: str, _is_root_child: bool = False) -> "Directory":
+        normalized_path = cls._normalize_path(path)
- try:
- directory = CACHE[path] # Recursive and cached instantiation
- except KeyError:
- try:
- ftp.connect()
- ftp.login()
- ftp.cwd(path) # Checks if parent dir exists on DATASUS
- except Exception as exc:
- if "cannot find the path" in str(exc):
- logger.error(f"Not a directory {path}")
- elif "access is denied" in str(exc).lower():
- # Forbidden access, exists in ftp but returns Forbidden
- directory = object.__new__(cls)
- directory.parent = Directory(parent_path) # Recursive
- directory.name = name
- directory.loaded = False
- directory.__content__ = {}
- CACHE[path] = directory
- return directory
- raise exc
- finally:
- ftp.close()
-
- directory = object.__new__(cls)
- # TODO: In next step, all the parent directories will be generated,
- # but it cwds into every parent, while its certain that they exist
- # in ftp server. The best approach should be to skip the cwds
- directory.parent = Directory(parent_path) # Recursive
- directory.name = name
- directory.loaded = False
- directory.__content__ = {}
- CACHE[path] = directory
- return directory
-
- def __init__(self, path: str, _is_root_child=False) -> None:
- path = f"/{path}" if not str(path).startswith("/") else path
- path = path[:-1] if path.endswith("/") else path
- if not path:
- path = "/"
- self.path = path
+ # Handle root directory case
+ if normalized_path == "/":
+ return cls._get_root_directory()
- def __str__(self) -> str:
- return self.path
+ # Return cached instance if exists
+ if normalized_path in DIRECTORY_CACHE:
+ return DIRECTORY_CACHE[normalized_path]
- def __repr__(self) -> str:
- return self.path
+ # Use os.path.split for reliable path splitting
+ parent_path, name = os.path.split(normalized_path)
- def __hash__(self):
- return hash(self.path)
+ # Handle empty parent path
+ if not parent_path:
+ parent_path = "/"
+ # Handle parent paths that don't start with /
+ elif not parent_path.startswith("/"):
+ parent_path = "/" + parent_path
- def __eq__(self, other):
- if isinstance(other, Directory):
- return self.path == other.path
- return False
+ # Create new instance
+ instance = super().__new__(cls)
+ instance.path = normalized_path
- def __truediv__(self, path: str):
- if isinstance(path, str):
- path = f"/{path}" if not path.startswith("/") else path
- path = path[:-1] if path.endswith("/") else path
- return Directory(self.path + path)
- raise ValueError("Unsupported division")
+ if _is_root_child:
+ instance._init_root_child(name)
+ else:
+ instance._init_regular(parent_path, name)
+
+ DIRECTORY_CACHE[normalized_path] = instance
+ return instance
+
+    @staticmethod
+    def _normalize_path(path: str) -> str:
+        """Normalizes the given path, ensuring a leading slash"""
+        path = f"/{path}" if not path.startswith("/") else path
+        return path.removesuffix("/") or "/"
+
+ @classmethod
+ def _get_root_directory(cls) -> Directory:
+ """Returns the root directory instance, creating it if necessary"""
+ if "/" not in DIRECTORY_CACHE:
+ root = super().__new__(cls)
+ root.parent = root
+ root.name = "/"
+ root.path = "/"
+ root.loaded = False
+ root.__content__ = {}
+ DIRECTORY_CACHE["/"] = root
+ return DIRECTORY_CACHE["/"]
+
+ def _init_root_child(self, name: str) -> None:
+ """Initializes a root child directory"""
+ self.parent = DIRECTORY_CACHE["/"]
+ self.name = name
+ self.loaded = False
+ self.__content__ = {}
+
+ def _init_regular(self, parent_path: str, name: str) -> None:
+ """Initializes a regular directory"""
+ self.parent = Directory(parent_path)
+ self.name = name
+ self.loaded = False
+ self.__content__ = {}
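+
+    # Caching sketch: equal normalized paths resolve to the same instance,
+    # e.g. Directory("/dissemin/publicos") is Directory("/dissemin/publicos/")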
@property
- def content(self):
- """
- Returns a list of Files and Directories in the Directory, will load
- if needed
- """
+ def content(self) -> List[Union[Directory, File]]:
+ """Returns the content of the directory, loading it if necessary"""
if not self.loaded:
self.load()
return list(self.__content__.values())
- def load(self):
- """
- The content of a Directory must be explicitly loaded
- """
- self.__content__ |= load_path(self.path)
+ def load(self) -> Self:
+ """Loads the content of the directory and marks it as loaded"""
+ self.__content__ |= load_directory_content(self.path)
self.loaded = True
return self
@@ -348,83 +372,63 @@ def reload(self):
self.loaded = False
return self.load()
- def is_parent(self, other: Union[Self, File]) -> bool:
- """
- Checks if Directory or File is inside (or at any subdir) of self.
- """
- if self.path == "/":
- return True
-
- target = other
- while target.path != "/":
+ def __str__(self) -> str:
+ return self.path
- if self.path == target.path:
- return True
+ def __repr__(self) -> str:
+ return self.path
- if isinstance(other, File):
- # TODO: Implement parent logic on File (too much overhead)
- target = Directory(other.parent_path)
- else:
- target = target.parent
+ def __hash__(self):
+ return hash(self.path)
+ def __eq__(self, other):
+ if isinstance(other, Directory):
+ return self.path == other.path
return False
-CACHE["/"] = Directory("/")
-
-
-def load_path(path: str) -> Dict[str, Union[Directory, File]]:
- """
- This method is responsible for listing all the FTP directory's.
- Converts the items found within a valid DATASUS path into `File`s or
- Directories, returning its content.
- """
- path = str(path)
- content = {}
- ftp = FTP("ftp.datasus.gov.br")
+def load_directory_content(path: str) -> FileContent:
+ """Directory content loading"""
+ content: FileContent = {}
try:
- ftp.connect()
- ftp.login()
+ ftp = FTPSingleton.get_instance()
ftp.cwd(path)
+ path = path.removesuffix("/")
- def line_file_parser(file_line):
- info = {}
- if "" in file_line:
- date, time, _, *name = str(file_line).strip().split()
- info["size"] = 0
- info["type"] = "dir"
- name = " ".join(name)
+ def line_parser(line: str):
+ if "" in line:
+ date, time, _, name = line.strip().split(maxsplit=3)
modify = datetime.strptime(
- " ".join([date, time]), "%m-%d-%y %I:%M%p"
- )
- info["modify"] = modify
- xpath = (
- path + name if path.endswith("/") else path + "/" + name
+ f"{date} {time}", "%m-%d-%y %I:%M%p"
)
+ info = {"size": 0, "type": "dir", "modify": modify}
+ xpath = f"{path}/{name}"
content[name] = Directory(xpath)
else:
- date, time, size, name = str(file_line).strip().split()
- info["size"] = size
- info["type"] = "file"
+ date, time, size, name = line.strip().split(maxsplit=3)
modify = datetime.strptime(
- " ".join([date, time]), "%m-%d-%y %I:%M%p"
+ f"{date} {time}", "%m-%d-%y %I:%M%p"
)
- info["modify"] = modify
+ info: FileInfo = {
+ "size": size,
+ "type": "file",
+ "modify": modify,
+ }
content[name] = File(path, name, info)
- ftp.retrlines("LIST", line_file_parser)
+ ftp.retrlines("LIST", line_parser)
except Exception as exc:
raise exc
finally:
- ftp.close()
+ FTPSingleton.close()
- upper_names = [n.upper() for n in content]
- to_remove = []
- for name in content:
- if ".DBF" in name.upper():
- if name.upper().replace(".DBF", ".DBC") in upper_names:
- to_remove.append(name)
+    upper_names = {n.upper() for n in content}
+    to_remove = [
+        name
+        for name in content
+        if name.upper().endswith(".DBF")
+        and name.upper().replace(".DBF", ".DBC") in upper_names
+    ]
for name in to_remove:
del content[name]
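+    # e.g. if both "DOAC2301.DBF" and "DOAC2301.dbc" are listed, only the
+    # compressed .dbc entry is kept (hypothetical file names)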
@@ -489,31 +493,29 @@ def files(self) -> List[File]:
Lists Files inside content. To load a specific Directory inside
content, just `load()` this directory and list files again.
"""
- return list(filter(lambda f: isinstance(f, File), self.content))
+ return [f for f in self.content if isinstance(f, File)]
def load(
- self, directories: Optional[Union[Directory, List[Directory]]] = None
+ self,
+ directories: Optional[
+ Union[Directory, List[Directory], Tuple[Directory, ...]]
+ ] = None,
) -> Database:
"""
Loads specific directories to Database content. Will aggregate the
files found within Directories into Database.content.
"""
if not directories:
- directories = self.paths
-
- directories = to_list(directories)
+ directories = list(self.paths)
- for i, path in enumerate(directories):
- if isinstance(path, str):
- path = Directory(path)
- directories[i] = path
+ directories_list = to_list(directories)
- if not isinstance(path, Directory):
- raise ValueError("path must a valid DATASUS directory")
+        for directory in directories_list:
+            if isinstance(directory, str):
+                directory = Directory(directory)
+            if not isinstance(directory, Directory):
+                raise ValueError("path must be a valid DATASUS directory")
- for directory in directories:
directory.load()
- self.__content__ |= directory.__content__
+ self.__content__.update(directory.__content__)
return self
def describe(self, file: File) -> dict:
diff --git a/pysus/online_data/ESUS.py b/pysus/online_data/ESUS.py
index dbe9008..ef2b990 100644
--- a/pysus/online_data/ESUS.py
+++ b/pysus/online_data/ESUS.py
@@ -21,7 +21,7 @@ def download(uf, cache=True, checkmemory=True):
today = date.today()
dt = today.strftime("_%d_%m_%Y")
base = f"desc-esus-notifica-estado-{uf}" # desc-notificacoes-esusve-
- url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br"
+ url = f"https://{user}:{pwd}@elasticsearch-saps.saude.gov.br" # noqa: E231
out = f"ESUS_{uf}_{dt}.parquet"
cachefile = os.path.join(CACHEPATH, out)
@@ -36,7 +36,7 @@ def download(uf, cache=True, checkmemory=True):
fname = fetch(base, uf, url)
size = os.stat(fname).st_size
if size > 50e6 and checkmemory:
- print(f"Downloaded data is to large:{size / 1e6} MB compressed.")
+ print(f"Downloaded data is to large: {size / 1e6} MB compressed.")
print(
"Only loading the first 1000 rows. If your computer has enough"
+ " memory, set 'checkmemory' to False"
diff --git a/pysus/online_data/IBGE.py b/pysus/online_data/IBGE.py
index 3771b2e..33fba90 100644
--- a/pysus/online_data/IBGE.py
+++ b/pysus/online_data/IBGE.py
@@ -1,6 +1,7 @@
"""
Helper functions to download official statistics from IBGE SIDRA
"""
+
import ssl # Builtin
from pathlib import Path
from tempfile import TemporaryDirectory
@@ -142,7 +143,7 @@ def get_sidra_table(
print(f"Requesting data from {url}")
try:
- with (get_legacy_session() as s, s.get(url) as response):
+ with get_legacy_session() as s, s.get(url) as response:
df = pd.DataFrame(response.json())
except HTTPError:
response = requests.get(url)
@@ -163,7 +164,7 @@ def list_agregados(**kwargs):
url += "&".join([f"{k}={v}" for k, v in kwargs.items()])
print(f"Fetching Data groupings from {url}")
try:
- with (get_legacy_session() as s, s.get(url) as response):
+ with get_legacy_session() as s, s.get(url) as response:
table = pd.DataFrame(response.json())
except requests.exceptions.SSLError as e:
print(f"Failed fetching aggregates: {e}")
@@ -183,7 +184,7 @@ def localidades_por_agregado(agregado: int, nivel: str):
"""
url = APIBASE + f"agregados/{agregado}/localidades/{nivel}"
try:
- with (get_legacy_session() as s, s.get(url) as response):
+ with get_legacy_session() as s, s.get(url) as response:
table = pd.DataFrame(response.json())
except Exception as e:
print(f"Could not download from {url}\n{e}")
@@ -199,7 +200,7 @@ def metadados(agregado: int):
"""
url = APIBASE + f"agregados/{agregado}/metadados"
try:
- with (get_legacy_session() as s, s.get(url) as response):
+ with get_legacy_session() as s, s.get(url) as response:
data = response.json()
except Exception as e:
print(f"Could not download from {url}\n{e}")
@@ -215,7 +216,7 @@ def lista_periodos(agregado: int):
"""
url = APIBASE + f"agregados/{agregado}/periodos"
try:
- with (get_legacy_session() as s, s.get(url) as response):
+ with get_legacy_session() as s, s.get(url) as response:
table = pd.DataFrame(response.json())
except Exception:
return None
@@ -309,10 +310,10 @@ def __init__(
def _fetch_JSON(self):
try:
print(f"Fetching {self.url}")
- with (get_legacy_session() as s, s.get(self.url) as response):
+ with get_legacy_session() as s, s.get(self.url) as response:
self.JSON = response.json()
except Exception as e:
- print(f"Couldn't download data:\n{e}")
+ print("Couldn't download data:", e, sep="\n")
def to_dataframe(self):
return pd.DataFrame(self.JSON)
@@ -389,7 +390,7 @@ def get_population(
opts = ["ALF", "ESCA", "ESCB", "IDOSO", "RENDA"]
if not censo_data or censo_data not in opts:
raise ValueError(
- f"Incorrect `censo_data` parameter. Options: {opts}"
+ f"Incorrect 'censo_data' parameter. Options: {opts}"
)
file = [f for f in files if censo_data in f.name][0].download()
else:
diff --git a/pysus/online_data/Infogripe.py b/pysus/online_data/Infogripe.py
index a576219..bd496c7 100644
--- a/pysus/online_data/Infogripe.py
+++ b/pysus/online_data/Infogripe.py
@@ -18,6 +18,6 @@ def list_datasets():
def download(dataset_name):
- url = BASEURL + DATASETS[dataset_name]
+ url = BASEURL + DATASETS[dataset_name] + "?inline=false"
df = pd.read_csv(url, delimiter=";", decimal=",")
return df
diff --git a/pysus/preprocessing/ESUS.py b/pysus/preprocessing/ESUS.py
index 5552f58..110215c 100644
--- a/pysus/preprocessing/ESUS.py
+++ b/pysus/preprocessing/ESUS.py
@@ -26,8 +26,8 @@ def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"):
inplace=True,
)
print(
- f"Removed {old_size - len(df)} rows with missing dates of symptoms,"
- " notification or testing"
+ f"Removed {old_size - len(df)} rows with missing dates of symptoms, "
+ "notification or testing"
)
# Desconsiderando os resultados negativos ou inconclusivos
@@ -52,7 +52,9 @@ def cases_by_age_and_sex(UF, start="2020-03-01", end="2020-08-31"):
ini = np.arange(0, 81, 5)
fin = np.arange(5, 86, 5)
fin[-1] = 120
- faixa_etaria = {f"[{i},{f})": (i, f) for i, f in zip(ini, fin)}
+ faixa_etaria = {
+ f"[{i},{f})": (i, f) for i, f in zip(ini, fin) # noqa: E231
+ }
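+    # yields 5-year bins "[0,5)", "[5,10)", ..., "[80,120)" (last bin widened)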
labels = list(faixa_etaria.keys())
df["faixa_etaria"] = [
diff --git a/pysus/tests/test_ftp.py b/pysus/tests/test_ftp.py
index 19e1e01..9f78dfd 100644
--- a/pysus/tests/test_ftp.py
+++ b/pysus/tests/test_ftp.py
@@ -3,7 +3,7 @@
import pandas as pd
from pysus.data.local import ParquetSet
-from pysus.ftp import CACHE, Database, Directory, File
+from pysus.ftp import DIRECTORY_CACHE, Database, Directory, File
from pysus.ftp.databases import (
ciha,
cnes,
@@ -64,7 +64,7 @@ def test_root_directory(self):
self.assertTrue(self.root.parent == self.root) # outermost parent
def test_directory_cache(self):
- self.assertTrue(CACHE["/"] == self.root)
+        self.assertTrue(DIRECTORY_CACHE["/"] == self.root)
def test_sinan_file(self):
file = Directory("/dissemin/publicos/SINAN/DADOS/FINAIS").content[0]