From cca8e85e4c2463a183b7a861ce50ff989e14da1c Mon Sep 17 00:00:00 2001 From: Erik Parmann Date: Fri, 8 Mar 2019 10:48:51 +0100 Subject: [PATCH] Fast quit IROC reader on empty tag-list Previously, when called with an empty tag list, the IROC reader still crawled the whole file system even though there were no tags to look for. This closes #149. --- gordo_components/data_provider/iroc_reader.py | 8 +++++++ gordo_components/data_provider/ncs_reader.py | 4 ++++ gordo_components/data_provider/providers.py | 15 ++++++++---- tests/test_data_provider_iroc.py | 24 +++++++++++++++++++ 4 files changed, 47 insertions(+), 4 deletions(-) diff --git a/gordo_components/data_provider/iroc_reader.py b/gordo_components/data_provider/iroc_reader.py index 98223bfd5..98cc6b098 100644 --- a/gordo_components/data_provider/iroc_reader.py +++ b/gordo_components/data_provider/iroc_reader.py @@ -40,6 +40,14 @@ def load_dataframes( See GordoBaseDataProvider for documentation """ + if not tag_list: + logger.warning("Iroc reader called with empty tag_list, returning none") + return + if to_ts < from_ts: + raise ValueError( + f"Iroc reader called with to_ts: {to_ts} before from_ts: {from_ts}" + ) + base_path = base_path.strip("/") # We query with an extra day on both sides since the way the files are diff --git a/gordo_components/data_provider/ncs_reader.py b/gordo_components/data_provider/ncs_reader.py index 19447d467..780464609 100644 --- a/gordo_components/data_provider/ncs_reader.py +++ b/gordo_components/data_provider/ncs_reader.py @@ -55,6 +55,10 @@ def load_dataframes( """ See GordoBaseDataProvider for documentation """ + if to_ts < from_ts: + raise ValueError( + f"NCS reader called with to_ts: {to_ts} before from_ts: {from_ts}" + ) adls_file_system_client = self.client years = range(from_ts.year, to_ts.year + 1) diff --git a/gordo_components/data_provider/providers.py b/gordo_components/data_provider/providers.py index 305011051..558f734ad 100644 --- a/gordo_components/data_provider/providers.py +++ b/gordo_components/data_provider/providers.py @@ 
-45,6 +45,7 @@ def load_dataframes_from_multiple_providers( for tag_reader in data_providers: if tag_reader.can_handle_tag(tag): readers_to_tags[tag_reader].append(tag) + logger.info(f"Assigning tag: {tag} to reader {tag_reader}") # In case of a tag matching two readers, we let the "first" # one handle it break @@ -52,10 +53,12 @@ def load_dataframes_from_multiple_providers( else: raise ValueError(f"Found no data providers able to download the tag {tag}") for tag_reader, readers_tags in readers_to_tags.items(): - for df in tag_reader.load_dataframes( - from_ts=from_ts, to_ts=to_ts, tag_list=readers_tags - ): - yield df + if readers_tags: + logger.info(f"Using tag reader {tag_reader} to fetch tags {readers_tags}") + for df in tag_reader.load_dataframes( + from_ts=from_ts, to_ts=to_ts, tag_list=readers_tags + ): + yield df class DataLakeProvider(GordoBaseDataProvider): @@ -113,6 +116,10 @@ def load_dataframes( """ # We create them here so we only try to get a auth-token once we actually need # it, otherwise we would have constructed them in the constructor. 
+ if to_ts < from_ts: + raise ValueError( + f"DataLakeReader called with to_ts: {to_ts} before from_ts: {from_ts}" + ) data_providers = self._get_sub_dataproviders() yield from load_dataframes_from_multiple_providers( diff --git a/tests/test_data_provider_iroc.py b/tests/test_data_provider_iroc.py index bb957a01c..d3a4f0fdd 100644 --- a/tests/test_data_provider_iroc.py +++ b/tests/test_data_provider_iroc.py @@ -92,6 +92,30 @@ def test_load_dataframes_no_data(self, _mocked_method): ) ) + def test_load_dataframes_no_tag_list(self): + """load_dataframe will return an empty generator when called with no tags""" + iroc_reader = IrocReader(client=None, threads=1) + res = list( + iroc_reader.load_dataframes( + from_ts=isoparse("2018-05-02T01:56:00+00:00"), + to_ts=isoparse("2018-05-03T01:56:00+00:00"), + tag_list=[], + ) + ) + self.assertEqual([], res) + + def test_load_dataframes_checks_date(self): + """load_dataframe will raise ValueError if to_ts