Skip to content

Commit

Permalink
Fast quit IROC reader on empty tag-list
Browse files Browse the repository at this point in the history
Previously, when called with an empty tag list, the IROC reader still crawled
the whole file system even though there were no tags to look for.

This closes #149.
  • Loading branch information
Erik Parmann authored and epa095 committed Mar 8, 2019
1 parent fbaadfe commit cca8e85
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 4 deletions.
8 changes: 8 additions & 0 deletions gordo_components/data_provider/iroc_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ def load_dataframes(
See GordoBaseDataProvider for documentation
"""

if not tag_list:
logger.warning("Iroc reader called with empty tag_list, returning none")
return
if to_ts < from_ts:
raise ValueError(
f"Iroc reader called with to_ts: {to_ts} before from_ts: {from_ts}"
)

base_path = base_path.strip("/")

# We query with an extra day on both sides since the way the files are
Expand Down
4 changes: 4 additions & 0 deletions gordo_components/data_provider/ncs_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ def load_dataframes(
"""
See GordoBaseDataProvider for documentation
"""
if to_ts < from_ts:
raise ValueError(
f"NCS reader called with to_ts: {to_ts} before from_ts: {from_ts}"
)
adls_file_system_client = self.client

years = range(from_ts.year, to_ts.year + 1)
Expand Down
15 changes: 11 additions & 4 deletions gordo_components/data_provider/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,20 @@ def load_dataframes_from_multiple_providers(
for tag_reader in data_providers:
if tag_reader.can_handle_tag(tag):
readers_to_tags[tag_reader].append(tag)
logger.info(f"Assigning tag: {tag} to reader {tag_reader}")
# In case of a tag matching two readers, we let the "first"
# one handle it
break
# The else branch is executed if the break is not called
else:
raise ValueError(f"Found no data providers able to download the tag {tag}")
for tag_reader, readers_tags in readers_to_tags.items():
for df in tag_reader.load_dataframes(
from_ts=from_ts, to_ts=to_ts, tag_list=readers_tags
):
yield df
if readers_tags:
logger.info(f"Using tag reader {tag_reader} to fetch tags {readers_tags}")
for df in tag_reader.load_dataframes(
from_ts=from_ts, to_ts=to_ts, tag_list=readers_tags
):
yield df


class DataLakeProvider(GordoBaseDataProvider):
Expand Down Expand Up @@ -113,6 +116,10 @@ def load_dataframes(
"""
# We create them here so we only try to get a auth-token once we actually need
# it, otherwise we would have constructed them in the constructor.
if to_ts < from_ts:
raise ValueError(
f"DataLakeReader called with to_ts: {to_ts} before from_ts: {from_ts}"
)
data_providers = self._get_sub_dataproviders()

yield from load_dataframes_from_multiple_providers(
Expand Down
24 changes: 24 additions & 0 deletions tests/test_data_provider_iroc.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,30 @@ def test_load_dataframes_no_data(self, _mocked_method):
)
)

def test_load_dataframes_no_tag_list(self):
    """load_dataframes yields nothing when it is given an empty tag_list."""
    reader = IrocReader(client=None, threads=1)
    window_start = isoparse("2018-05-02T01:56:00+00:00")
    window_end = isoparse("2018-05-03T01:56:00+00:00")

    frames = reader.load_dataframes(
        from_ts=window_start, to_ts=window_end, tag_list=[]
    )

    # Materialise the result so that a lazily evaluated reader is actually run.
    self.assertEqual([], list(frames))

def test_load_dataframes_checks_date(self):
    """load_dataframes raises ValueError when to_ts is earlier than from_ts."""
    reader = IrocReader(client=None, threads=1)
    # Deliberately reversed: the start of the window is one day after its end.
    later = isoparse("2018-05-03T01:56:00+00:00")
    earlier = isoparse("2018-05-02T01:56:00+00:00")
    requested_tags = ["jalla"]  # Not a tag in the input

    with self.assertRaises(ValueError):
        # Both the call and the consumption stay inside the context manager so
        # the error is caught whether it is raised eagerly or on iteration.
        list(
            reader.load_dataframes(
                from_ts=later, to_ts=earlier, tag_list=requested_tags
            )
        )

@mock.patch.object(
IrocReader,
"_fetch_all_iroc_files_from_paths",
Expand Down

0 comments on commit cca8e85

Please sign in to comment.