Skip to content

Commit

Permalink
Fix issues writing files_array to GCS (#3)
Browse files Browse the repository at this point in the history
Fix issues writing files_array to GCS

The immediate error that we saw was that `ReportDetails.report` was `None`. I believe that's because we were not passing the `current_report_row` when creating the `ReportDetails` (see `services/report/__init__.py` changes). We had a similar issue a couple lines down before. That case slipped by. To avoid further issues we will default to writing data to the DB if we can't access the repository from the `ReportDetails` object.

Then because my env does save reports to GCS I noticed differences between saving data in DB vs GCS. The difference is that the constituent parts of `files_array` in DB returned the classes they belong to (idk how), vs the encoded data that comes back from GCS (json format). In theory that is not an issue, because both formats are pretty much interchangeable, but the less differnces the better. SO I introduced a rehydrate function that wraps the `files_array` in the classes they are supposed to be.

Using the hydration function we can better standarize the data that is returned when accessing `ReportDetails.files_array`.
The difference was mostly within a Session when you set `files_array` in the DB you still use the data you had before.
Differently, when setting it to storage you always get back the encoded data, because it goes to storage immediatelly.

By standarizing the return we make sure to always use the appropriate inner classes, regardless of where the data comes from.
  • Loading branch information
giovanni-guidini authored Jul 13, 2023
1 parent 454d352 commit 0bf5dc2
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 43 deletions.
57 changes: 45 additions & 12 deletions database/models/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from functools import cached_property, lru_cache

from shared.config import get_config
from shared.reports.types import ReportTotals, SessionTotalsArray
from shared.storage.exceptions import FileNotInStorageError
from sqlalchemy import Column, ForeignKey, Table, types
from sqlalchemy.dialects import postgresql
Expand Down Expand Up @@ -127,16 +128,35 @@ class ReportDetails(CodecovBaseModel, MixinBaseClass):
"files_array_storage_path", types.Text, nullable=True
)

@classmethod
def rehydrate_encoded_data(self, json_files_array):
"""This ensures that we always use the files_array with the correct underlying classes.
No matter where the data comes from.
"""
return [
{
**v,
"file_totals": ReportTotals(*(v.get("file_totals", []))),
"session_totals": SessionTotalsArray.build_from_encoded_data(
v.get("session_totals")
),
"diff_totals": ReportTotals(*v["diff_totals"])
if v["diff_totals"]
else None,
}
for v in json_files_array
]

@lru_cache(maxsize=1)
def _get_files_array(self):
# Get files_array from the proper source
if self._files_array is not None:
return self._files_array
return ReportDetails.rehydrate_encoded_data(self._files_array)
repository = self.report.commit.repository
archive_service = ArchiveService(repository=repository)
try:
file_str = archive_service.read_file(self._files_array_storage_path)
return json.loads(file_str)
return ReportDetails.rehydrate_encoded_data(json.loads(file_str))
except FileNotInStorageError:
log.error(
"files_array not in storage",
Expand All @@ -151,23 +171,36 @@ def _get_files_array(self):
return []

def _should_write_to_storage(self):
# Safety check to see if the path to repository is valid
# Because we had issues around this before
if (
self.report is None
or self.report.commit is None
or self.report.commit.repository is None
or self.report.commit.repository.slug is None
):
return False
report_builder_repo_ids = get_config(
"setup", "save_report_data_in_storage", "repo_ids", default=[]
)
return get_config(
master_write_switch = get_config(
"setup",
"save_report_data_in_storage",
"report_details_files_array",
default=False,
) and (
not get_config(
"setup",
"save_report_data_in_storage",
"only_codecov",
default=True,
)
or self.report.commit.repository.slug.startswith("codecov/")
or self.report.commit.repository.repoid in report_builder_repo_ids
)
only_codecov = get_config(
"setup",
"save_report_data_in_storage",
"only_codecov",
default=True,
)
is_codecov_repo = self.report.commit.repository.slug.startswith("codecov/")
is_repo_allowed = (
self.report.commit.repository.repoid in report_builder_repo_ids
)
return master_write_switch and (
not only_codecov or is_codecov_repo or is_repo_allowed
)

def _set_files_array(self, files_array: dict):
Expand Down
103 changes: 92 additions & 11 deletions database/tests/unit/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from unittest.mock import call

import pytest
from shared.reports.types import ReportTotals, SessionTotalsArray
from shared.storage.exceptions import FileNotInStorageError
from shared.utils.ReportEncoder import ReportEncoder

Expand Down Expand Up @@ -134,25 +135,95 @@ class TestReportDetailsModel(object):
{
"filename": "file_1.go",
"file_index": 0,
"file_totals": [0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2, 0],
"session_totals": {
"0": [0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2],
"meta": {"session_count": 1},
},
"file_totals": ReportTotals(
*[0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2, 0]
),
"session_totals": SessionTotalsArray.build_from_encoded_data(
{
"0": [0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2],
"meta": {"session_count": 1},
}
),
"diff_totals": None,
},
{
"filename": "file_2.py",
"file_index": 1,
"file_totals": [0, 2, 1, 0, 1, "50.00000", 1, 0, 0, 0, 0, 0, 0],
"session_totals": {
"0": [0, 2, 1, 0, 1, "50.00000", 1],
"meta": {"session_count": 1},
},
"file_totals": ReportTotals(
*[0, 2, 1, 0, 1, "50.00000", 1, 0, 0, 0, 0, 0, 0]
),
"session_totals": SessionTotalsArray.build_from_encoded_data(
{
"0": [0, 2, 1, 0, 1, "50.00000", 1],
"meta": {"session_count": 1},
}
),
"diff_totals": None,
},
]

def test_rehydrate_already_hydrated(self):
fully_encoded_sample = [
{
"filename": "file_1.go",
"file_index": 0,
"file_totals": [0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2, 0],
"session_totals": {
"0": [0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2],
"meta": {"session_count": 1},
},
"diff_totals": None,
},
{
"filename": "file_2.py",
"file_index": 1,
"file_totals": [0, 2, 1, 0, 1, "50.00000", 1, 0, 0, 0, 0, 0, 0],
"session_totals": {
"0": [0, 2, 1, 0, 1, "50.00000", 1],
"meta": {"session_count": 1},
},
"diff_totals": None,
},
]
half_encoded_sample = [
{
"filename": "file_1.go",
"file_index": 0,
"file_totals": ReportTotals(
*[0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2, 0]
),
"session_totals": {
"0": [0, 8, 5, 3, 0, "62.50000", 0, 0, 0, 0, 10, 2],
"meta": {"session_count": 1},
},
"diff_totals": None,
},
{
"filename": "file_2.py",
"file_index": 1,
"file_totals": ReportTotals(
*[0, 2, 1, 0, 1, "50.00000", 1, 0, 0, 0, 0, 0, 0]
),
"session_totals": {
"0": [0, 2, 1, 0, 1, "50.00000", 1],
"meta": {"session_count": 1},
},
"diff_totals": None,
},
]
assert (
ReportDetails.rehydrate_encoded_data(self.sample_files_array)
== self.sample_files_array
)
assert (
ReportDetails.rehydrate_encoded_data(fully_encoded_sample)
== self.sample_files_array
)
assert (
ReportDetails.rehydrate_encoded_data(half_encoded_sample)
== self.sample_files_array
)

def test_get_files_array_from_db(self, dbsession, mocker):
factory_report_details: ReportDetails = ReportDetailsFactory()
factory_report_details._files_array = self.sample_files_array
Expand Down Expand Up @@ -263,7 +334,17 @@ def test__should_write_to_storage(self, dbsession, mocker, mock_configuration):
assert allowlisted_repo._should_write_to_storage() == False
assert codecov_report_details._should_write_to_storage() == False

def test_set_files_array_to_db(self, dbsession, mocker):
def test_set_files_array_to_db(self, dbsession, mocker, mock_configuration):
mock_configuration.set_params(
{
"setup": {
"save_report_data_in_storage": {
"only_codecov": False,
"report_details_files_array": False,
},
}
}
)
mock_archive_service = mocker.patch("database.models.reports.ArchiveService")

factory_report_details: ReportDetails = ReportDetailsFactory()
Expand Down
4 changes: 3 additions & 1 deletion services/report/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,9 @@ async def initialize_and_save_report(
)
if report_details is None:
report_details = ReportDetails(
report_id=current_report_row.id_, files_array=[]
report_id=current_report_row.id_,
_files_array=[],
report=current_report_row,
)
db_session.add(report_details)
db_session.flush()
Expand Down
Loading

0 comments on commit 0bf5dc2

Please sign in to comment.