Skip to content

Commit

Permalink
generate-attribution: Try to extract contact information
Browse files Browse the repository at this point in the history
The idea of having an automated data issue reporting form came up during
38C3.

The extracted list of contact addresses should be consumed as follows:
1. Use the publisher address if it exists. The agency might be unaware of
   the feed's existence, and writing to them will not help.
2. Pick the agency with the matching agency_id if it exists.
3. Use a contact address with type "attribution". It should be manually
   checked whether this is applicable.
  • Loading branch information
jbruechert committed Jan 3, 2025
1 parent 47dcb7a commit 8d7c6f2
Showing 1 changed file with 40 additions and 6 deletions.
46 changes: 40 additions & 6 deletions src/generate-attribution.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,9 @@ def http_source_attribution(source: HttpSource) -> Optional[dict]:
print(f"Info: {feed_path} does not exist, skipping…")
return None

contacts: list[dict] = []

with ZipFile(feed_path) as z:
with z.open("agency.txt", "r") as a:
with io.TextIOWrapper(a) as at:
agencyreader = csv.DictReader(at, delimiter=",", quotechar='"')
for row in agencyreader:
attribution["operators"].append(row["agency_name"])
if "feed_info.txt" in z.namelist():
with z.open("feed_info.txt", "r") as i:
with io.TextIOWrapper(i) as it:
Expand All @@ -72,6 +69,31 @@ def http_source_attribution(source: HttpSource) -> Optional[dict]:
attribution["publisher"] = {}
attribution["publisher"]["name"] = publisher["feed_publisher_name"]
attribution["publisher"]["url"] = publisher["feed_publisher_url"]

contact = {
"type": "publisher",
"name": publisher["feed_publisher_name"],
"email": publisher.get("feed_contact_email"),
"url": publisher.get("feed_contact_url")
}

contacts.append(contact)

with z.open("agency.txt", "r") as a:
with io.TextIOWrapper(a) as at:
agencyreader = list(csv.DictReader(at, delimiter=",", quotechar='"'))

attribution["operators"] = \
filter_duplicates(map(lambda agency: agency["agency_name"],
agencyreader))

contacts += map(lambda agency: {
"type": "agency",
"agency_id": agency.get("agency_id"),
"name": agency["agency_name"],
"email": agency.get("agency_email")
}, agencyreader)

if "attributions.txt" in z.namelist():
with z.open("attributions.txt", "r") as a:
with io.TextIOWrapper(a) as at:
Expand All @@ -80,12 +102,24 @@ def http_source_attribution(source: HttpSource) -> Optional[dict]:
map(
lambda contrib: {
"name": contrib["organization_name"],
"url": contrib.get("attribution_url"),
"url": contrib.get("attribution_url")
},
attributionstxt,
)
)

attribution_contacts = map(lambda operator: {
"type": "attribution",
"name": operator["organization_name"],
"email": operator.get("attribution_email")
}, attributionstxt)

contacts += attribution_contacts

attribution["contacts"] = \
list(filter(lambda c: c.get("email") or c.get("url"),
contacts))

if (
"operators" in attribution
and len(attribution["operators"]) == 1
Expand Down

0 comments on commit 8d7c6f2

Please sign in to comment.