Skip to content

Commit

Permalink
Refactor query a bit
Browse files: browse the repository at this point in the history
  • Loading branch information
smolnar committed May 27, 2024
1 parent 898a1ef commit 310f3b2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 13 deletions.
4 changes: 2 additions & 2 deletions ml/decree-embeddings/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def base_embed_decrees(decrees):
f"Vectorized [{len(vectorizer.get_feature_names_out())}] features in [{vectorizer_fit_time_in_ms:.2f}ms]"
)

logger.debug("Vectorized features:")
logger.debug(vectorizer.get_feature_names_out().tolist())
logger.debug("Sample of Vectorized Features Names:")
logger.debug(vectorizer.get_feature_names_out())

for i, decree in enumerate(data):
decree["vector"] = [int(decree["year"]) or 0] + vectors[i].toarray()[0]
Expand Down
27 changes: 16 additions & 11 deletions ml/decree-embeddings/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,21 @@ def decrees(include_text=True, batch_size=1000):
cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
last_id = 0

text_query = (
"""
,ARRAY_TO_STRING(
(
SELECT ARRAY_AGG(decree_pages.text ORDER BY decree_pages.number ASC) FROM decree_pages
WHERE decree_pages.decree_id = decrees.id
GROUP BY decree_pages.decree_id
),
''
) AS text
"""
if include_text
else ""
)

while True:
start_time = time()

Expand All @@ -34,17 +49,7 @@ def decrees(include_text=True, batch_size=1000):
ARRAY_AGG(DISTINCT legislation_areas.value) FILTER (WHERE legislation_areas.value IS NOT NULL) AS areas,
ARRAY_AGG(DISTINCT legislation_subareas.value) FILTER (WHERE legislation_subareas.value IS NOT NULL) AS subareas,
ARRAY_AGG(DISTINCT legislations.value) FILTER (WHERE legislations.value IS NOT NULL) AS legislations
{
"""
,ARRAY_TO_STRING(
(
SELECT ARRAY_AGG(decree_pages.text ORDER BY decree_pages.number ASC) FROM decree_pages
WHERE decree_pages.decree_id = decrees.id
GROUP BY decree_pages.decree_id
),
''
) AS text
""" if include_text else ""}
{text_query}
FROM decrees
LEFT OUTER JOIN decree_forms ON decree_forms.id = decrees.decree_form_id
Expand Down

0 comments on commit 310f3b2

Please sign in to comment.