Skip to content

Commit

Permalink
Updated naming convention to include date + days reported, rather than week number
Browse files Browse the repository at this point in the history
  • Loading branch information
tfnribeiro committed Jul 10, 2024
1 parent f4663a3 commit 8ecfd48
Showing 1 changed file with 23 additions and 17 deletions.
40 changes: 23 additions & 17 deletions tools/report_generator/generate_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def get_total_reject_article_reason_table(total_rejected_article_reasons):


def generate_feed_count_plots(feed_df, lang):
filename = f"feed_downloaded_articles_{lang}_w_{CURRENT_WEEK_N}.png"
filename = f"feed_downloaded_articles_{lang}_{date_str}_d{DAYS_FOR_REPORT}.png"
if feed_df[feed_df["Language"] == lang].Count.sum() == 0:
return ""
plt.figure(lang)
Expand All @@ -117,7 +117,7 @@ def generate_feed_count_plots(feed_df, lang):


def generate_bookmarks_by_language_plot(boomark_df):
filename = f"bookmarks_plot_w_{CURRENT_WEEK_N}.png"
filename = f"bookmarks_plot_{date_str}_d{DAYS_FOR_REPORT}.png"
bookmark_plot = (
boomark_df.groupby(["Language", "Has Exercised"])[["user_id"]]
.count()
Expand All @@ -133,7 +133,7 @@ def generate_bookmarks_by_language_plot(boomark_df):
def generate_topic_by_feed_plot(article_topic_df, lang):
# If I want to make topic colors consistent across plots, see:
# https://stackoverflow.com/questions/39000115/how-can-i-set-the-colors-per-value-when-coloring-plots-by-a-dataframe-column
filename = f"topics_per_feed_lang_{lang}_w_{CURRENT_WEEK_N}.png"
filename = f"topics_per_feed_lang_{lang}_{date_str}_d{DAYS_FOR_REPORT}.png"
topic_monitor = (
article_topic_df.groupby(["Language", "Feed Name"])
.Topic.value_counts()
Expand All @@ -159,7 +159,7 @@ def generate_topic_by_feed_plot(article_topic_df, lang):


def generate_topic_coverage_plot(article_df, article_with_topics_df):
filename = f"topic_coverage_plot_w_{CURRENT_WEEK_N}.png"
filename = f"topic_coverage_plot_{date_str}_d{DAYS_FOR_REPORT}.png"
article_df["has_topic"] = "No"
article_df.loc[article_df.id.isin(article_with_topics_df.id), "has_topic"] = "Yes"
articles_with_topics = (
Expand All @@ -180,7 +180,7 @@ def generate_topic_coverage_plot(article_df, article_with_topics_df):


def generate_total_article_per_language(article_df):
filename = f"total_articles_downloaded_w_{CURRENT_WEEK_N}.png"
filename = f"total_articles_downloaded_{date_str}_d{DAYS_FOR_REPORT}.png"
data = article_df["Language"].value_counts().reset_index()
sns.barplot(
x="Language",
Expand All @@ -197,9 +197,9 @@ def generate_total_article_per_language(article_df):

def generate_histogram(article_df, column, bins=20, remove_outliers=False):
filename = (
f"hist_{column}_removed_out_w_{CURRENT_WEEK_N}.png"
f"hist_{column}_removed_out_{date_str}_d{DAYS_FOR_REPORT}.png"
if remove_outliers
else f"hist_{column}_w_{CURRENT_WEEK_N}.png"
else f"hist_{column}_{date_str}_d{DAYS_FOR_REPORT}.png"
)
if remove_outliers:
article_df[article_df[column] < article_df[column].quantile(0.99)].groupby(
Expand All @@ -214,9 +214,9 @@ def generate_histogram(article_df, column, bins=20, remove_outliers=False):

def generate_user_reading_time(user_reading_time_df, lang=""):
filename = (
f"user_reading_time_plot_all_lang_w_{CURRENT_WEEK_N}.png"
f"user_reading_time_plot_all_lang_{date_str}_d{DAYS_FOR_REPORT}.png"
if lang == ""
else f"user_reading_time_plot_{lang}_w_{CURRENT_WEEK_N}.png"
else f"user_reading_time_plot_{lang}_{date_str}_d{DAYS_FOR_REPORT}.png"
)
plot_total_reading_time = (
user_reading_time_df.groupby(["Language", "Feed Name"])
Expand Down Expand Up @@ -247,9 +247,9 @@ def generate_user_reading_time(user_reading_time_df, lang=""):

def generate_unique_articles_read_plot(user_reading_time_df, lang=""):
filename = (
f"user_unique_articles_read_plot_all_lang_w_{CURRENT_WEEK_N}.png"
f"user_unique_articles_read_plot_all_lang_{date_str}_d{DAYS_FOR_REPORT}.png"
if lang == ""
else f"user_unique_articles_read_plot_{lang}_w_{CURRENT_WEEK_N}.png"
else f"user_unique_articles_read_plot_{lang}_{date_str}_d{DAYS_FOR_REPORT}.png"
)

if lang == "":
Expand Down Expand Up @@ -287,9 +287,9 @@ def generate_unique_articles_read_plot(user_reading_time_df, lang=""):

def generate_topic_reading_time(topic_reading_time_df, lang=""):
filename = (
f"topic_reading_time_plot_all_lang_w_{CURRENT_WEEK_N}.png"
f"topic_reading_time_plot_all_lang_{date_str}_d{DAYS_FOR_REPORT}.png"
if lang == ""
else f"topic_reading_time_plot_{lang}_w_{CURRENT_WEEK_N}.png"
else f"topic_reading_time_plot_{lang}_{date_str}_d{DAYS_FOR_REPORT}.png"
)
plot_total_reading_time = (
topic_reading_time_df.groupby(["Language", "Topic"])
Expand Down Expand Up @@ -322,9 +322,9 @@ def generate_topic_reading_time(topic_reading_time_df, lang=""):

def generate_exercise_activity(exercise_activity_df, lang=""):
filename = (
f"exercise_activity_plot_all_lang_w_{CURRENT_WEEK_N}.png"
f"exercise_activity_plot_all_lang_{date_str}_d{DAYS_FOR_REPORT}.png"
if lang == ""
else f"exercise_activity_plot_{lang}_w_{CURRENT_WEEK_N}.png"
else f"exercise_activity_plot_{lang}_{date_str}_d{DAYS_FOR_REPORT}.png"
)
ax = plt.subplot(111)
if lang == "":
Expand Down Expand Up @@ -580,7 +580,10 @@ def generate_html_page():
</body>
"""
with open(
os.path.join(FOLDER_FOR_REPORT_OUTPUT, f"report_week_nr_{CURRENT_WEEK_N}.html"),
os.path.join(
FOLDER_FOR_REPORT_OUTPUT,
f"report_zeeguu_{date_str}_d{DAYS_FOR_REPORT}.html",
),
"w",
encoding="UTF-8",
) as f:
Expand Down Expand Up @@ -610,7 +613,10 @@ def generate_html_page():

app = create_app()
sns.set_theme("paper", "whitegrid")
CURRENT_WEEK_N = datetime.datetime.now().isocalendar()[1]
cur_time = datetime.datetime.now()
CURRENT_WEEK_N = max(cur_time.isocalendar()[1] - 1, 1)
date_str = cur_time.strftime("%Y_%m_%d")

DB_URI = app.config["SQLALCHEMY_DATABASE_URI"]
# rcParams["figure.figsize"] = 10, 8
db_connection = create_engine(
Expand Down

0 comments on commit 8ecfd48

Please sign in to comment.