Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into upgrade-elastic-s…
Browse files Browse the repository at this point in the history
…earch
  • Loading branch information
tfnribeiro committed Oct 7, 2024
2 parents 5df374a + 78bc683 commit 259a8c2
Show file tree
Hide file tree
Showing 93 changed files with 280,807 additions and 397 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@ lu-mir-zeeguu-credentials.json

zenv*
tools/_playground.py
semanticEmbApi/app/semantic_vector/binaries/distiluse-base-multilingual-cased-v2/

!zeeguu/core/word_filter/data/
1 change: 0 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"workbench.colorTheme": "Quiet Light",
"python.testing.pytestArgs": ["zeeguu"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
Expand Down
4 changes: 4 additions & 0 deletions CODING-GUIDELINES.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Coding Conventions for the Zeeguu API

## SQLAlchemy Related
- [Adding altered objects to sessions without committing](https://github.com/zeeguu/api/discussions/210)
24 changes: 24 additions & 0 deletions archlens.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,30 @@
}
],
"ignorePackages": []
},
"inside-api": {
"packages": [
{
"path": "api",
"depth": 1
}
],
"ignorePackages": [
"*test*",
"core"
]
},
"inside-core": {
"packages": [
{
"path": "core",
"depth": 1
}
],
"ignorePackages": [
"*test*",
"core"
]
}
}
}
6 changes: 4 additions & 2 deletions default_docker.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ SECRET_KEY="lalalal"

## The SMTP config information below is used for
## sending password reset requests.
SMTP_EMAIL=''
INVITATION_CODES=['test']
SMTP_EMAIL = ''

INVITATION_CODES=['test', 'zeeguu-preview']

SEND_NOTIFICATION_EMAILS=False
ZEEGUU_DATA_FOLDER="/Users/zeeguu/data"
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ services:
ZEEGUU_CONFIG: /Zeeguu-API/default_docker.cfg
ZEEGUU_ES_CONN_STRING: "elasticsearch:9200"
PYTHONUNBUFFERED: 1
DEV_SKIP_TRANSLATION: 1
MICROSOFT_TRANSLATE_API_KEY: ${MICROSOFT_TRANSLATE_API_KEY}
GOOGLE_TRANSLATE_API_KEY: ${GOOGLE_TRANSLATE_API_KEY}

Expand Down
1 change: 1 addition & 0 deletions env_var_defs_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@

# Default (empty) credentials so the app can start without real API keys.
os.environ["GOOGLE_TRANSLATE_API_KEY"] = ""
os.environ["MICROSOFT_TRANSLATE_API_KEY"] = ""
# os.environ only accepts str values; assigning the int 0 raises
# "TypeError: str expected, not int" as soon as this module is imported.
# NOTE(review): the string "0" is truthy in Python - presumably the consumer
# parses or compares the value rather than testing truthiness; confirm.
os.environ["DEV_SKIP_TRANSLATION"] = "0"
os.environ["WORDNIK_API_KEY"] = ""
7 changes: 2 additions & 5 deletions tools/_playground.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from zeeguu.core.content_retriever.parse_with_readability_server import (
download_and_parse,
)
from zeeguu.core.content_retriever.parse_with_readability_server import download_and_parse

from zeeguu.api.app import create_app

app = create_app()
app.app_context().push()

na = download_and_parse(
"https://www.dr.dk/stories/1288510966/allerede-inden-oscar-showets-start-lurer-en-ny-skandale-i-kulissen"
)
"https://www.dr.dk/nyheder/indland/flere-laeger-uden-koebenhavn-kronikerpakker-og-kaempe-region-her-er-det-vigtigste-i")
print(na)
5 changes: 5 additions & 0 deletions tools/delete_dev_users.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,17 @@
db_session = zeeguu.core.model.db.session

# Pass 1: remove every account whose e-mail marks it as anonymous.
anon_users = 0
for user in User.find_all():
    if "anon.zeeguu" not in user.email:
        continue
    delete_user_account(db_session, user)
    anon_users += 1

# Pass 2: remove every account flagged as a developer account.
dev_users = 0
for user in User.query.filter_by(is_dev=True):
    print("deleting ... " + user.name)
    delete_user_account(db_session, user)
    dev_users += 1

# Summary of what was removed and what is left.
print(f"Deleted: anon={anon_users}, dev={dev_users}")
print(f"Remaining users: {len(User.find_all())}")
27 changes: 27 additions & 0 deletions tools/migrations/24-08-30--user_cohort_map.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Replaces the single `user`.`cohort_id` column with a user <-> cohort
-- mapping table, so that the relation is no longer limited to one cohort
-- per user row.
CREATE TABLE `zeeguu_test`.`user_cohort_map` (
    `user_id` INT NOT NULL,
    `cohort_id` INT NOT NULL,
    PRIMARY KEY (`user_id`, `cohort_id`),
    -- The primary key already serves lookups by `user_id` (leftmost prefix),
    -- so the secondary index has to be on `cohort_id` to back the second
    -- foreign key and lookups by cohort.
    INDEX `cohort_id_ibfk_1_idx` (`cohort_id` ASC) VISIBLE,
    CONSTRAINT `user_cohort_map_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `zeeguu_test`.`user` (`id`) ON DELETE NO ACTION ON UPDATE NO ACTION,
    CONSTRAINT `user_cohort_map_ibfk_2` FOREIGN KEY (`cohort_id`) REFERENCES `zeeguu_test`.`cohort` (`id`) ON DELETE NO ACTION ON UPDATE NO ACTION
);

-- Copy the existing assignments before the source column is dropped.
-- Tables are schema-qualified like every other statement in this migration,
-- so the copy cannot target a different default schema by accident.
INSERT INTO
    `zeeguu_test`.`user_cohort_map` (`user_id`, `cohort_id`)
SELECT
    `id`,
    `cohort_id`
FROM
    `zeeguu_test`.`user`
WHERE
    `cohort_id` IS NOT NULL;

-- The foreign key has to go before the column it constrains can be dropped.
ALTER TABLE
    `zeeguu_test`.`user` DROP FOREIGN KEY `user_ibfk_3`;

ALTER TABLE
    `zeeguu_test`.`user` DROP COLUMN `cohort_id`,
    DROP INDEX `cohort_id`;
56 changes: 56 additions & 0 deletions tools/migrations/24-09-24--add_feeback_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
-- Lookup table naming the part of the product a feedback entry refers to.
CREATE TABLE `zeeguu_test`.`feedback_component` (
    `id` INT NOT NULL AUTO_INCREMENT,
    `component_type` VARCHAR(45) NOT NULL,
    PRIMARY KEY (`id`)
);

-- Seed the component types; row order is kept so the generated ids stay
-- in the same sequence.
INSERT INTO
    `zeeguu_test`.`feedback_component` (`component_type`)
VALUES
    ('Article Reader'),
    ('Article Recommendations'),
    ('Translation'),
    ('Sound'),
    ('Exercises'),
    ('Extension'),
    ('Other');

-- One row per feedback message; `url_id` is optional.
CREATE TABLE `zeeguu_test`.`user_feedback` (
    `id` INT NOT NULL AUTO_INCREMENT,
    `user_id` INT NOT NULL,
    `feedback_component_id` INT NOT NULL,
    `message` VARCHAR(512) NULL,
    `report_time` DATETIME NULL,
    `url_id` INT NULL,
    PRIMARY KEY (`id`),
    INDEX `user_feedback_ibfk_1_idx` (`user_id` ASC),
    INDEX `user_feedback_ibfk_2_idx` (`feedback_component_id` ASC),
    INDEX `user_feedback_ibfk_2_idx1` (`url_id` ASC),
    CONSTRAINT `user_feedback_ibfk_1` FOREIGN KEY (`user_id`) REFERENCES `zeeguu_test`.`user` (`id`) ON DELETE NO ACTION ON UPDATE NO ACTION,
    CONSTRAINT `user_feedback_ibfk_2` FOREIGN KEY (`feedback_component_id`) REFERENCES `zeeguu_test`.`feedback_component` (`id`) ON DELETE NO ACTION ON UPDATE NO ACTION,
    CONSTRAINT `user_feedback_ibfk_3` FOREIGN KEY (`url_id`) REFERENCES `zeeguu_test`.`url` (`id`) ON DELETE NO ACTION ON UPDATE NO ACTION
);
30 changes: 30 additions & 0 deletions tools/migrations/24-09-24--adding-language-to-search.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- Step 1: add a nullable `language_id` column to `search` (placed after `id`),
-- redefine `keywords` as VARCHAR(100) NULL, and index the new column.
ALTER TABLE
`zeeguu_test`.`search`
ADD
COLUMN `language_id` INT NULL
AFTER
`id`,
CHANGE COLUMN `keywords` `keywords` VARCHAR(100) NULL DEFAULT NULL,
ADD
INDEX `search_ibfk_1_idx` (`language_id` ASC) VISIBLE;

;

-- Step 2: make `search`.`language_id` a foreign key to `language`.`id`.
ALTER TABLE
`zeeguu_test`.`search`
ADD
CONSTRAINT `search_ibfk_1` FOREIGN KEY (`language_id`) REFERENCES `zeeguu_test`.`language` (`id`) ON DELETE NO ACTION ON UPDATE NO ACTION;

-- Step 3: backfill `language_id` from the learned language of the users
-- that have a subscription to each search.
-- NOTE(review): if several users with different learned languages share the
-- same search row, this statement does not determine which language is
-- written - confirm that this is acceptable for existing data.
UPDATE
`zeeguu_test`.`search` s
INNER JOIN `zeeguu_test`.`search_subscription` ssub ON s.id = ssub.search_id
INNER JOIN `zeeguu_test`.`user` u on u.id = ssub.user_id
SET
s.language_id = u.learned_language_id;

-- Step 4: same backfill via `search_filter`. It runs after step 3, so for a
-- search referenced by both tables the value written here is the one kept.
UPDATE
`zeeguu_test`.`search` s
INNER JOIN `zeeguu_test`.`search_filter` ssub ON s.id = ssub.search_id
INNER JOIN `zeeguu_test`.`user` u on u.id = ssub.user_id
SET
s.language_id = u.learned_language_id;
6 changes: 6 additions & 0 deletions tools/mysql_to_elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ def main(starting_index):

max_id = session.query(func.max(Article.id)).first()[0]
min_id = session.query(func.min(Article.id)).first()[0]

if max_id is None:
max_id = 0
if min_id is None:
min_id = 0

print(f"starting import at: {starting_index}")
print(f"max id in db: {max_id}")

Expand Down
46 changes: 40 additions & 6 deletions tools/report_generator/data_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def get_article_topics_df(self, feed_df):
df = pd.read_sql(query, con=self.db_connection)
self.__add_feed_name(df, feed_df)
return df

def get_article_new_topics_df(self, feed_df):
print("Getting Article New Topics...")
query = f"""SELECT a.id, l.name Language, a.feed_id, t.title Topic, atm.origin_type
Expand All @@ -59,8 +59,8 @@ def get_article_df(self, feed_df):
df = pd.read_sql(query, con=self.db_connection)
self.__add_feed_name(df, feed_df)
return df
def get_article_df_with_ids(self, feed_df, id_to_fetch:list[int]):

def get_article_df_with_ids(self, feed_df, id_to_fetch: list[int]):
print("Getting Articles with Ids...")
ids_as_str = [str(v) for v in id_to_fetch]
query = f"""SELECT a.*, l.name Language
Expand All @@ -70,7 +70,7 @@ def get_article_df_with_ids(self, feed_df, id_to_fetch:list[int]):
df = pd.read_sql(query, con=self.db_connection)
self.__add_feed_name(df, feed_df)
return df

def get_language_df(self):
print("Getting Languages...")
query = "SELECT * from language"
Expand Down Expand Up @@ -228,7 +228,7 @@ def get_topic_reading_time(self):
"Unclassified"
)
return topic_reading_time_df

def get_new_topic_reading_time(self):
print("Getting New Topic Reading Times...")
query = f"""SELECT l.name as Language, t.title Topic, SUM(urs.duration) total_reading_time
Expand All @@ -249,7 +249,41 @@ def get_new_topic_reading_time(self):
"Unclassified"
)
return topic_reading_time_df


def get_top_search_subscriptions(self):
    """Return one row per subscribed search, ordered by user count (descending).

    The resulting DataFrame has the columns ``keywords``, ``total_users``
    (count of ``user_id`` per search) and ``total_subscribers`` (sum of
    ``receive_email`` per search).
    """
    print("Getting top search subscriptions...")
    sql_lines = (
        "SELECT s.keywords, count(user_id) total_users, sum(receive_email) as total_subscribers",
        "FROM search_subscription s_sub",
        "INNER JOIN search s",
        "ON s.id = s_sub.search_id",
        "GROUP by search_id",
        "ORDER BY total_users DESC;",
    )
    return pd.read_sql("\n".join(sql_lines), con=self.db_connection)

def get_added_search_subscriptions(self):
    """Return the distinct search keywords subscribed to in the report window.

    Reads ``SUBSCRIBE_TO_SEARCH`` events from ``user_activity_data`` whose
    ``value`` is a keyword present in ``search`` and whose ``time`` lies at
    most ``self.DAYS_FOR_REPORT`` days before the current date.

    Returns:
        list: the matching keyword strings, in the order the database
        returns them.
    """
    print("Getting new added search subscriptions...")
    # `=` rather than LIKE: in a LIKE pattern every `_` is a single-character
    # wildcard, so 'SUBSCRIBE_TO_SEARCH' would also match other event names
    # of the same length.
    # The table is left unqualified, like `search` in the same query, so the
    # report runs against whichever schema the connection points at.
    query = f"""SELECT DISTINCT value as search
        FROM user_activity_data
        WHERE event = 'SUBSCRIBE_TO_SEARCH'
        AND value in (SELECT keywords from search)
        AND DATEDIFF(CURDATE(), time) <= {self.DAYS_FOR_REPORT};"""
    return pd.read_sql(query, con=self.db_connection)["search"].tolist()

def get_top_search_filters(self):
    """Return one row per filtered search, ordered by user count (descending).

    The resulting DataFrame has the columns ``keywords`` and ``total_users``
    (count of ``user_id`` per search in ``search_filter``).
    """
    print("Getting top search filters...")
    sql_lines = (
        "SELECT s.keywords, count(user_id) total_users",
        "FROM search_filter s_f",
        "INNER JOIN search s",
        "ON s.id = s_f.search_id",
        "GROUP by search_id",
        "ORDER BY total_users DESC;",
    )
    return pd.read_sql("\n".join(sql_lines), con=self.db_connection)

def add_language_to_df(self, df, language_data):
df["Language"] = df.language_id.apply(
lambda x: language_data.loc[language_data.id == x, "name"].values[0]
Expand Down
Loading

0 comments on commit 259a8c2

Please sign in to comment.