From 7af58b9e0169488ec759cd18a7107b1a76e58947 Mon Sep 17 00:00:00 2001 From: CKrawczyk Date: Thu, 5 Dec 2024 11:07:08 +0000 Subject: [PATCH 1/7] Update router tests for local testing If installed with `online` but redis is not up and running for celery, make sure the unittests use system memory instead. --- panoptes_aggregation/tests/router_tests/test_routes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/panoptes_aggregation/tests/router_tests/test_routes.py b/panoptes_aggregation/tests/router_tests/test_routes.py index 70db0aa5..7df25883 100644 --- a/panoptes_aggregation/tests/router_tests/test_routes.py +++ b/panoptes_aggregation/tests/router_tests/test_routes.py @@ -1,5 +1,8 @@ try: import panoptes_aggregation.routes as routes + from panoptes_aggregation.batch_aggregation import celery as celeryapp + celeryapp.conf.update(CELERY_BROKER_URL='memory://') + celeryapp.conf.update(CELERY_RESULT_BACKEND='cache+memory://') OFFLINE = False except ImportError: OFFLINE = True From d7f929926413d4ac9f2fc0b2a4fd6b01bd091c52 Mon Sep 17 00:00:00 2001 From: CKrawczyk Date: Thu, 5 Dec 2024 11:36:19 +0000 Subject: [PATCH 2/7] Fix celery tests locally for batch run Also use in memory broker for the other set of batch aggregation tests. Needed to make all the tests run with the pytest runner (these tests are imported first, and the config needs to be updated at first import). --- .../tests/batch_aggregation/test_batch_aggregation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py index 1ca25376..d9b5ac3e 100644 --- a/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py +++ b/panoptes_aggregation/tests/batch_aggregation/test_batch_aggregation.py @@ -5,6 +5,9 @@ from panoptes_aggregation.batch_aggregation import run_aggregation from panoptes_aggregation import batch_aggregation as batch_agg +batch_agg.celery.conf.update(CELERY_BROKER_URL='memory://') +batch_agg.celery.conf.update(CELERY_RESULT_BACKEND='cache+memory://') + wf_export = 'panoptes_aggregation/tests/batch_aggregation/wf_export.csv' cls_export = 'panoptes_aggregation/tests/batch_aggregation/cls_export.csv' From 5dcb6e3ef195edd05115ce385527c5046f61166e Mon Sep 17 00:00:00 2001 From: CKrawczyk Date: Thu, 5 Dec 2024 11:46:03 +0000 Subject: [PATCH 3/7] Update doc and dev dependencies Just minor changes to the sphinx conf file, the format `intersphinx_mapping` has been updated. --- docs/source/conf.py | 2 +- pyproject.toml | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 5b429b80..efa7edaf 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -174,4 +174,4 @@ # Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} +intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} diff --git a/pyproject.toml b/pyproject.toml index 28567368..357b9c71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,20 +54,20 @@ gui = [ "Gooey>=1.0.8.1,<1.1" ] doc = [ - "matplotlib>=3.5.1,<3.9", - "myst-nb>=0.13.2,<1.1", - "sphinx>=5.2.0,<7.3", + "matplotlib>=3.5.1,<4.0", + "myst-nb>=0.13.2,<2.0", + "sphinx>=5.2.0,<7.5", "sphinxcontrib-httpdomain>=1.7.0,<1.9", - "sphinx_rtd_theme>=0.4.3,<1.4" + "sphinx_rtd_theme>=0.4.3,<4.0" ] test = [ - "coverage>=4.5.3,<7.4", - "coveralls>=3.0.0,<3.3.2", - "flake8>=6.0,<6.2", + "coverage>=4.5.3,<7.7", + "coveralls>=3.0.0,<4.1", + "flake8>=7.0,<7.2", "flake8-black>=0.3.4,<0.4", - "flake8-bugbear>=23.5,<23.10", - "pytest>=7.1.2,<7.4.4", - "pytest-subtests>=0.10.0,<0.11.1" + "flake8-bugbear>=23.5,<24.11", + "pytest>=7.1.2,<8.4", + "pytest-subtests>=0.10.0,<0.14" ] [project.scripts] From f093331e18ffdd173fbb455c184ca46cb80e7999 Mon Sep 17 00:00:00 2001 From: CKrawczyk Date: Thu, 5 Dec 2024 14:41:48 +0000 Subject: [PATCH 4/7] Update online dependencies One small change needed for the sentry update (keyword change for a function). --- panoptes_aggregation/routes.py | 2 +- pyproject.toml | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/panoptes_aggregation/routes.py b/panoptes_aggregation/routes.py index 9d73648e..45612399 100644 --- a/panoptes_aggregation/routes.py +++ b/panoptes_aggregation/routes.py @@ -74,7 +74,7 @@ def make_application(): # and the DSN being set via the SENTRY_DSN env var # https://docs.sentry.io/error-reporting/configuration/?platform=python#dsn sentry_sdk.init( - request_bodies='always', + max_request_body_size='always', integrations=[FlaskIntegration()] ) # setup the flask app to server web requests diff --git a/pyproject.toml b/pyproject.toml index 357b9c71..887e1eb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "pyyaml>=6.0,<6.1", "scikit-learn>=1.2.0,<1.3.3", "scipy>=1.10.0,<1.11.4", - "werkzeug>=2.3.0,<3.0.2", + "werkzeug>=2.3.0,<3.2", "shapely>=2.0,<2.0.3" ] @@ -38,16 +38,16 @@ dependencies = [ online = [ "azure-identity>=1,<2", "azure-storage-blob>=12,<13", - "celery>=5.3,<5.4", + "celery>=5.3,<5.5", "redis>=5,<6", "flower>2,<3", - "flask>=2.3,<3.1", - "flask-cors>=3.0,<4.1", + "flask>=2.3,<3.2", + "flask-cors>=3.0,<5.1", "panoptes-client>=1.6,<1.7", - "requests>=2.28,<2.32", - "gunicorn>=20.0,<21.3", - "sentry-sdk[flask]>=1.0,<1.36", - "newrelic>=8.4.0,<9.1.3", + "requests>=2.28,<2.33", + "gunicorn>=20.0,<24.0", + "sentry-sdk[flask]>=2.19,<2.20", + "newrelic>=8.4.0,<10.4", "gitpython>=3.0.0,<3.2" ] gui = [ From 556c70b77aa8a4c28f5571838e0bde3bfa218dea Mon Sep 17 00:00:00 2001 From: CKrawczyk Date: Thu, 5 Dec 2024 14:54:52 +0000 Subject: [PATCH 5/7] Partial update of main dependencies The "safe" dependencies to update (that don't touch Numpy's 2.0 update). Also removes a depreciation warning for Pandas. --- panoptes_aggregation/scripts/reduce_panoptes_csv.py | 1 - panoptes_aggregation/tests/scripts_tests/test_reduce_csv.py | 1 - pyproject.toml | 6 +++--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/panoptes_aggregation/scripts/reduce_panoptes_csv.py b/panoptes_aggregation/scripts/reduce_panoptes_csv.py index 56d90bad..8fd9b815 100644 --- a/panoptes_aggregation/scripts/reduce_panoptes_csv.py +++ b/panoptes_aggregation/scripts/reduce_panoptes_csv.py @@ -33,7 +33,6 @@ def reduce_csv( with extracted_csv as extracted_csv_in: extracted = pandas.read_csv( extracted_csv_in, - infer_datetime_format=True, parse_dates=['created_at'], encoding='utf-8' ) diff --git a/panoptes_aggregation/tests/scripts_tests/test_reduce_csv.py b/panoptes_aggregation/tests/scripts_tests/test_reduce_csv.py index f088d6c8..1940ed3a 100644 --- a/panoptes_aggregation/tests/scripts_tests/test_reduce_csv.py +++ b/panoptes_aggregation/tests/scripts_tests/test_reduce_csv.py @@ -113,7 +113,6 @@ def setUp(self): self.extracted_csv_question = StringIO(extracted_csv_question) self.extracted_dataframe_question = pandas.read_csv( StringIO(extracted_csv_question), - infer_datetime_format=True, parse_dates=['created_at'], encoding='utf-8' ) diff --git a/pyproject.toml b/pyproject.toml index 887e1eb8..458f83b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,11 +20,11 @@ dependencies = [ "beautifulsoup4>=4.8.1,<4.13", "collatex>=2.3,<2.4", "hdbscan>=0.8.20,<=0.8.33", - "lxml>=4.4,<4.10", + "lxml>=4.4,<5.4", "numpy>=1.22.0,<1.26.3", - "packaging>=20.1,<23.3", + "packaging>=20.1,<24.3", "pandas>=1.4.0,<2.1.4", - "progressbar2>=3.39,<4.3", + "progressbar2>=3.39,<4.6", "python-levenshtein>=0.21.0,<0.24", "python-slugify>=7.0.0,<8.1", "pyyaml>=6.0,<6.1", From 9efa68677b487661ff9690aca6a55457a0b82626 Mon Sep 17 00:00:00 2001 From: CKrawczyk Date: Thu, 5 Dec 2024 17:16:15 +0000 Subject: [PATCH 6/7] Update the rest of the dependencies The OPTICS clusters are a bit different with the new version of scikit-learn, not really sure why. For now just setting a higher `xi` for the test makes it all work again. Also had to adjust the output data types for some of the text extractor outputs. --- .../extractors/poly_line_text_extractor.py | 4 ++-- .../reducers/optics_line_text_reducer.py | 4 ++-- panoptes_aggregation/reducers/optics_text_utils.py | 6 +++--- .../reducer_tests/test_optics_line_text_reducer.py | 8 +++++--- pyproject.toml | 14 +++++++------- 5 files changed, 19 insertions(+), 17 deletions(-) diff --git a/panoptes_aggregation/extractors/poly_line_text_extractor.py b/panoptes_aggregation/extractors/poly_line_text_extractor.py index e7e9918f..7348cd34 100644 --- a/panoptes_aggregation/extractors/poly_line_text_extractor.py +++ b/panoptes_aggregation/extractors/poly_line_text_extractor.py @@ -95,7 +95,7 @@ def poly_line_text_extractor(classification, dot_freq='line', gold_standard=Fals dx = x[-1] - x[0] dy = y_fit[-1] - y_fit[0] slope = np.rad2deg(np.arctan2(dy, dx)) - except np.RankWarning: + except np.exceptions.RankWarning: try: # rotate by 90 before fitting x_tmp = -np.array(y) @@ -109,7 +109,7 @@ def poly_line_text_extractor(classification, dot_freq='line', gold_standard=Fals dy = 0.0 # rotate by -90 to bring back into correct coordinates slope = np.rad2deg(np.arctan2(dy, dx)) - 90 - except np.RankWarning: + except np.exceptions.RankWarning: # this is the case where dx = dy = 0 (a line of zero length) slope = 0 if dot_freq == 'word': diff --git a/panoptes_aggregation/reducers/optics_line_text_reducer.py b/panoptes_aggregation/reducers/optics_line_text_reducer.py index e4dd3787..a566ca79 100644 --- a/panoptes_aggregation/reducers/optics_line_text_reducer.py +++ b/panoptes_aggregation/reducers/optics_line_text_reducer.py @@ -221,8 +221,8 @@ def optics_line_text_reducer(data_by_frame, **kwargs_optics): 'clusters_x': xm.tolist(), 'clusters_y': ym.tolist(), 'clusters_text': clusters_text, - 'number_views': cdx.sum(), - 'line_slope': slope, + 'number_views': cdx.sum().item(), + 'line_slope': slope.item(), 'consensus_score': consensus_score_value, 'consensus_text': consensus_text, 'user_ids': user_ids, diff --git a/panoptes_aggregation/reducers/optics_text_utils.py b/panoptes_aggregation/reducers/optics_text_utils.py index ad6d0ce6..bcc1d18c 100644 --- a/panoptes_aggregation/reducers/optics_text_utils.py +++ b/panoptes_aggregation/reducers/optics_text_utils.py @@ -75,7 +75,7 @@ def metric(a, b, data_in=[]): strip_tags(data_a['text'][0]), strip_tags(data_b['text'][0]) ) - return np.sqrt(dx + dy).sum() + dt + return (np.sqrt(dx + dy).sum() + dt).item() def get_min_samples(N): @@ -182,7 +182,7 @@ def cluster_of_one(X, data, user_ids, extract_index): 'clusters_y': line['y'], 'clusters_text': [[w] for w in line['text'][0].split()], 'number_views': 1, - 'line_slope': slope, + 'line_slope': slope.item(), 'consensus_score': 1.0, 'consensus_text': ' '.join(line['text'][0].split()), 'user_ids': [user_ids[user_index]], @@ -268,7 +268,7 @@ def order_lines(frame_in, angle_eps=30, gutter_eps=150): # append to final list new_frames = list(frame[cdx][mdx][y_order]) for nf in new_frames: - nf['line_slope'] = angle_row[1] + nf['line_slope'] = angle_row[1].item() nf['slope_label'] = slope_label nf['gutter_label'] = gutter_label frame_ordered += new_frames diff --git a/panoptes_aggregation/tests/reducer_tests/test_optics_line_text_reducer.py b/panoptes_aggregation/tests/reducer_tests/test_optics_line_text_reducer.py index 77d48d83..c4210fc0 100644 --- a/panoptes_aggregation/tests/reducer_tests/test_optics_line_text_reducer.py +++ b/panoptes_aggregation/tests/reducer_tests/test_optics_line_text_reducer.py @@ -493,7 +493,7 @@ 'parameters': { 'min_samples': 'auto', 'max_eps': None, - 'xi': 0.05, + 'xi': 0.11, 'angle_eps': 30.0, 'gutter_eps': 150.0, 'low_consensus_threshold': 3.0, @@ -513,7 +513,8 @@ 'angle_eps': 30.0, 'gutter_eps': 150.0, 'low_consensus_threshold': 3.0, - 'minimum_views': 5 + 'minimum_views': 5, + 'xi': 0.11 }, okwargs={ 'min_samples': 'auto' @@ -537,7 +538,8 @@ 'angle_eps': 30.0, 'gutter_eps': 150.0, 'low_consensus_threshold': 3.0, - 'minimum_views': 5 + 'minimum_views': 5, + 'xi': 0.11 }, network_kwargs=kwargs_extra_data, output_kwargs=True, diff --git a/pyproject.toml b/pyproject.toml index 458f83b2..6d7320e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,19 +19,19 @@ requires-python = ">=3.8,<3.12" dependencies = [ "beautifulsoup4>=4.8.1,<4.13", "collatex>=2.3,<2.4", - "hdbscan>=0.8.20,<=0.8.33", + "hdbscan>=0.8.20,<=0.8.41", "lxml>=4.4,<5.4", - "numpy>=1.22.0,<1.26.3", + "numpy>=2.0,<2.2", "packaging>=20.1,<24.3", - "pandas>=1.4.0,<2.1.4", + "pandas>=2.0,<2.3", "progressbar2>=3.39,<4.6", - "python-levenshtein>=0.21.0,<0.24", + "python-levenshtein>=0.21.0,<0.27", "python-slugify>=7.0.0,<8.1", "pyyaml>=6.0,<6.1", - "scikit-learn>=1.2.0,<1.3.3", - "scipy>=1.10.0,<1.11.4", + "scikit-learn>=1.2.0,<1.6", + "scipy>=1.10.0,<1.15", "werkzeug>=2.3.0,<3.2", - "shapely>=2.0,<2.0.3" + "shapely>=2.0,<2.1" ] [project.optional-dependencies] From 31e3185dfa8d1efcb4820e3182c805405401ff6e Mon Sep 17 00:00:00 2001 From: CKrawczyk Date: Thu, 5 Dec 2024 17:16:53 +0000 Subject: [PATCH 7/7] bump version --- panoptes_aggregation/version/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/panoptes_aggregation/version/__init__.py b/panoptes_aggregation/version/__init__.py index fa721b49..ea5d65fc 100644 --- a/panoptes_aggregation/version/__init__.py +++ b/panoptes_aggregation/version/__init__.py @@ -1 +1 @@ -__version__ = '4.1.0' +__version__ = '4.2.0'