Skip to content

Commit

Permalink
Update python 3.7->3.9 & poetry 1.1.8->1.5.1
Browse files Browse the repository at this point in the history
  • Loading branch information
juhoautio-rovio committed Sep 14, 2024
1 parent 349e396 commit 14235c1
Show file tree
Hide file tree
Showing 9 changed files with 2,331 additions and 676 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,11 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: '3.7'
python-version: '3.9'
- name: Install Poetry
run: pip install --upgrade MarkupSafe==2.0.1 poetry-core==1.0.4 poetry==1.1.8 poetry-dynamic-versioning==0.12.7 urllib3==1.26.15
run: pip install --upgrade poetry==1.5.1
- name: Install Poetry dynamic versioning plugin
run: poetry self add "poetry-dynamic-versioning[plugin]"
- name: Update apt-get
run: sudo apt-get update
- name: Install libkrb5-dev
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@ jobs:
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: '3.7'
python-version: '3.9'
- name: Install Poetry
run: pip install --upgrade MarkupSafe==2.0.1 poetry-core==1.0.4 poetry==1.1.8 poetry-dynamic-versioning==0.12.7 urllib3==1.26.15
run: pip install --upgrade poetry==1.5.1
- name: Install Poetry dynamic versioning plugin
run: poetry self add "poetry-dynamic-versioning[plugin]"
- name: Update apt-get
run: sudo apt-get update
- name: Install libkrb5-dev
Expand Down
1 change: 1 addition & 0 deletions python/.tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
poetry 1.5.1
2,950 changes: 2,297 additions & 653 deletions python/poetry.lock

Large diffs are not rendered by default.

27 changes: 13 additions & 14 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,21 @@ license = "Apache-2.0"
authors = ["Vivek Balakrishnan <[email protected]>", "Juho Autio <[email protected]>"]

[tool.poetry.dependencies]
python = "^3.7"
pyspark = "^3.0.0"
python = "^3.9"
pyspark = "^3.4.1"

[tool.poetry.dev-dependencies]
pytest = "5.4.3"
pyspark-stubs = "3.0.0.post3"
pandas = "1.1.5"
jupyter = "1.0.0"
sparkmagic = "0.20.5"
boto3 = "1.21.40"
safety = "1.10.3"
# Not directly depending on tornado. Should set version explicitly to fix CWE-601.
# However, tornado requires Python 3.8+ so can't upgrade yet.
# tornado = "^6.3.2"
# Not directly depending on cryptography. Should set version explicitly to fix CWE-295.
cryptography="^41.0.3"
pytest = "8.3.3"
pandas = ">=1.5.3,<2.0.0"
numpy = ">=1.26.4,<2.0.0"
jupyter = "1.1.1"
sparkmagic = "0.21.0"
boto3 = "1.35.19"
safety = "3.2.7"
# tornado is only a transitive dependency. Set a minimum version explicitly to fix CWE-601.
tornado = ">=6.3.2"
# cryptography is only a transitive dependency. Set a minimum version explicitly to fix CWE-295.
cryptography=">=41.0.3"

[tool.poetry-dynamic-versioning]
enable = true
Expand Down
11 changes: 9 additions & 2 deletions python/rovio_ingest/extensions/dataframe_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,17 @@ class ConfKeys:

def repartition_by_druid_segment_size(self, time_col_name, segment_granularity='DAY', rows_per_segment=5000000,
exclude_columns_with_unknown_types=False):
_jdf = self.sql_ctx._sc._jvm.com.rovio.ingest.extensions.java.DruidDatasetExtensions \
_jdf = spark_session_or_sql_ctx(self)._sc._jvm.com.rovio.ingest.extensions.java.DruidDatasetExtensions \
.repartitionByDruidSegmentSize(self._jdf, time_col_name, segment_granularity, rows_per_segment,
exclude_columns_with_unknown_types)
return DataFrame(_jdf, self.sql_ctx)
return DataFrame(_jdf, spark_session_or_sql_ctx(self))


def spark_session_or_sql_ctx(df: DataFrame):
    """Return the session-like object attached to ``df``.

    Yields ``df.sparkSession`` when the DataFrame exposes that attribute;
    otherwise falls back to ``df.sql_ctx`` to stay backward compatible with
    older Spark versions.
    """
    return df.sparkSession if hasattr(df, 'sparkSession') else df.sql_ctx


def normalize_date(spark: SparkSession, value: datetime, granularity: str) -> datetime:
Expand Down
2 changes: 1 addition & 1 deletion python/tests/spark_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def _rovio_ingest_classpath() -> str:
Note that pyspark 2.4.4 comes with spark jars with specific scala version, for example:
mleap/python/venv/lib/python3.7/site-packages/pyspark/jars/spark-core_2.11-2.4.4.jar
rovio-ingest/python/venv/lib/python3.9/site-packages/pyspark/jars/spark-core_2.11-2.4.4.jar
Thus, pyspark is incompatible with scala 2.12 and only works with scala 2.11.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class DruidDatasetExtensionsAzureSpec extends AnyFlatSpec with Matchers with Bef

lazy val spark: SparkSession = {
SparkSession.builder()
.appName("Spark/MLeap Parity Tests")
.appName(getClass.getName)
.config("spark.sql.session.timeZone", "UTC")
.config("spark.driver.bindAddress", "127.0.0.1")
.master("local[2]")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class DruidDatasetExtensionsSpec extends AnyFlatSpec with Matchers with BeforeAn

lazy val spark: SparkSession = {
SparkSession.builder()
.appName("Spark/MLeap Parity Tests")
.appName(getClass.getName)
.config("spark.sql.session.timeZone", "UTC")
.config("spark.driver.bindAddress", "127.0.0.1")
.master("local[2]")
Expand Down

0 comments on commit 14235c1

Please sign in to comment.