Skip to content

Commit

Permalink
revert changes
Browse files Browse the repository at this point in the history
  • Loading branch information
Tianhao-Gu committed Jun 11, 2024
1 parent 83c90c9 commit df3bfe8
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 33 deletions.
34 changes: 5 additions & 29 deletions src/notebook_utils/notebook_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,13 @@

import itables.options as opt
from itables import init_notebook_mode, show
from pandas import DataFrame as PandasDataFrame
from pyspark.sql import DataFrame as SparkDataFrame
from pandas import DataFrame

lock = RLock()


def spark_to_pandas(
        spark_df: SparkDataFrame,
        limit: int = 1000,
        offset: int = 0
) -> PandasDataFrame:
    """
    Collect a slice of a Spark DataFrame into a pandas DataFrame.

    The slice is taken by applying ``offset`` first and ``limit`` second,
    and the result is then converted with ``toPandas()``.

    :param spark_df: the Spark DataFrame to read rows from
    :param limit: the number of rows to fetch (defaults to 1000)
    :param offset: the number of rows to skip before fetching (defaults to 0)
    :return: a pandas DataFrame built from the selected rows
    """
    # Skip first, then cap the row count, so the pandas conversion only
    # receives the requested window.
    after_skip = spark_df.offset(offset)
    window = after_skip.limit(limit)
    return window.toPandas()


def display_df(
df: PandasDataFrame | SparkDataFrame,
df: DataFrame,
layout: dict = None,
buttons: list = None,
length_menu: list = None
Expand All @@ -35,21 +17,15 @@ def display_df(
Display a pandas DataFrame using itables.
iTables project page: https://github.com/mwouts/itables
Notice itables.show() function is not compatible with Spark DataFrames. If a Spark DataFrame is passed to this
function, it will be converted to a pandas DataFrame (first 1000 rows) before displaying it.
Note: this function is not compatible with Spark DataFrames. Convert a Spark DataFrame to a pandas DataFrame
before calling this function; be aware that this conversion is not efficient for large datasets.
:param df: a pandas DataFrame or a Spark DataFrame
:param df: a pandas DataFrame # TODO add spark DataFrame support
:param layout: layout options, refer to https://datatables.net/reference/option/layout
:param buttons: buttons options, options refer to https://datatables.net/reference/button/
:param length_menu: length menu options, refer to https://datatables.net/reference/option/lengthMenu
:return:
"""
# convert Spark DataFrame to pandas DataFrame
if isinstance(df, SparkDataFrame):
if df.count() > 1000:
print("Converting first 1000 rows from Spark to Pandas DataFrame...")
df = spark_to_pandas(df)

# initialize itables for the notebook
init_notebook_mode(all_interactive=False)

Expand Down
5 changes: 1 addition & 4 deletions src/notebook_utils/startup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,4 @@
remove_table,
)
from minio_utils.minio_utils import get_minio_client
from notebook_utils.notebook_utils import (
display_df,
spark_to_pandas,
)
from notebook_utils.notebook_utils import display_df

0 comments on commit df3bfe8

Please sign in to comment.