Merge pull request #1006 from matouma/dev-1.0.1.dev1
Dev 1.0.1.dev1
touma-I authored Jan 31, 2025
2 parents f9a3cc4 + 813c7ca commit 6262d77
Showing 3 changed files with 13 additions and 3 deletions.
4 changes: 4 additions & 0 deletions transforms/README-list.md
@@ -43,6 +43,10 @@ Note: This list includes the transforms that were part of the release starting w

## Release notes:

### 1.0.1.dev1
Added Gneissweb transforms
### 1.0.1.dev0
PR #979 (code_profiler)
### 1.0.0.a6
Added Profiler
Added Resize
10 changes: 8 additions & 2 deletions transforms/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_transforms"
version = "1.0.1.dev0"
version = "1.0.1.dev1"
requires-python = ">=3.10,<3.13"
keywords = ["transforms", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
description = "Data Preparation Toolkit Transforms using Ray"
@@ -44,6 +44,7 @@ all = { file = [
"language/similarity/requirements.txt",
"language/extreme_tokenized/requirements.txt",
"language/readability/requirements.txt",
"language/gneissweb_classification/requirements.txt",

##### Cannot have html2parquet until we solve
## docling-ibm-models 1.1.7 depends on lxml<5.0.0 and >=4.9.1
@@ -71,6 +72,7 @@ language = { file = [
"language/similarity/requirements.txt",
"language/extreme_tokenized/requirements.txt",
"language/readability/requirements.txt",
"language/gneissweb_classification/requirements.txt",

##### Cannot have html2parquet until we solve
## docling-ibm-models 1.1.7 depends on lxml<5.0.0 and >=4.9.1
@@ -124,6 +126,8 @@ readability = { file = ["language/readability/requirements.txt"]}

code_profiler = { file = ["code/code_profiler/requirements.txt"]}

gneissweb_classification = { file = ["language/gneissweb_classification/requirements.txt"]}

rep_removal = { file = ["universal/rep_removal/requirements.txt"]}
# Does not seem to work for our custom layout
# copy all files to a single src and let automatic discovery find them
@@ -152,12 +156,14 @@ dpk_extreme_tokenized = "language/extreme_tokenized/dpk_extreme_tokenized"
dpk_readability = "language/readability/dpk_readability"
dpk_profiler = "universal/profiler/dpk_profiler"
dpk_resize = "universal/resize/dpk_resize"
dpk_gneissweb_classification = "language/gneissweb_classification/dpk_gneissweb_classification"
dpk_rep_removal = "universal/rep_removal/dpk_rep_removal"


[tool.setuptools.package-data]
#"*" = ["*.txt"]
dpk_rep_removal = ["universal/rep_removal/dpk_rep_removal/rust", "universal/rep_removal/dpk_rep_removal/gpt2"]
"dpk_rep_removal.rust" = ["**"]
"dpk_rep_removal.gpt2" = ["**"]

[options]
package_dir = ["src","test"]
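
The pyproject.toml changes above wire the new transform in twice: gneissweb_classification is added as a dynamic optional-dependencies entry, and dpk_gneissweb_classification is mapped to language/gneissweb_classification/dpk_gneissweb_classification. Assuming the dynamic optional-dependencies keys surface as pip extras, as the other per-transform entries in this file are consumed, a downstream project could depend on the new transform roughly as sketched below; the project name and version floor are illustrative, not taken from the repository.

[project]
name = "my_pipeline"                # hypothetical consumer project, not part of this repo
requires-python = ">=3.10,<3.13"    # mirrors the range declared in the diff above
dependencies = [
    # pulls in the transforms package plus the extra introduced in this commit
    "data_prep_toolkit_transforms[gneissweb_classification]>=1.0.1.dev1",
]
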
2 changes: 1 addition & 1 deletion transforms/requirements.txt
@@ -1 +1 @@
data-prep-toolkit>=0.2.3
data-prep-toolkit>=0.2.4.dev0
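
The requirements.txt floor moves from a stable release (0.2.3) to a dev pre-release (0.2.4.dev0). Because the specifier itself names a pre-release, pip will consider dev releases of data-prep-toolkit when resolving this requirement, without needing --pre. A minimal sketch of the same constraint expressed in a consumer's pyproject.toml (only the package name and floor come from the diff; the surrounding table is illustrative):

[project]
name = "my_consumer"    # hypothetical project pinning the same floor
dependencies = [
    # >=0.2.4.dev0 names a dev release, so pre-releases of data-prep-toolkit are admitted
    "data-prep-toolkit>=0.2.4.dev0",
]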
