Skip to content

Commit

Permalink
match new kaikki jsonl filenames (#90)
Browse files Browse the repository at this point in the history
  • Loading branch information
StefanVukovic99 authored Jun 28, 2024
1 parent 5e9ddca commit 6912c84
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 6 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
*.json
+*.jsonl
!tag_bank_term.json
!tag_bank_ipa.json
!parts_of_speech.json
Expand Down
4 changes: 2 additions & 2 deletions 2-extract-language.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
source_iso = os.environ.get("source_iso")
target_iso = os.environ.get("target_iso")

-input_file = f"data/kaikki/{target_iso}-extract.json"
-output_file = f"data/kaikki/{source_iso}-{target_iso}-extract.json"
+input_file = f"data/kaikki/{target_iso}-extract.jsonl"
+output_file = f"data/kaikki/{source_iso}-{target_iso}-extract.jsonl"

print(f"Reading {input_file} and writing {output_file}...")

Expand Down
8 changes: 4 additions & 4 deletions auto.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,19 +97,19 @@ for target_lang in "${languages[@]}"; do
# Step 3: Download JSON data if it doesn't exist
if [ "$target_language" = "English" ]; then
language_no_special_chars=$(echo "$language" | tr -d '[:space:]-') #Serbo-Croatian, Ancient Greek and such cases
-filename="kaikki.org-dictionary-$language_no_special_chars.json"
+filename="kaikki.org-dictionary-$language_no_special_chars.jsonl"
filepath="data/kaikki/$filename"


if [ ! -f "$filepath" ] || [ "$redownload" = true ]; then
-url="kaikki.org/dictionary/$language/$filename"
+url="https://kaikki.org/dictionary/$language/$filename"
echo "Downloading $filename from $url"
wget "$url" -O "$filepath"
else
echo "Kaikki dict already exists. Skipping download."
fi
else
-target_extract="$target_iso-extract.json"
+target_extract="$target_iso-extract.jsonl"
target_extract_path="data/kaikki/$target_extract"

if [ ! -f "$target_extract_path" ] || [ "$redownload" = true ] && [ "$downloaded_target_extract" = false ]; then
Expand All @@ -123,7 +123,7 @@ for target_lang in "${languages[@]}"; do
echo "Kaikki dict already exists. Skipping download."
fi

-filename="$source_iso-$target_iso-extract.json"
+filename="$source_iso-$target_iso-extract.jsonl"
filepath="data/kaikki/$filename"

if [ ! -f "$filepath" ]; then
Expand Down

0 comments on commit 6912c84

Please sign in to comment.