Skip to content

Commit

Permalink
apply code review suggestions
Browse files (browse the repository at this point in the history)
  • Loading branch information
AndreaFrancis committed Mar 21, 2024
1 parent 4e84b75 commit 1b8f4de
Show file tree
Hide file tree
Showing 9 changed files with 16 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/_e2e_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ jobs:
S3_REGION_NAME: "us-east-1"
CLOUDFRONT_KEY_PAIR_ID: "K3814DK2QUJ71H"
CLOUDFRONT_PRIVATE_KEY: ${{ secrets.CLOUDFRONT_PRIVATE_KEY }}
HF_HUB_ENABLE_HF_TRANSFER: "1"
run: docker compose -f docker-compose-datasets-server.yml up -d --wait --wait-timeout 20
working-directory: ./tools
- name: Install poetry
Expand Down
2 changes: 2 additions & 0 deletions chart/templates/_env/_envWorker.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,6 @@
value: {{ .Values.descriptiveStatistics.maxSplitSizeBytes | quote }}
- name: DESCRIPTIVE_STATISTICS_CACHE_DIRECTORY
value: {{ .Values.descriptiveStatistics.cacheDirectory | quote }}
- name: HF_HUB_ENABLE_HF_TRANSFER
value: "1"
{{- end -}}
2 changes: 2 additions & 0 deletions chart/templates/services/search/_container.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
value: {{ .Values.duckDBIndex.cacheDirectory | quote }}
- name: DUCKDB_INDEX_EXTENSIONS_DIRECTORY
value: "/tmp/duckdb-extensions"
- name: HF_HUB_ENABLE_HF_TRANSFER
value: "1"
volumeMounts:
{{ include "volumeMountDuckDBIndexRW" . | nindent 2 }}
securityContext:
Expand Down
2 changes: 2 additions & 0 deletions e2e/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ export S3_REGION_NAME := us-east-1
# S3_ACCESS_KEY_ID <- secret
# S3_SECRET_ACCESS_KEY <- secret

export HF_HUB_ENABLE_HF_TRANSFER := 1

# makefile variables
DOCKER_COMPOSE := ../tools/docker-compose-datasets-server.yml

Expand Down
7 changes: 3 additions & 4 deletions libs/libcommon/src/libcommon/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from fnmatch import fnmatch
from pathlib import Path
from typing import Any, Optional, TypeVar, Union, cast

from huggingface_hub import constants
import orjson
import pandas as pd
from huggingface_hub import constants, hf_hub_download
Expand Down Expand Up @@ -204,10 +204,9 @@ def download_file_from_hub(
force_download: bool = False,
resume_download: bool = False,
) -> None:
# Force hf_transfer usage
constants.HF_HUB_ENABLE_HF_TRANSFER = True
logging.debug(f"Using {constants.HF_HUB_ENABLE_HF_TRANSFER} for hf_transfer")
retry_download_hub_file = retry(on=[ReadTimeout], sleeps=HF_HUB_HTTP_ERROR_RETRY_SLEEPS)(hf_hub_download)
retry_on = [RuntimeError] if constants.HF_HUB_ENABLE_HF_TRANSFER else [ReadTimeout]
retry_download_hub_file = retry(on=retry_on, sleeps=HF_HUB_HTTP_ERROR_RETRY_SLEEPS)(hf_hub_download)
retry_download_hub_file(
repo_type=repo_type,
revision=revision,
Expand Down
1 change: 1 addition & 0 deletions services/search/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ export COMPOSE_PROJECT_NAME := search
export MONGO_PORT := 27033
export CACHE_MONGO_URL := mongodb://localhost:${MONGO_PORT}
export QUEUE_MONGO_URL := mongodb://localhost:${MONGO_PORT}
export HF_HUB_ENABLE_HF_TRANSFER := 1
# makefile variables
DOCKER_COMPOSE := ../../tools/docker-compose-mongo.yml
TEST_PATH ?= tests
Expand Down
2 changes: 1 addition & 1 deletion services/worker/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ export COMPOSE_PROJECT_NAME := worker
export MONGO_PORT := 27040
export CACHE_MONGO_URL := mongodb://localhost:${MONGO_PORT}
export QUEUE_MONGO_URL := mongodb://localhost:${MONGO_PORT}

export HF_HUB_ENABLE_HF_TRANSFER := 1
# makefile variables
DOCKER_COMPOSE := ../../tools/docker-compose-mongo.yml

Expand Down
2 changes: 2 additions & 0 deletions tools/docker-compose-datasets-server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ services:
API_UVICORN_HOSTNAME: 0.0.0.0 # required for docker compose
API_UVICORN_NUM_WORKERS: ${SEARCH_UVICORN_NUM_WORKERS-2}
API_UVICORN_PORT: ${SEARCH_UVICORN_PORT-8083}
HF_HUB_ENABLE_HF_TRANSFER: 1
ports:
# for debug
- ${SEARCH_UVICORN_PORT-8083}:${SEARCH_UVICORN_PORT-8083}
Expand Down Expand Up @@ -181,6 +182,7 @@ services:
FIRST_ROWS_MIN_CELL_BYTES: ${FIRST_ROWS_MIN_CELL_BYTES-100}
FIRST_ROWS_MIN_NUMBER: ${FIRST_ROWS_MIN_NUMBER-10}
FIRST_ROWS_COLUMNS_MAX_NUMBER: ${FIRST_ROWS_COLUMNS_MAX_NUMBER-1_000}
HF_HUB_ENABLE_HF_TRANSFER: 1
OPT_IN_OUT_URLS_SCAN_COLUMNS_MAX_NUMBER: ${OPT_IN_OUT_URLS_SCAN_COLUMNS_MAX_NUMBER-10}
OPT_IN_OUT_URLS_SCAN_MAX_CONCURRENT_REQUESTS_NUMBER: ${OPT_IN_OUT_URLS_SCAN_MAX_CONCURRENT_REQUESTS_NUMBER-100}
OPT_IN_OUT_URLS_SCAN_MAX_REQUESTS_PER_SECOND: ${OPT_IN_OUT_URLS_SCAN_MAX_REQUESTS_PER_SECOND-50}
Expand Down
2 changes: 2 additions & 0 deletions tools/docker-compose-dev-datasets-server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ services:
API_UVICORN_HOSTNAME: 0.0.0.0 # required for docker compose
API_UVICORN_NUM_WORKERS: ${SEARCH_UVICORN_NUM_WORKERS-2}
API_UVICORN_PORT: ${SEARCH_UVICORN_PORT-8083}
HF_HUB_ENABLE_HF_TRANSFER: 1
ports:
# for debug
- ${SEARCH_UVICORN_PORT-8083}:${SEARCH_UVICORN_PORT-8083}
Expand Down Expand Up @@ -196,6 +197,7 @@ services:
FIRST_ROWS_MIN_CELL_BYTES: ${FIRST_ROWS_MIN_CELL_BYTES-100}
FIRST_ROWS_MIN_NUMBER: ${FIRST_ROWS_MIN_NUMBER-10}
FIRST_ROWS_COLUMNS_MAX_NUMBER: ${FIRST_ROWS_COLUMNS_MAX_NUMBER-1_000}
HF_HUB_ENABLE_HF_TRANSFER: 1
OPT_IN_OUT_URLS_SCAN_COLUMNS_MAX_NUMBER: ${OPT_IN_OUT_URLS_SCAN_COLUMNS_MAX_NUMBER-10}
OPT_IN_OUT_URLS_SCAN_MAX_CONCURRENT_REQUESTS_NUMBER: ${OPT_IN_OUT_URLS_SCAN_MAX_CONCURRENT_REQUESTS_NUMBER-100}
OPT_IN_OUT_URLS_SCAN_MAX_REQUESTS_PER_SECOND: ${OPT_IN_OUT_URLS_SCAN_MAX_REQUESTS_PER_SECOND-50}
Expand Down

0 comments on commit 1b8f4de

Please sign in to comment.