Commit b100627

Merge branch 'main' into small-improvement-in-models

sadra-barikbin committed Dec 26, 2024
2 parents: 37bfb2e + 2ef9740
Showing 93 changed files with 3,075 additions and 4,395 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/tests.yaml
@@ -18,7 +18,6 @@ jobs:
         uses: actions/checkout@v3
         with:
           lfs: 'true'
-          ref: ${{ github.event.pull_request.head.sha }} # we want to test against our branch not against a merge commit
       - name: Setup Python environment
         uses: actions/setup-python@v4
         with:
@@ -28,18 +27,20 @@ jobs:
         run: |
           pip install -e .[dev,extended_tasks,multilingual]
       - name: Get cached files
-        uses: actions/cache@v2
+        uses: actions/cache@v4
         id: get-cache
         with:
           path: "cache"
           key: test-cache-HF
       - name: Test
         env:
           HF_TEST_TOKEN: ${{ secrets.HF_TEST_TOKEN }}
+          HF_HOME: "cache/models"
+          HF_DATASETS_CACHE: "cache/datasets"
         run: | # PYTHONPATH="${PYTHONPATH}:src" HF_DATASETS_CACHE="cache/datasets" HF_HOME="cache/models"
           python -m pytest --disable-pytest-warnings
       - name: Write cache
-        uses: actions/cache@v2
+        uses: actions/cache@v4
         with:
           path: "cache"
           key: test-cache-HF
18 changes: 18 additions & 0 deletions .github/workflows/trufflehog.yml
@@ -0,0 +1,18 @@
+on:
+  push:
+
+name: Scan Secret Leaks
+
+permissions:
+  contents: read
+
+jobs:
+  trufflehog:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Secret Scanning
+        uses: trufflesecurity/trufflehog@main
24 changes: 14 additions & 10 deletions README.md
@@ -55,7 +55,7 @@ Hub, S3, or locally.
 ## ⚡️ Installation
 
 ```bash
-pip install lighteval[accelerate]
+pip install lighteval
 ```
 
 Lighteval allows for many extras when installing, see [here](https://github.com/huggingface/lighteval/wiki/Installation) for a complete list.
@@ -71,20 +71,24 @@ huggingface-cli login
 
 Lighteval offers two main entry points for model evaluation:
 
-
-* `lighteval accelerate`: evaluate models on CPU or one or more GPUs using [🤗
-  Accelerate](https://github.com/huggingface/accelerate).
-* `lighteval nanotron`: evaluate models in distributed settings using [⚡️
-  Nanotron](https://github.com/huggingface/nanotron).
+- `lighteval accelerate` : evaluate models on CPU or one or more GPUs using [🤗
+  Accelerate](https://github.com/huggingface/accelerate)
+- `lighteval nanotron`: evaluate models in distributed settings using [⚡️
+  Nanotron](https://github.com/huggingface/nanotron)
+- `lighteval vllm`: evaluate models on one or more GPUs using [🚀
+  VLLM](https://github.com/vllm-project/vllm)
+- `lighteval endpoint`
+    - `inference-endpoint`: evaluate models on one or more GPUs using [🔗
+      Inference Endpoint](https://huggingface.co/inference-endpoints/dedicated)
+    - `tgi`: evaluate models on one or more GPUs using [🔗 Text Generation Inference](https://huggingface.co/docs/text-generation-inference/en/index)
+    - `openai`: evaluate models on one or more GPUs using [🔗 OpenAI API](https://platform.openai.com/)
 
 Here’s a quick command to evaluate using the Accelerate backend:
 
 ```shell
 lighteval accelerate \
-    --model_args "pretrained=gpt2" \
-    --tasks "leaderboard|truthfulqa:mc|0|0" \
-    --override_batch_size 1 \
-    --output_dir="./evals/"
+    "pretrained=gpt2" \
+    "leaderboard|truthfulqa:mc|0|0"
 ```
 
 ## 🙏 Acknowledgements
9 changes: 0 additions & 9 deletions community_tasks/_template.py
@@ -99,8 +99,6 @@ def __init__(
             suite=["community"],
             generation_size=-1,
             stop_sequence=None,
-            output_regex=None,
-            frozen=False,
         )
@@ -118,10 +116,3 @@ def __init__(
     sample_level_fn=lambda x: x,  # how to compute score for one sample
     corpus_level_fn=np.mean,  # aggregation
 )
-
-# MODULE LOGIC
-# You should not need to touch this
-# Convert to dict for lighteval
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
8 changes: 0 additions & 8 deletions community_tasks/aimo_evals.py
@@ -56,11 +56,3 @@ def aimo_prompt(line, task_name: str = None):
 
 # STORE YOUR EVALS
 TASKS_TABLE = [task]
-
-
-# MODULE LOGIC
-# You should not need to touch this
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
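Both community-task diffs above remove the module-level `__main__` debug block, leaving a task module with just a prompt function, a task config, and the `TASKS_TABLE` through which lighteval discovers tasks. For orientation, here is a minimal sketch of such a module after this change. It is an illustration only: the import paths, the config fields beyond those visible in the diffs (`hf_repo`, `hf_subset`, `evaluation_splits`), the dataset columns, and the `Metrics.loglikelihood_acc` choice are assumptions to be checked against `community_tasks/_template.py`.

```python
# Minimal sketch of a community task module after this commit.
# Assumptions are marked; see community_tasks/_template.py for the
# authoritative template.
from lighteval.metrics.metrics import Metrics
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.requests import Doc


def prompt_fn(line, task_name: str = None):
    # Map one dataset row to a Doc: the query shown to the model, the
    # candidate choices, and the index of the gold answer.
    return Doc(
        task_name=task_name,
        query=line["question"],  # hypothetical dataset columns
        choices=[line["correct"], line["wrong"]],
        gold_index=0,
    )


task = LightevalTaskConfig(
    name="mytask",
    prompt_function=prompt_fn,
    suite=["community"],  # as in the template diff above
    hf_repo="my-org/my-dataset",  # hypothetical dataset id
    hf_subset="default",
    evaluation_splits=["test"],
    metric=[Metrics.loglikelihood_acc],  # assumed stock metric
    generation_size=-1,  # as in the template diff above
    stop_sequence=None,  # as in the template diff above
)

# With the __main__ block gone, tasks are exposed only through this table.
TASKS_TABLE = [task]
```

A module like this is then pointed to from the CLI through lighteval's custom-tasks option (flag spelling varies by version).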
(Diffs for the remaining 88 changed files are not shown.)