-
-
Notifications
You must be signed in to change notification settings - Fork 2.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ci: add checksum checker pipeline (#2274)
Signed-off-by: mudler <[email protected]>
- Loading branch information
Showing
2 changed files
with
126 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#!/bin/bash | ||
set -euxo pipefail | ||
# This script needs yq and huggingface_hub to be installed. | ||
# To install huggingface_hub, run: pip install huggingface_hub | ||
|
||
# Path to the input YAML file | ||
input_yaml=$1 | ||
|
||
# Function to download file and check checksum using Python | ||
#######################################
# Download one gallery file, compute its SHA-256, and rewrite the sha256
# field in the YAML index when it differs from the recorded value.
# Globals:
#   input_yaml (read) - path of the YAML file that is edited in place
# Arguments:
#   $1 - model name (accepted for interface compatibility; not used here)
#   $2 - file name, matched against .filename in the YAML entry
#   $3 - URI to download (huggingface://, https://huggingface.co/..., or any
#        other URL fetched directly)
#   $4 - checksum currently recorded in the YAML
#   $5 - index of the model entry in the top-level YAML sequence
# Outputs:
#   One status line on stdout; download errors on stderr.
# Returns:
#   0 on match or successful update, non-zero when the checksum could not
#   be computed.
#######################################
check_and_update_checksum() {
  local file_name="$2"
  local uri="$3"
  local old_checksum="$4"
  local idx="$5"
  local new_checksum

  # The URI and file name are handed to Python as argv instead of being
  # spliced into the program text, so quotes or backslashes in either value
  # cannot break or inject into the Python source.
  new_checksum=$(
    python3 - "$uri" "$file_name" <<'PY'
import hashlib
import os
import sys

import requests
from huggingface_hub import hf_hub_download

CHUNK_SIZE = 1024 * 1024

uri, file_name = sys.argv[1], sys.argv[2]


def parse_uri(uri):
    """Return (download_type, target).

    For Hugging Face URIs the target is the repo id as a plain string;
    for everything else it is the URI itself.
    """
    if uri.startswith("huggingface://"):
        # huggingface://<repo id>/<file>: the repo id is everything before
        # the last path segment.
        return "huggingface", uri.split("://")[1].rsplit("/", 1)[0]
    if "huggingface.co" in uri:
        # https://huggingface.co/<repo id>/resolve/<rev>/<file>: the whole
        # path in front of /resolve/ is the repo id.
        parts = uri.split("/resolve/")
        if len(parts) > 1:
            return "huggingface", parts[0].split("https://huggingface.co/")[-1]
    return "direct", uri


def sha256_of_file(path):
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        for block in iter(lambda: handle.read(CHUNK_SIZE), b""):
            digest.update(block)
    return digest.hexdigest()


download_type, target = parse_uri(uri)
if download_type == "huggingface":
    # token is the current spelling of the deprecated use_auth_token.
    file_path = hf_hub_download(repo_id=target, filename=file_name, token=False)
    # NOTE(review): the returned path is normally a link into the hub cache;
    # resolve it first so the underlying blob can be removed as well.
    blob_path = os.path.realpath(file_path)
    print(sha256_of_file(file_path))
    os.remove(file_path)
    if blob_path != file_path and os.path.exists(blob_path):
        os.remove(blob_path)
else:
    # Hash the response while it streams: nothing is written into the
    # working tree and the whole file is never held in memory.
    digest = hashlib.sha256()
    with requests.get(target, stream=True) as response:
        if response.status_code != 200:
            print(f"Error downloading file: {response.status_code}", file=sys.stderr)
            sys.exit(1)
        for block in response.iter_content(chunk_size=CHUNK_SIZE):
            digest.update(block)
    print(digest.hexdigest())
PY
  ) || {
    echo "Failed to compute checksum for $file_name ($uri)" >&2
    return 1
  }

  # Never write an empty checksum into the index.
  if [[ -z "$new_checksum" ]]; then
    echo "Empty checksum computed for $file_name ($uri)" >&2
    return 1
  fi

  # Compare and update the YAML file if checksums do not match
  if [[ "$old_checksum" != "$new_checksum" ]]; then
    echo "Checksum mismatch for $file_name. Updating..."
    yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
    yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
  else
    echo "Checksum match for $file_name. No update needed."
  fi
}
|
||
# Read the YAML and process each file | ||
# Walk every entry of the YAML document and, inside each entry, every file it
# lists; each file is handed to check_and_update_checksum together with the
# index of the entry it belongs to.
entry_total=$(yq eval '. | length' "$input_yaml")
entry=0
while ((entry < $entry_total)); do
  model=$(yq eval ".[$entry].name" "$input_yaml")
  file_total=$(yq eval ".[$entry].files | length" "$input_yaml")
  file=0
  while ((file < $file_total)); do
    # Common yq path prefix for the file currently being inspected.
    file_query=".[$entry].files[$file]"
    target_file=$(yq eval "${file_query}.filename" "$input_yaml")
    target_uri=$(yq eval "${file_query}.uri" "$input_yaml")
    recorded_sum=$(yq eval "${file_query}.sha256" "$input_yaml")
    echo "Checking model $model, file $target_file. URI = $target_uri, Checksum = $recorded_sum"
    check_and_update_checksum "$model" "$target_file" "$target_uri" "$recorded_sum" "$entry"
    file=$((file + 1))
  done
  entry=$((entry + 1))
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Nightly (and manually triggerable) job that recomputes the checksums listed
# in gallery/index.yaml and opens a pull request with any changes.
name: Check if checksums are up-to-date
on:
  schedule:
    - cron: '0 20 * * *'
  workflow_dispatch:
jobs:
  checksum_check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Install dependencies
        run: |
          # requests is imported directly by the checker script, so install it
          # explicitly rather than relying on it being pulled in transitively.
          pip install huggingface_hub requests
          # yq
          # VERSION and BINARY must be assigned as their own statements:
          # written as a prefix on the wget command they are not yet set
          # when the shell expands ${VERSION}/${BINARY} in the URL below.
          VERSION=v4.43.1
          BINARY=yq_linux_amd64
          wget "https://github.com/mikefarah/yq/releases/download/${VERSION}/${BINARY}.tar.gz" -O - |
            tar xz && sudo mv "${BINARY}" /usr/bin/yq
      - name: Checksum checker 🔧
        run: |
          bash .github/checksum_checker.sh gallery/index.yaml
      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v6
        with:
          token: ${{ secrets.UPDATE_BOT_TOKEN }}
          push-to-fork: ci-forks/LocalAI
          commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
          title: 'models(gallery): :arrow_up: update checksum'
          branch: "update/checksum"
          body: Updating checksums in gallery/index.yaml
          signoff: true