Skip to content

Commit

Permalink
add retries for load datasets requests failures (#2007)
Browse files Browse the repository at this point in the history
  • Loading branch information
winglian authored Oct 31, 2024
1 parent d4dbfa0 commit dc1de7d
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions src/axolotl/utils/data/sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

import functools
import logging
import time
from pathlib import Path
from typing import List, Optional, Tuple, Union

import requests
from datasets import (
Dataset,
DatasetDict,
Expand Down Expand Up @@ -53,6 +55,28 @@
LOG = logging.getLogger("axolotl")


def retry_on_request_exceptions(max_retries=3, delay=1):
    """Build a decorator that retries a call on transient ``requests`` errors.

    The decorated function is re-invoked when it raises
    ``requests.exceptions.ReadTimeout`` or
    ``requests.exceptions.ConnectionError``. Any other exception propagates
    immediately without a retry.

    Args:
        max_retries: Total number of attempts (not the number of *extra*
            attempts). Values below 1 are treated as 1 so the wrapped
            function is always called at least once.
        delay: Seconds to sleep between consecutive attempts. No sleep
            happens after the final attempt.

    Returns:
        A decorator that wraps a callable with the retry behaviour.

    Raises:
        requests.exceptions.ReadTimeout, requests.exceptions.ConnectionError:
            Re-raised from the final attempt once all attempts are used up.
    """
    # Guard against max_retries <= 0: an empty retry loop used to skip the
    # call entirely and silently return None.
    attempts = max(1, max_retries)

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Every attempt except the last swallows the transient error and
            # waits before trying again.
            for _ in range(attempts - 1):
                try:
                    return func(*args, **kwargs)
                except (
                    requests.exceptions.ReadTimeout,
                    requests.exceptions.ConnectionError,
                ):
                    time.sleep(delay)

            # Final attempt runs unguarded so a failure surfaces to the
            # caller with its original traceback.
            return func(*args, **kwargs)

        return wrapper

    return decorator


@retry_on_request_exceptions(max_retries=3, delay=5)
def prepare_dataset(cfg, tokenizer, processor=None):
prompters = []
if not cfg.pretraining_dataset:
Expand Down

0 comments on commit dc1de7d

Please sign in to comment.