diff --git a/llmfoundry/utils/prompt_files.py b/llmfoundry/utils/prompt_files.py
new file mode 100644
index 0000000000..7ce747b4c0
--- /dev/null
+++ b/llmfoundry/utils/prompt_files.py
@@ -0,0 +1,58 @@
+import os
+from typing import List, Optional
+
+PROMPTFILE_PREFIX = "file::"
+
+
+def load_prompts(prompts: List[str],
+                 prompt_delimiter: Optional[str] = None) -> List[str]:
+    """
+    Loads a set of prompts, both free text and from file.
+
+    Args:
+        prompts (List[str]): List of free text prompts and prompt files
+        prompt_delimiter (Optional[str]): Delimiter for text file
+            If not provided, assumes the prompt file is a single prompt (non-delimited)
+
+    Returns:
+        List of prompt string(s)
+    """
+    prompt_strings = []
+    for prompt in prompts:
+        if prompt.startswith(PROMPTFILE_PREFIX):
+            file_prompts = load_prompts_from_file(prompt, prompt_delimiter)
+            prompt_strings.extend(file_prompts)
+        else:
+            prompt_strings.append(prompt)
+    return prompt_strings
+
+
+def load_prompts_from_file(prompt_path: str,
+                           prompt_delimiter: Optional[str] = None) -> List[str]:
+    """
+    Load a set of prompts from a text file.
+
+    Args:
+        prompt_path (str): Path for text file
+        prompt_delimiter (Optional[str]): Delimiter for text file
+            If not provided, assumes the prompt file is a single prompt (non-delimited)
+
+    Returns:
+        List of prompt string(s)
+    """
+
+    if not prompt_path.startswith(PROMPTFILE_PREFIX):
+        raise ValueError(f'prompt_path must start with {PROMPTFILE_PREFIX}')
+
+    _, prompt_file_path = prompt_path.split(PROMPTFILE_PREFIX, maxsplit=1)
+    prompt_file_path = os.path.expanduser(prompt_file_path)
+    if not os.path.isfile(prompt_file_path):
+        raise FileNotFoundError(
+            f'{prompt_file_path=} does not match any existing files.')
+
+    with open(prompt_file_path, 'r') as f:
+        prompt_string = f.read()
+
+    if prompt_delimiter is None:
+        return [prompt_string]
+    return [i for i in prompt_string.split(prompt_delimiter) if i]
diff --git a/scripts/inference/endpoint_generate.py b/scripts/inference/endpoint_generate.py
index 77c9e691c9..56d04f536c 100644
--- a/scripts/inference/endpoint_generate.py
+++ b/scripts/inference/endpoint_generate.py
@@ -11,14 +11,15 @@
 import logging
 import math
 import os
+import tempfile
 import time
 from argparse import ArgumentParser, Namespace
-from typing import List, cast
 
 import pandas as pd
 import requests
-from composer.utils import (ObjectStore, maybe_create_object_store_from_uri,
-                            parse_uri)
+from composer.utils import maybe_create_object_store_from_uri, parse_uri
+
+from llmfoundry.utils import prompt_files as utils
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
 log = logging.getLogger(__name__)
@@ -40,20 +41,18 @@ def parse_args() -> Namespace:
         '-p',
         '--prompts',
         nargs='+',
-        help='Generation prompts. Use syntax "file::/path/to/prompt.txt" to load a ' +\
-            'prompt contained in a txt file.'
+        help='List of generation prompts or list of delimited files. Use syntax ' +\
+            '"file::/path/to/prompt.txt" to load prompts contained in a txt file.'
     )
-
-    now = time.strftime('%Y%m%d-%H%M%S')
-    default_local_folder = f'/tmp/output/{now}'
-    parser.add_argument('-l',
-                        '--local-folder',
-                        type=str,
-                        default=default_local_folder,
-                        help='Local folder to save the output')
+    parser.add_argument(
+        '--prompt-delimiter',
+        default='\n',
+        help=
+        'Prompt delimiter for txt files. By default, prompts are split on newlines')
     parser.add_argument('-o',
                         '--output-folder',
+                        required=True,
                         help='Remote folder to save the output')
 
 
     #####
@@ -61,7 +60,7 @@ def parse_args() -> Namespace:
     parser.add_argument(
         '--rate-limit',
         type=int,
-        default=10,
+        default=75,
        help='Max number of calls to make to the endpoint in a second')
     parser.add_argument(
         '--batch-size',
@@ -86,21 +85,6 @@ def parse_args() -> Namespace:
     return parser.parse_args()
 
 
-def load_prompts_from_file(prompt_path_str: str) -> List[str]:
-    # Note: slightly different than hf_generate.py (uses delimiter to split strings)
-
-    if not prompt_path_str.startswith('file::'):
-        raise ValueError('prompt_path_str must start with "file::".')
-    _, prompt_file_path = prompt_path_str.split('file::', maxsplit=1)
-    prompt_file_path = os.path.expanduser(prompt_file_path)
-    if not os.path.isfile(prompt_file_path):
-        raise FileNotFoundError(
-            f'{prompt_file_path=} does not match any existing files.')
-    with open(prompt_file_path, 'r') as f:
-        prompt_string = f.read()
-    return prompt_string.split(PROMPT_DELIMITER)
-
-
 async def main(args: Namespace) -> None:
     # This is mildly experimental, so for now imports are not added as part of llm-foundry
     try:
@@ -115,7 +99,7 @@ async def main(args: Namespace) -> None:
 
     if args.batch_size > args.rate_limit:
         raise ValueError(
-            f'Batch size is {args.batch_size} but rate limit is set to { args.rate_limit} / s'
+            f'Batch size is {args.batch_size} but rate limit is set to {args.rate_limit} / s'
         )
 
     url = args.endpoint if args.endpoint else os.environ.get(ENDPOINT_URL_ENV)
@@ -129,12 +113,7 @@ async def main(args: Namespace) -> None:
     if not api_key:
         log.warning(f'API key not set in {ENDPOINT_API_KEY_ENV}')
 
-    # Load prompts
-    prompt_strings = []
-    for prompt in args.prompts:
-        if prompt.startswith('file::'):
-            prompt = load_prompts_from_file(prompt)
-        prompt_strings.append(prompt)
+    prompt_strings = utils.load_prompts(args.prompts, args.prompt_delimiter)
 
     cols = ['batch', 'prompt', 'output']
     param_data = {
@@ -144,14 +123,17 @@ async def main(args: Namespace) -> None:
         'top_p': args.top_p,
     }
 
+    total_batches = math.ceil(len(prompt_strings) / args.batch_size)
+    log.info(
+        f'Generating {len(prompt_strings)} prompts in {total_batches} batches')
+
     @sleep_and_retry
-    @limits(calls=args.rate_limit // args.batch_size, period=1)  # type: ignore
+    @limits(calls=total_batches, period=1)  # type: ignore
     async def generate(session: aiohttp.ClientSession, batch: int,
                        prompts: list):
         data = copy.copy(param_data)
         data['prompt'] = prompts
         headers = {'Authorization': api_key, 'Content-Type': 'application/json'}
-        req_start = time.time()
 
         async with session.post(url, headers=headers, json=data) as resp:
             if resp.ok:
@@ -159,12 +141,9 @@ async def generate(session: aiohttp.ClientSession, batch: int,
                     response = await resp.json()
                 except requests.JSONDecodeError:
                     raise Exception(
-                        f'Bad response: {resp.status_code} {resp.reason}'  # type: ignore
-                    )
+                        f'Bad response: {resp.status} {resp.reason}')
             else:
-                raise Exception(
-                    f'Bad response: {resp.status_code} {resp.content.decode().strip()}'  # type: ignore
-                )
+                raise Exception(f'Bad response: {resp.status} {resp.reason}')
 
         req_end = time.time()
         n_compl = response['usage']['completion_tokens']
@@ -183,10 +162,7 @@ async def generate(session: aiohttp.ClientSession, batch: int,
 
     res = pd.DataFrame(columns=cols)
     batch = 0
-    total_batches = math.ceil(len(prompt_strings) / args.batch_size)
-    log.info(
-        f'Generating {len(prompt_strings)} prompts in {total_batches} batches')
-
+    gen_start = time.time()
     async with aiohttp.ClientSession() as session:
         tasks = []
 
@@ -201,28 +177,29 @@ async def generate(session: aiohttp.ClientSession, batch: int,
 
     res = pd.concat(results)
     res.reset_index(drop=True, inplace=True)
-    log.info(f'Generated {len(res)} prompts, example data:')
+
+    gen_end = time.time()
+    gen_latency = (gen_end - gen_start)
+    log.info(f'Generated {len(res)} prompts in {gen_latency}s, example data:')
     log.info(res.head())
 
-    # save res to local output folder
-    os.makedirs(args.local_folder, exist_ok=True)
-    local_path = os.path.join(args.local_folder, 'output.csv')
-    res.to_csv(os.path.join(args.local_folder, 'output.csv'), index=False)
-    log.info(f'Saved results in {local_path}')
-
-    if args.output_folder:
-        # Upload the local output to the remote location
-        output_object_store = cast(
-            ObjectStore, maybe_create_object_store_from_uri(args.output_folder))
-        _, _, output_folder_prefix = parse_uri(args.output_folder)
-        files_to_upload = os.listdir(args.local_folder)
-
-        for file in files_to_upload:
-            assert not os.path.isdir(file)
-            local_path = os.path.join(args.local_folder, file)
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        file = 'output.csv'
+        local_path = os.path.join(tmp_dir, file)
+        res.to_csv(local_path, index=False)
+
+        output_object_store = maybe_create_object_store_from_uri(
+            args.output_folder)
+        if output_object_store is not None:
+            _, _, output_folder_prefix = parse_uri(args.output_folder)
             remote_path = os.path.join(output_folder_prefix, file)
             output_object_store.upload_object(remote_path, local_path)
-            log.info(f'Uploaded {local_path} to {args.output_folder}/{file}')
+            log.info(f'Uploaded results to {args.output_folder}/{file}')
+        else:
+            os.makedirs(args.output_folder, exist_ok=True)
+            permanent_local = os.path.join(args.output_folder, file)
+            os.rename(local_path, permanent_local)
+            log.info(f'Saved results to {permanent_local}')
 
 
 if __name__ == '__main__':
diff --git a/scripts/inference/hf_generate.py b/scripts/inference/hf_generate.py
index 45ddc6b63e..6ac645e5b7 100644
--- a/scripts/inference/hf_generate.py
+++ b/scripts/inference/hf_generate.py
@@ -1,7 +1,6 @@
 # Copyright 2022 MosaicML LLM Foundry authors
 # SPDX-License-Identifier: Apache-2.0
 import itertools
-import os
 import random
 import time
 import warnings
@@ -13,6 +12,8 @@
 import torch
 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
 
+from llmfoundry.utils import prompt_files as utils
+
 
 def get_dtype(dtype: str):
     if dtype == 'fp32':
@@ -62,9 +63,14 @@ def parse_args() -> Namespace:
             'My name is',
             'This is an explanation of deep learning to a five year old. Deep learning is',
         ],
-        help='Generation prompts. Use syntax "file::/path/to/prompt.txt" to load a ' +\
-            'prompt contained in a txt file.'
+        help='List of generation prompts or list of delimited files. Use syntax ' +\
+            '"file::/path/to/prompt.txt" to load prompts contained in a txt file.'
     )
+    parser.add_argument(
+        '--prompt-delimiter',
+        default=None,
+        help=
+        'Prompt delimiter for txt files. By default, a file is a single prompt')
     parser.add_argument('--max_seq_len', type=int, default=None)
     parser.add_argument('--max_new_tokens', type=int, default=100)
     parser.add_argument('--max_batch_size', type=int, default=None)
@@ -125,19 +131,6 @@ def parse_args() -> Namespace:
     return parser.parse_args()
 
 
-def load_prompt_string_from_file(prompt_path_str: str):
-    if not prompt_path_str.startswith('file::'):
-        raise ValueError('prompt_path_str must start with "file::".')
-    _, prompt_file_path = prompt_path_str.split('file::', maxsplit=1)
-    prompt_file_path = os.path.expanduser(prompt_file_path)
-    if not os.path.isfile(prompt_file_path):
-        raise FileNotFoundError(
-            f'{prompt_file_path=} does not match any existing files.')
-    with open(prompt_file_path, 'r') as f:
-        prompt_string = ''.join(f.readlines())
-    return prompt_string
-
-
 def maybe_synchronize():
     if torch.cuda.is_available():
         torch.cuda.synchronize()
@@ -163,11 +156,7 @@ def main(args: Namespace) -> None:
     print(f'Using {model_dtype=}')
 
     # Load prompts
-    prompt_strings = []
-    for prompt in args.prompts:
-        if prompt.startswith('file::'):
-            prompt = load_prompt_string_from_file(prompt)
-        prompt_strings.append(prompt)
+    prompt_strings = utils.load_prompts(args.prompts, args.prompt_delimiter)
 
     # Grab config first
     print(f'Loading HF Config...')
diff --git a/tests/test_prompt_files.py b/tests/test_prompt_files.py
new file mode 100644
index 0000000000..5f2fb582fc
--- /dev/null
+++ b/tests/test_prompt_files.py
@@ -0,0 +1,14 @@
+from pathlib import Path
+
+from llmfoundry.utils import prompt_files as utils
+
+def test_load_prompt_strings(tmp_path: Path):
+    assert utils.load_prompts(['hello', 'goodbye']) == ['hello', 'goodbye']
+
+    with open(tmp_path / 'prompts.txt', 'w') as f:
+        f.write('hello goodbye')
+
+    temp = utils.PROMPTFILE_PREFIX + str(tmp_path / 'prompts.txt')
+    assert utils.load_prompts(
+        [temp, temp, 'why'],
+        ' ') == ['hello', 'goodbye', 'hello', 'goodbye', 'why']
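
Usage sketch (illustrative only, not part of the diff): the snippet below exercises the prompt-loading helper added above; the prompt text, temporary-file handling, and '|' delimiter are arbitrary example choices.

    import tempfile

    from llmfoundry.utils.prompt_files import PROMPTFILE_PREFIX, load_prompts

    # Write a delimiter-separated prompt file, then load it alongside a free-text prompt.
    with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
        f.write('Tell me a joke.|Explain attention in one sentence.')
        prompt_file = f.name

    prompts = load_prompts([PROMPTFILE_PREFIX + prompt_file, 'My name is'],
                           prompt_delimiter='|')
    # -> ['Tell me a joke.', 'Explain attention in one sentence.', 'My name is']

This mirrors what test_load_prompt_strings checks with a space delimiter; passing prompt_delimiter=None (the hf_generate.py default) returns the whole file as a single prompt.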