diff --git a/.env_example b/.env_example index 4213619..583f3d7 100644 --- a/.env_example +++ b/.env_example @@ -4,8 +4,14 @@ LANGCHAIN_ENDPOINT=https://api.smith.langchain.com LANGCHAIN_PROJECT=Experiments LANGCHAIN_API_KEY= -# OpenAI Platform API key. See https://platform.openai.com/docs/quickstart/account-setup +# Either "openai" or "openai_azure" +ALTTEXTER_MODEL=openai +# openai: See https://platform.openai.com/docs/quickstart/account-setup OPENAI_API_KEY= +# openai_azure: See https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api#retrieve-key-and-endpoint +AZURE_OPENAI_ENDPOINT= +AZURE_OPENAI_API_KEY= +AZURE_OPENAI_DEPLOYMENT= # Host and port to bind service to ALTTEXTER_HOST=0.0.0.0 diff --git a/README.md b/README.md index 3f8eb75..4950287 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ via [gov.uk:](https://design102.blog.gov.uk/2022/01/14/whats-the-alternative-how 1. Clone the repo. 1. Copy `.env-example` to `.env` and fill in the required env variables. 1. Optionally edit `config.json` to customize CORS and logging. -1. Run `docker-compose up` to build and start the service. +1. Run `docker-compose up` (v1) or `docker compose up` (v2) to build and start the service. 1. Run `python client-example.py example/apis.ipynb` to test. Expected output: ```bash @@ -50,7 +50,7 @@ via [gov.uk:](https://design102.blog.gov.uk/2022/01/14/whats-the-alternative-how ## Features * Uses LangChain's [Pydantic parser](https://python.langchain.com/docs/modules/model_io/output_parsers/types/pydantic) as foundation for system prompt to reliably generate a JSON of expected format ([function calling](https://community.openai.com/t/does-the-model-gpt-4-vision-preview-have-function-calling/490197/2) will be even cooler). -* Optionally integrates with LangSmith (in beta) to serve [trace URL](https://docs.smith.langchain.com/tracing/tracing-faq) for each generation. 
+* Optionally integrates with LangSmith to serve [trace URL](https://docs.smith.langchain.com/tracing/tracing-faq) for each generation. ## TODO @@ -58,6 +58,5 @@ via [gov.uk:](https://design102.blog.gov.uk/2022/01/14/whats-the-alternative-how - [ ] Unit tests - [ ] Special handling for large files and images - [ ] Rate limiting at the service level -- [ ] Option to use [Azure OpenAI Services](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/gpt-4-turbo-with-vision-is-now-available-on-azure-openai-service/ba-p/4008456) - [ ] Explore extending to multimodal models beyond OpenAI -- [ ] Extend this TODO list +- [X] Option to use [Azure OpenAI Services](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/gpt-4-turbo-with-vision-is-now-available-on-azure-openai-service/ba-p/4008456) diff --git a/alttexter.py b/alttexter.py index 8665bc8..7cdde2c 100644 --- a/alttexter.py +++ b/alttexter.py @@ -6,17 +6,29 @@ from langchain import callbacks from langchain.callbacks.tracers.langchain import wait_for_all_tracers -from langchain.chat_models import ChatOpenAI from langchain.output_parsers import PydanticOutputParser -from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate -from langchain_core.messages import HumanMessage +from langchain.prompts import ChatPromptTemplate +from langchain_core.messages import HumanMessage, SystemMessage +from langchain_openai import AzureChatOpenAI, ChatOpenAI from langsmith import Client from schema import AlttexterResponse, ImageAltText -def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[List[ImageAltText], Optional[str]]: +def determine_llm() -> ChatOpenAI: + """Determine which LLM to use based on environment variables.""" + model_env = os.getenv("ALTTEXTER_MODEL") + if model_env == 'openai': + return ChatOpenAI(verbose=True, temperature=0, model="gpt-4-vision-preview", max_tokens=4096) + elif model_env == 'openai_azure': + return AzureChatOpenAI(verbose=True, 
temperature=0, openai_api_version="2024-02-15-preview", + azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT"), + model="vision-preview", max_tokens=4096) + else: + raise ValueError(f"Unsupported model specified: {model_env}") + +def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[List[ImageAltText], Optional[str]]: """ Processes input text and images to generate alt text and title attributes. @@ -28,20 +40,12 @@ def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[Lis Returns: Tuple[AlttexterResponse, str]: Generated alt texts and optional tracing URL. """ - llm = ChatOpenAI( - verbose=True, - temperature=0, - model="gpt-4-vision-preview", - max_tokens=4096 - ) + llm = determine_llm() content = [ { "type": "text", - "text": f"""ARTICLE: - -{input_text} - """ + "text": f"""ARTICLE: {input_text}""" } ] @@ -73,20 +77,12 @@ def alttexter(input_text: str, images: dict, image_urls: List[str]) -> Tuple[Lis content.append(image_entry) parser = PydanticOutputParser(pydantic_object=AlttexterResponse) - - system_prompt = SystemMessagePromptTemplate.from_template( - template="""You are a world-class expert at generating concise alternative text and title attributes for images defined in technical articles written in markdown format. - -For each image in the article use a contextual understanding of the article text and the image itself to generate a concise alternative text and title attribute. 
- -{format_instructions}""", - partial_variables={"format_instructions": parser.get_format_instructions()}, - ) - all_image_identifiers = list(images.keys()) + image_urls + messages = ChatPromptTemplate.from_messages( [ - system_prompt, + SystemMessage( + content='''You are a world-class expert at generating concise alternative text and title attributes for images defined in technical articles written in markdown format.\nFor each image in the article use a contextual understanding of the article text and the image itself to generate a concise alternative text and title attribute.\n{format_instructions}'''.format(format_instructions=parser.get_format_instructions())), HumanMessage(content=content), HumanMessage( content=f"Tip: List of file names of images including their paths or URLs: {str(all_image_identifiers)}" diff --git a/docker-compose.yml b/docker-compose.yml index 33d0b7c..171d9f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -11,10 +11,14 @@ services: volumes: - ${ALTTEXTER_CERTS_DIR}:/certs environment: - - LANGCHAIN_PROJECT - LANGCHAIN_TRACING_V2 - LANGCHAIN_ENDPOINT + - LANGCHAIN_PROJECT - LANGCHAIN_API_KEY - - ALTTEXTER_TOKEN + - ALTTEXTER_MODEL - OPENAI_API_KEY + - AZURE_OPENAI_ENDPOINT + - AZURE_OPENAI_API_KEY + - AZURE_OPENAI_DEPLOYMENT + - ALTTEXTER_TOKEN command: python main.py --port ${ALTTEXTER_PORT} --host ${ALTTEXTER_HOST} --certfile /certs/${ALTTEXTER_CERTFILE_NAME} --keyfile /certs/${ALTTEXTER_KEYFILE_NAME} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 261cbff..623cfb3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,7 @@ -langchain==0.0.354 -langsmith==0.0.80 -openai==1.6.1 -fastapi==0.105.0 +langchain==0.1.7 +langchain-openai==0.0.6 +fastapi==0.109.2 pydantic==1.10.12 -uvicorn==0.25.0 +uvicorn==0.27.1 tiktoken==0.5.2 nbformat==5.9.2 \ No newline at end of file diff --git a/schema.py b/schema.py index 436b268..0b6d920 100644 --- a/schema.py +++ b/schema.py @@ -15,7 +15,7 @@ 
class AlttexterRequest(BaseModel): class ImageAltText(BaseModel): name: str = Field(..., description="File name of the image including path or URL.") title: str = Field(..., description="Title of the image.") - alt_text: str = Field(..., description="Concise alternative text for the image.") + alt_text: str = Field(..., description="Concise alternative text for the image. The text should follow the Microsoft Style Guide.") class AlttexterResponse(BaseModel):