# models.yaml

---
# This is the model configuration file used by the `home-assistant-datasets`
# tool. Each entry is a ConfigEntry that configures a specific model type
# with whatever integration is needed (either core integration or custom component).
#
# Some models require secrets, such as API keys or URLs, that are specific to
# you and are not checked into the repository. They are referenced in this file
# like `!secret openai_api_key`. Create a file called `secrets.yaml` and include
# the necessary keys.
models:
  # Home Assistant's own NLP pipeline (see hassil). This entry sets no
  # config_entry_data or config_entry_options.
  - model_id: assistant
    domain: homeassistant
    description: The Home Assistant NLP assistant pipeline
    urls:
      - https://github.com/home-assistant/hassil

  #
  # Large Cloud Models
  #

  # OpenAI gpt-3.5 through the openai_conversation integration.
  - model_id: 'gpt-3.5'
    domain: 'openai_conversation'
    description: 'Open AI Conversation integration using gpt-3.5 (175B)'
    urls:
      - 'https://platform.openai.com/docs/models/gpt-3-5-turbo'
    config_entry_data:
      # Resolved from secrets.yaml; never commit the real key.
      api_key: !secret openai_api_key
    config_entry_options:
      chat_model: 'gpt-3.5-turbo-1106'
      llm_hass_api: 'assist'

  # OpenAI gpt-4o through the openai_conversation integration.
  - model_id: 'gpt-4o'
    domain: 'openai_conversation'
    description: 'Open AI Conversation integration using gpt-4o'
    urls:
      - 'https://platform.openai.com/docs/models/gpt-4o'
    config_entry_data:
      # Resolved from secrets.yaml; never commit the real key.
      api_key: !secret openai_api_key
    config_entry_options:
      chat_model: 'gpt-4o'
      llm_hass_api: 'assist'

  # OpenAI gpt-4o-mini through the openai_conversation integration.
  - model_id: 'gpt-4o-mini'
    domain: 'openai_conversation'
    description: 'Open AI Conversation integration using gpt-4o-mini'
    urls:
      - 'https://platform.openai.com/docs/models/gpt-4o-mini'
    config_entry_data:
      # Resolved from secrets.yaml; never commit the real key.
      api_key: !secret openai_api_key
    config_entry_options:
      chat_model: 'gpt-4o-mini'
      llm_hass_api: 'assist'

  # Google Gemini Pro (v1) through google_generative_ai_conversation.
  - model_id: 'gemini-pro'
    domain: 'google_generative_ai_conversation'
    description: 'Google Generative AI integration using gemini pro (v1)'
    urls:
      - 'https://deepmind.google/technologies/gemini/pro/'
    config_entry_data:
      # Resolved from secrets.yaml; never commit the real key.
      api_key: !secret google_api_key
    config_entry_options:
      chat_model: 'models/gemini-pro'
      llm_hass_api: 'assist'

  # Google Gemini 1.5 Flash through google_generative_ai_conversation.
  - model_id: 'gemini-1.5-flash'
    domain: 'google_generative_ai_conversation'
    description: 'Google Generative AI integration using gemini flash (v1.5)'
    urls:
      - 'https://blog.google/products/gemini/google-gemini-new-features-july-2024/'
    config_entry_data:
      # Resolved from secrets.yaml; never commit the real key.
      api_key: !secret google_api_key
    config_entry_options:
      chat_model: 'models/gemini-1.5-flash-latest'
      llm_hass_api: 'assist'


  # Anthropic Claude 3.5 Sonnet through the anthropic integration.
  - model_id: 'claude-3-5-sonnet'
    domain: 'anthropic'
    description: 'Anthropic integration using Claude 3.5 Sonnet'
    urls:
      - 'https://www.anthropic.com/news/claude-3-5-sonnet'
    config_entry_data:
      # Resolved from secrets.yaml; never commit the real key.
      api_key: !secret anthropic_api_key
    config_entry_options:
      chat_model: 'claude-3-5-sonnet-20240620'
      llm_hass_api: 'assist'

  # Anthropic Claude 3 Haiku through the anthropic integration.
  - model_id: 'claude-3-haiku'
    domain: 'anthropic'
    description: 'Anthropic integration using Claude 3 Haiku'
    urls:
      - 'https://www.anthropic.com/news/claude-3-haiku'
    config_entry_data:
      # Resolved from secrets.yaml; never commit the real key.
      api_key: !secret anthropic_api_key
    config_entry_options:
      chat_model: 'claude-3-haiku-20240307'
      llm_hass_api: 'assist'

  #
  # Open Source smaller function calling models
  #

  # Uses an OpenAI-compatible custom component:
  # https://github.com/allenporter/hass-openai-custom-conversation
  # Functionary small v2.5 served behind an OpenAI-compatible endpoint.
  - model_id: 'functionary-small-v2.5'
    domain: 'vicuna_conversation'
    description: >-
      A custom open AI integration using functionary small v2.5 (8B) with a
      modified pre-release llama cpp python server.
    urls:
      - 'https://huggingface.co/meetkai/functionary-small-v2.5'
      - 'https://github.com/abetlen/llama-cpp-python'
      - 'https://github.com/allenporter/functionary-server'
    config_entry_data:
      # Literal placeholder value, not a `!secret` reference.
      api_key: 'sk-0000000000000000000'
      # Point this at your own OpenAI-compatible functionary server.
      base_url: 'http://functionary.functionary:8000/v1'
    config_entry_options:
      chat_model: 'functionary-small-v2.5'
      llm_hass_api: 'assist'

  # Mistral 7B Instruct v0.3 served by Ollama.
  - model_id: 'mistral-v3'
    domain: 'ollama'
    description: 'Mistral V3 (7B) using Ollama'
    urls:
      - 'https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3'
      - 'https://ollama.com/library/mistral'
      - 'https://mistral.ai/news/announcing-mistral-7b/'
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: 'mistral:latest'
    config_entry_options:
      llm_hass_api: 'assist'

  # Mistral NeMo (12B) served by Ollama.
  - model_id: 'mistral-nemo'
    domain: 'ollama'
    description: >-
      A state-of-the-art 12B model with 128k context length, built by Mistral AI
      in collaboration with NVIDIA.
    urls:
      - 'https://mistral.ai/news/mistral-nemo/'
      - 'https://ollama.com/library/mistral-nemo'
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: 'mistral-nemo'
    config_entry_options:
      llm_hass_api: 'assist'
      # Set to 8k here even though the model has a 128k context length.
      num_ctx: 8192

  # Groq's tool-use fine tune of Llama 3 (8B), served by Ollama.
  - model_id: 'llama3-groq-tool-use'
    domain: 'ollama'
    description: 'Groq tool use model fine tuned from llama3 (8B) using Ollama'
    urls:
      - 'https://ollama.com/library/llama3-groq-tool-use'
      - 'https://console.groq.com/docs/tool-use'
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: 'llama3-groq-tool-use:latest'
    config_entry_options:
      llm_hass_api: 'assist'

  # Llama 3.1 (8B) served by Ollama.
  - model_id: llama3.1
    domain: ollama
    description: Llama 3.1 (8B) from Meta using Ollama with 8k context window.
    urls:
      - https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct
      - https://ollama.com/library/llama3.1
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: llama3.1:latest
    config_entry_options:
      llm_hass_api: assist
      # Matches the 8k context window stated in the description.
      num_ctx: 8192

  # Salesforce xLAM (7B) served by Ollama.
  - model_id: 'xlam-7b'
    domain: 'ollama'
    description: 'XLam (7B) model from Salesforce using Ollama with 4k context window.'
    urls:
      - 'https://huggingface.co/Salesforce/xLAM-7b-fc-r'
      - 'https://github.com/SalesforceAIResearch/xLAM'
      - 'https://ollama.com/allenporter/xlam:7b'
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: 'allenporter/xlam:7b'
    config_entry_options:
      llm_hass_api: 'assist'
      # Matches the 4k context window stated in the description.
      num_ctx: 4096

  # Assist LLM (fine tune based on Llama 3.1 8B) served by Ollama.
  - model_id: 'assist-llm'
    domain: 'ollama'
    description: >-
      Assist LLM fine tuned on Home Assistant Assist intents based on
      Llama 3.1 (8B)
    urls:
      - 'https://ollama.com/allenporter/assist-llm'
      - 'https://huggingface.co/allenporter/assist-llm'
      - 'https://huggingface.co/allenporter/assist-llm-GGUF'
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: 'allenporter/assist-llm'
    config_entry_options:
      llm_hass_api: 'assist'
      num_ctx: 8192

  #
  # Open Source smaller models
  #

  # Configuration for home-llm custom component https://github.com/acon96/home-llm
  # home-llm v3 with the Ollama backend. The component's settings depend heavily
  # on the backend and model in use, so treat the values below as specific to
  # this setup and work out your own valid values if you change either.
  - model_id: 'home-llm'
    domain: 'llama_conversation'
    description: >-
      The home-llm v3 model based on Phi (3B) and custom component using
      service calls to control Home Assistant.
    urls:
      - 'https://github.com/acon96/home-llm/'
      - 'https://huggingface.co/acon96/Home-3B-v3-GGUF'
      - 'https://ollama.com/fixt/home-3b-v3'
    version: 2
    config_entry_data:
      model_backend: 'ollama'
      host: 'ollama.ollama'
      # Deliberately a quoted string, not an integer.
      port: '11434'
      ssl: false
      huggingface_model: 'fixt/home-3b-v3:latest'
    config_entry_options:
      llm_hass_api: 'home-llm-service-api'
      # Template text; the literal block keeps the newlines and needs no
      # escaping, and `|-` drops the trailing newline.
      prompt: |-
        You are 'Al', a helpful AI Assistant that controls the devices in a house. Complete the following task as instructed with the information provided only.
        The current time and date is {{ (as_timestamp(now()) | timestamp_custom("%I:%M %p on %A %B %d, %Y", "")) }}
        Services: {{ formatted_tools }}
        Devices:
        {{ formatted_devices }}
      prompt_template: 'zephyr'
      tool_format: 'min_tool_format'
      tool_multi_turn_chat: false
      in_context_examples: false
      in_context_examples_file: 'in_context_examples.csv'
      # Numeric options below keep their original int/float typing.
      num_in_context_examples: 4.0
      max_new_tokens: 128
      context_length: 2048.0
      top_k: 40.0
      temperature: 0.1
      top_p: 1.0
      typical_p: 1.0
      ollama_json_mode: false
      request_timeout: 90.0
      ollama_keep_alive: 30.0
      remote_use_chat_endpoint: false
      extra_attributes_to_expose:
        - 'rgb_color'
        - 'brightness'
        - 'temperature'
        - 'humidity'
        - 'fan_mode'
        - 'media_title'
        - 'volume_level'
        - 'item'
        - 'wind_speed'
      # Single-quoted so the backslashes reach the regex engine unchanged.
      service_call_regex: '```homeassistant\n([\S \t\n]*?)```'
      refresh_prompt_per_turn: true
      remember_conversation: true
      remember_num_interactions: 5

  # Salesforce xLAM (1B) served by Ollama.
  - model_id: 'xlam-1b'
    domain: 'ollama'
    description: 'XLam (1B) model from Salesforce using Ollama'
    urls:
      - 'https://huggingface.co/Salesforce/xLAM-1b-fc-r'
      - 'https://github.com/SalesforceAIResearch/xLAM'
      - 'https://ollama.com/allenporter/xlam:1b'
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: 'allenporter/xlam:1b'
    config_entry_options:
      llm_hass_api: 'assist'

  #
  # Non function calling models
  #

  # Llama 3 (8B) served by Ollama. No config_entry_options are set, so
  # llm_hass_api is not configured for this entry.
  - model_id: 'llama3'
    domain: 'ollama'
    description: 'Llama 3 (8B) from Meta using Ollama'
    urls:
      - 'https://ollama.com/library/llama3'
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: 'llama3:latest'

  # Gemma (7B) served by Ollama. No config_entry_options are set, so
  # llm_hass_api is not configured for this entry.
  - model_id: 'gemma'
    domain: 'ollama'
    description: 'Gemma (7B) from Google using Ollama'
    urls:
      - 'https://ollama.com/library/gemma'
      - 'https://ai.google.dev/gemma/docs/model_card'
    config_entry_data:
      # Ollama server address, resolved from secrets.yaml.
      url: !secret ollama_url
      model: 'gemma:7b'