# .project-metadata.yaml

# Project identity and descriptive metadata.
name: "LLM Hands on Lab with CML"
description: >-
  Hands on Lab which demonstrates a number of concepts including web scraping,
  vector databases, model deployment, model usage, Langchain, application
  building, and instruction following/tuning.
author: Cloudera
# Quoted so the value stays a string instead of being parsed as a date.
date: "2023-12-20"
# Both versions are intentionally left as unquoted numerics (parsed as 1.0).
specification_version: 1.0
prototype_version: 1.0
# Environment variables declared for this project. Each entry carries a
# default value and a human-readable description.
# The AWS_* entries and the other PINECONE_* entries are commented out
# (disabled); PINECONE_INDEX is the only active variable.
environment_variables:
  #AWS_ACCESS_KEY_ID:
  #  default: ""
  #  description: "Access key ID for interacting with AWS Bedrock"
  #AWS_SECRET_ACCESS_KEY:
  #  default: ""
  #  description: "Secret key for interacting with AWS Bedrock"
  #AWS_DEFAULT_REGION:
  #  default: "us-west-2"
  #  description: "AWS Region where Bedrock models are available"
  #PINECONE_API_KEY:
  #  default: ""
  #  description: "API Keys for Pinecone service"
  #PINECONE_ENVIRONMENT:
  #  default: ""
  #  description: "Environment within Pinecone service"
  PINECONE_INDEX:
    description: 'Index within Pinecone Environment'
    # Empty string default (not null).
    default: ''

# Runtime selection: a single entry combining editor, kernel and edition.
runtimes:
  - editor: "JupyterLab"
    kernel: "Python 3.10"
    edition: "Nvidia GPU"
  
tasks:
  # Session task that runs the prerequisite install script.
  - type: run_session
    name: "Install Dependencies"
    short_summary: "Install Dependencies"
    script: "0_install_prerequisites/download_reqs_set_vars.py"
    kernel: "python3"
    # Resources requested for the session.
    cpu: 2
    memory: 12

  # Session task that runs the Chroma setup script.
  - type: run_session
    name: "Setup Chroma Server"
    short_summary: "Setup Chroma Server Vector DB"
    long_summary: >-
      Resolve sqlite dependency with python version and control logging in
      Chroma server package
    script: "0_install_prerequisites/setup-chroma.py"
    kernel: "python3"
    # Resources requested for the session.
    cpu: 2
    memory: 4

  # Job-creation task for the HTML-to-text conversion script.
  - type: create_job
    name: "Pull and Convert HTMLS to TXT"
    entity_label: htmls_to_txt
    short_summary: "Create job to source htmls, download, and convert them to text."
    long_summary: >-
      Create job to source htmls, download, and convert them to text.
      Update html_links.txt with the htmls you wish to download and convert.
    script: "2_populate_vector_db/html_to_text.py"
    # NOTE: YAML parses the bare word None as the string "None", not as null.
    # Presumably this is what the consumer expects - confirm before changing.
    arguments: None
    # Resources requested for the job.
    cpu: 2
    memory: 4
    environment:
      TASK_TYPE: CREATE/RUN_JOB