# .project-metadata.yaml

# Descriptive metadata for the project.
name: 'Intelligent QA Chatbot with NiFi, Pinecone, and Llama2'
# Folded scalar: the lines below are joined with single spaces and one
# trailing newline is kept.
description: >
  Ingest data with Cloudera DataFlow from a user-specified website sitemap to
  create embeddings in a Pinecone vector DB and deploy a context-aware LLM
  chatbot app with Cloudera Machine Learning.

author: 'Cloudera Inc.'
# Both versions are deliberately left unquoted: quoting them would change the
# parsed type from a number to a string.
specification_version: 1.0
prototype_version: 1.0
date: '2024-03-25'

# Environment variables declared by the project. Each entry carries a default
# and a description; only the first two are flagged as required.
environment_variables:
  # Vector DB backend selector; the default is Chroma.
  VECTOR_DB:
    required: true
    default: 'CHROMA'
    description: >-
      Enter 'CHROMA' or 'PINECONE' for your preferred Vector DB. Only 'CHROMA'
      or 'PINECONE' are valid options. Chroma does not require any additional
      setup. Pinecone will require you to create an account and generate an API
      key.
  COLLECTION_NAME:
    required: true
    default: 'cml-default'
    description: >-
      The default is 'cml-default' and can be changed to identify variations
      for organizations with multiple indexes.
  # The Pinecone settings below have no `required` flag; their descriptions
  # say they are only needed when Pinecone is the chosen vector DB.
  PINECONE_API_KEY:
    default: ''
    description: >-
      Only Required for Pinecone Vector DB: Enter your API Key for Pinecone
      here. (Shown in API Keys page)
  PINECONE_ENVIRONMENT:
    default: 'gcp-starter'
    description: >-
      Only Required for Pinecone Vector DB: Enter your Pinecone environment
      here. (Shown in API Keys page)


# Single runtime entry: PBJ Workbench editor, Python 3.10 kernel, Nvidia GPU
# edition. All three values are strings.
runtimes:
  - edition: 'Nvidia GPU'
    editor: 'PBJ Workbench'
    kernel: 'Python 3.10'

# List of project tasks (sessions, jobs and model steps), in file order.
tasks:
  # Session that runs the GPU availability check script. This session does not
  # request a `gpu` for itself.
  - type: run_session
    name: Validate GPU Availability
    script: 0_session-verify-deps/check_gpu_resources.py
    short_summary: Check for GPU availability.
    long_summary: Check GPUs are enabled on this workspace and are currently schedulable.
    kernel: python3
    cpu: 2
    memory: 4

  # Session that runs the GPU capability check script; it requests one GPU.
  - type: run_session
    name: Validate GPU Capability
    short_summary: Check for GPU capability.
    long_summary: >-
      Check GPUs are capable on this workspace and meet project requirements.
    script: 0_session-verify-deps/check_gpu_capability.py
    kernel: python3
    # Resources requested for the session.
    cpu: 2
    memory: 4
    gpu: 1

  # Session that runs the requirements download script.
  - type: run_session
    name: Install Dependencies
    short_summary: Install Package Dependencies
    script: 1_session-install-deps/download_requirements.py
    kernel: python3
    # Resources requested for the session.
    cpu: 2
    memory: 16

  # Session that runs the Chroma setup script. Per its summary, the script is
  # expected to do work only when Chroma is the selected vector DB.
  - type: run_session
    name: Setup Chroma Vector DB
    short_summary: >-
      Setup Chroma Vector DB (will only run if Chroma is chosen as Vector DB)
    script: 1_session-install-deps/setup-chroma.py
    kernel: python3
    # Resources requested for the session.
    cpu: 1
    memory: 2

  # Creates the sitemap scrape job under the label `xml_scrape`.
  # NOTE(review): no `run_job` task for `xml_scrape` is visible in this file;
  # confirm that creating Step 1 without running it is intended.
  - type: create_job
    name: XML Scrape (Step 1)
    entity_label: xml_scrape
    short_summary: Scrape XML mapped in conf file.
    long_summary: Scrape XML mapped in conf file.
    script: USER_START_HERE/Build_Your_Own_Knowledge_Base_Tools/Python-based_sitemap_scrape/1_kb_xml_scrape.py
    # YAML has no `None` keyword, so this value is the string "None", not
    # null. Quoted to make that explicit.
    arguments: 'None'
    environment:
      TASK_TYPE: 'CREATE/RUN_JOB'
    # Resources requested for the job.
    cpu: 4
    memory: 8

  # Creates the HTML-to-text conversion job under the label `html_to_text`.
  - type: create_job
    name: HTML to TEXT (Step 2)
    entity_label: html_to_text
    short_summary: Download and convert htmls to text.
    long_summary: Download and convert htmls to text.
    script: USER_START_HERE/Build_Your_Own_Knowledge_Base_Tools/Python-based_sitemap_scrape/2_kb_html_to_text.py
    # YAML has no `None` keyword, so this value is the string "None", not
    # null. Quoted to make that explicit.
    arguments: 'None'
    environment:
      TASK_TYPE: 'CREATE/RUN_JOB'
    # Resources requested for the job.
    cpu: 4
    memory: 8

  # Runs the job created above, referenced by its entity label.
  - type: run_job
    entity_label: html_to_text
    short_summary: Convert HTMLS to text.

  # Creates the vector DB population job under the label `vectordb_insert`.
  - type: create_job
    name: Populate Vector DB with documents embeddings
    entity_label: vectordb_insert
    short_summary: >-
      Create job to populate Vector Database with document embeddings.
    long_summary: >-
      Create job to launch Vector Database locally and insert embeddings for
      documents. Embeddings are generated by the locally running embeddings
      model.
    script: 2_job-populate-vectordb/vectordb_insert.py
    # YAML has no `None` keyword, so this value is the string "None", not
    # null. Quoted to make that explicit.
    arguments: 'None'
    environment:
      TASK_TYPE: 'CREATE/RUN_JOB'
    # Resources requested for the job.
    cpu: 2
    memory: 8

  # Runs the job created above, referenced by its entity label.
  - type: run_job
    entity_label: vectordb_insert
    short_summary: Populate Vector DB with documents embeddings

  # Creates the model entry labelled `llama-2-7b`, with its default resources
  # and a fixed replication policy of one replica.
  - type: create_model
    name: Create Llama2 Model
    entity_label: llama-2-7b
    short_summary: LLAMA-2-7B
    description: LLAMA-2-7B model hosted in CML.
    default_replication_policy:
      type: fixed
      num_replicas: 1
    default_resources:
      cpu: 4
      memory: 16
      gpu: 1
  
  # Builds the model labelled `llama-2-7b` from the function `api_wrapper` in
  # `llm_model.py`.
  - type: build_model
    name: Build Llama2 Model
    entity_label: llama-2-7b
    comment: First build by the AMP
    target_file_path: llm_model.py
    target_function_name: api_wrapper
    # Sample request stored with the build.
    examples:
      - request:
          prompt: 'What is Cloudera?'
          temperature: 0
          max_new_tokens: 50
          repetition_penalty: 0.5

  # Session that runs the model creation/start script.
  # NOTE(review): the "(if enabled)" condition is not defined in this file;
  # presumably the script decides. Verify in model/create_model.py.
  - type: run_session
    name: Start Llama2 CML Model (if enabled)
    short_summary: Create/Start Llama2 CML Model (if enabled)
    script: model/create_model.py
    kernel: python3
    # Resources requested for the session.
    cpu: 2
    memory: 4

  # Session that runs the script which creates/starts the chatbot application.
  - type: run_session
    name: Create/Start CML Llama2 Chatbot Interface
    short_summary: Create/Start CML Llama2 Chatbot Interface
    script: 3_app/create_app.py
    kernel: python3
    # Resources requested for the session.
    cpu: 1
    memory: 2