Skip to content

Commit

Permalink
Merge pull request #25 from microsoft/20241010_update
Browse files Browse the repository at this point in the history
Fix formatting for E4T3
  • Loading branch information
feaselkl authored Oct 14, 2024
2 parents d8b5216 + 7cd9ff0 commit 6b23c39
Showing 1 changed file with 92 additions and 92 deletions.
184 changes: 92 additions & 92 deletions docs/04_implement_audio_transcription/0403.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,20 +78,20 @@ The key tasks are as follows:

The completed version of the `generate_embeddings_for_call_contents()` function is as follows:

```python
def generate_embeddings_for_call_contents(call_contents):
    """Return the embedding vector for the given call contents.

    Key assumptions:
    - ``call_contents`` is a single string.
    - Azure OpenAI endpoint, key, and deployment name are stored in
      Streamlit secrets.
    """
    # Normalize the text for tokenization, then hand the normalized content
    # to make_azure_openai_embedding_request().
    embedding_response = make_azure_openai_embedding_request(
        normalize_text(call_contents)
    )

    # Only the first entry of the response data is used.
    first_result = embedding_response.data[0]
    return first_result.embedding
```

</details>

Expand All @@ -102,27 +102,27 @@ The key tasks are as follows:

The completed version of the `save_transcript_to_cosmos_db()` function is as follows:

```python
def save_transcript_to_cosmos_db(transcript_item):
    """Insert one call transcript item into the Cosmos DB vector store.

    Key assumptions:
    - ``transcript_item`` is a JSON object containing ``call_id`` (int),
      ``call_transcript`` (string), and ``request_vector`` (list).
    - Cosmos DB endpoint, key, and database name are stored in Streamlit
      secrets.
    """
    # Read every connection setting before touching the service.
    cosmos_settings = st.secrets["cosmos"]
    endpoint = cosmos_settings["endpoint"]
    key = cosmos_settings["key"]
    database_name = cosmos_settings["database_name"]
    container_name = "CallTranscripts"

    # Create the client, then resolve the database and container proxies.
    cosmos_client = CosmosClient(url=endpoint, credential=key)
    transcripts_db = cosmos_client.get_database_client(database_name)
    transcripts = transcripts_db.get_container_client(container_name)

    # Insert the call transcript.
    transcripts.create_item(body=transcript_item)
```
```python
def save_transcript_to_cosmos_db(transcript_item):
    """Save a call transcript (with its embedding) to Cosmos DB.

    Key assumptions:
    - ``transcript_item`` is a JSON object containing ``call_id`` (int),
      ``call_transcript`` (string), and ``request_vector`` (list).
    - Cosmos DB endpoint, key, and database name are stored in Streamlit
      secrets.
    """
    endpoint = st.secrets["cosmos"]["endpoint"]
    credential = st.secrets["cosmos"]["key"]
    database_name = st.secrets["cosmos"]["database_name"]

    # Build the client and walk down to the CallTranscripts container.
    container = (
        CosmosClient(url=endpoint, credential=credential)
        .get_database_client(database_name)
        .get_container_client("CallTranscripts")
    )

    # Insert the call transcript.
    container.create_item(body=transcript_item)
```

</details>

Expand All @@ -133,26 +133,26 @@ The key tasks are as follows:

The completed version of the `make_azure_openai_embedding_request()` function is as follows:

```python
def make_azure_openai_embedding_request(text):
    """Create an embedding request for ``text`` and return its response.

    Key assumptions:
    - Azure OpenAI endpoint, key, and embedding deployment name are stored
      in Streamlit secrets.
    """
    aoai_settings = st.secrets["aoai"]
    endpoint = aoai_settings["endpoint"]
    api_key = aoai_settings["key"]
    deployment_name = aoai_settings["embedding_deployment_name"]

    aoai_client = openai.AzureOpenAI(
        api_key=api_key,
        api_version="2024-06-01",
        azure_endpoint=endpoint,
    )

    # Create and return a new embedding request.
    return aoai_client.embeddings.create(model=deployment_name, input=text)
```
```python
def make_azure_openai_embedding_request(text):
    """Request an embedding for ``text`` from Azure OpenAI.

    Key assumptions:
    - Azure OpenAI endpoint, key, and embedding deployment name are stored
      in Streamlit secrets.

    Returns the response object produced by ``client.embeddings.create``.
    """
    endpoint = st.secrets["aoai"]["endpoint"]
    key = st.secrets["aoai"]["key"]
    embedding_deployment = st.secrets["aoai"]["embedding_deployment_name"]

    client_options = {
        "api_key": key,
        "api_version": "2024-06-01",
        "azure_endpoint": endpoint,
    }
    client = openai.AzureOpenAI(**client_options)

    # Create and return a new embedding request.
    embedding_response = client.embeddings.create(
        model=embedding_deployment,
        input=text,
    )
    return embedding_response
```

</details>

Expand All @@ -163,46 +163,46 @@ The key tasks are as follows:

The completed version of the `make_cosmos_db_vector_search_request()` function is as follows:

```python
def make_cosmos_db_vector_search_request(
    query_embedding, max_results=5, minimum_similarity_score=0.5
):
    """Run a vector search over the CallTranscripts container.

    Key assumptions:
    - ``query_embedding`` is a list of floats based on a search string.
    - Cosmos DB endpoint, key, and database name are stored in Streamlit
      secrets.

    Args:
        query_embedding: Embedding to compare against ``c.request_vector``.
        max_results: Maximum number of rows to return (``SELECT TOP``).
        minimum_similarity_score: Rows are kept only when their
            ``VectorDistance`` value is greater than this threshold.

    Returns:
        The iterable returned by ``container.query_items``.

    Raises:
        ValueError, TypeError: If ``max_results`` cannot be converted to an
            int or ``minimum_similarity_score`` cannot be converted to a
            float.
    """
    # Coerce the numeric inputs before they get anywhere near the query.
    # TOP is still interpolated, but only ever as a plain integer, so a
    # caller-supplied string cannot smuggle extra query text in.
    top_n = int(max_results)
    min_score = float(minimum_similarity_score)

    cosmos_endpoint = st.secrets["cosmos"]["endpoint"]
    cosmos_key = st.secrets["cosmos"]["key"]
    cosmos_database_name = st.secrets["cosmos"]["database_name"]
    cosmos_container_name = "CallTranscripts"

    # Create a CosmosClient
    client = CosmosClient(url=cosmos_endpoint, credential=cosmos_key)
    # Load the Cosmos database and container
    database = client.get_database_client(cosmos_database_name)
    container = database.get_container_client(cosmos_container_name)

    # The similarity threshold is passed as a bound parameter rather than
    # being formatted into the query string.
    results = container.query_items(
        query=f"""
            SELECT TOP {top_n}
                c.id,
                c.call_id,
                c.call_transcript,
                c.abstractive_summary,
                VectorDistance(c.request_vector, @request_vector) AS SimilarityScore
            FROM c
            WHERE
                VectorDistance(c.request_vector, @request_vector) > @minimum_similarity_score
            ORDER BY
                VectorDistance(c.request_vector, @request_vector)
        """,
        parameters=[
            {"name": "@request_vector", "value": query_embedding},
            {"name": "@minimum_similarity_score", "value": min_score},
        ],
        enable_cross_partition_query=True,
    )

    # Create and return a new vector search request
    return results
```
```python
def make_cosmos_db_vector_search_request(
    query_embedding, max_results=5, minimum_similarity_score=0.5
):
    """Create and return a new vector search request.

    Key assumptions:
    - ``query_embedding`` is a list of floats based on a search string.
    - Cosmos DB endpoint, key, and database name are stored in Streamlit
      secrets.

    Args:
        query_embedding: Embedding to compare against ``c.request_vector``.
        max_results: Maximum number of rows to return (``SELECT TOP``).
        minimum_similarity_score: Rows are kept only when their
            ``VectorDistance`` value is greater than this threshold.

    Returns:
        The iterable returned by ``container.query_items``.

    Raises:
        ValueError, TypeError: If ``max_results`` cannot be converted to an
            int or ``minimum_similarity_score`` cannot be converted to a
            float.
    """
    # Validate the numeric arguments first: TOP is interpolated into the
    # query text, so it must be a plain integer and nothing else.
    result_limit = int(max_results)
    score_threshold = float(minimum_similarity_score)

    cosmos_endpoint = st.secrets["cosmos"]["endpoint"]
    cosmos_key = st.secrets["cosmos"]["key"]
    cosmos_database_name = st.secrets["cosmos"]["database_name"]
    cosmos_container_name = "CallTranscripts"

    # Create a CosmosClient
    client = CosmosClient(url=cosmos_endpoint, credential=cosmos_key)
    # Load the Cosmos database and container
    database = client.get_database_client(cosmos_database_name)
    container = database.get_container_client(cosmos_container_name)

    # Bind the threshold as a query parameter instead of formatting it into
    # the query string.
    query_parameters = [
        {"name": "@request_vector", "value": query_embedding},
        {"name": "@minimum_similarity_score", "value": score_threshold},
    ]

    results = container.query_items(
        query=f"""
            SELECT TOP {result_limit}
                c.id,
                c.call_id,
                c.call_transcript,
                c.abstractive_summary,
                VectorDistance(c.request_vector, @request_vector) AS SimilarityScore
            FROM c
            WHERE
                VectorDistance(c.request_vector, @request_vector) > @minimum_similarity_score
            ORDER BY
                VectorDistance(c.request_vector, @request_vector)
        """,
        parameters=query_parameters,
        enable_cross_partition_query=True,
    )

    # Create and return a new vector search request
    return results
```

</details>

Expand Down

0 comments on commit 6b23c39

Please sign in to comment.