Skip to content

Commit

Permalink
Adding Guided Project modules to GitHub lab repo
Browse files Browse the repository at this point in the history
  • Loading branch information
MScalopez committed Dec 13, 2024
1 parent 6614570 commit cea2e3f
Show file tree
Hide file tree
Showing 64 changed files with 6,367 additions and 0 deletions.
390 changes: 390 additions & 0 deletions 05-deploy-with-aks/create-azure-resources.ps1

Large diffs are not rendered by default.

60 changes: 60 additions & 0 deletions 05-deploy-with-aks/create-mongodb-vcore-cluster.bicep
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

// Deploys a Microsoft.DocumentDB/mongoClusters resource (API version 2023-03-01-preview)
// together with two firewall rules on that cluster.
// Most parameters default to an empty string; presumably the calling deployment
// script always supplies real values -- confirm against the caller.

// Name given to the cluster resource (at most 40 characters).
// The commented-out expression shows a derived default that is currently not used.
@description('Azure Cosmos DB MongoDB vCore cluster name')
@maxLength(40)
param clusterName string = '' //= 'msdocs-${uniqueString(resourceGroup().id)}'

// Region for the cluster. The resource-group location default is commented out,
// so an explicit value is expected.
@description('Location for the cluster.')
param location string = '' //= resourceGroup().location

// Passed to the cluster as administratorLogin.
@description('Username for admin user')
param adminUsername string = ''

// Single IP address used as both start and end of the local-access firewall rule below.
// With the default '0.0.0.0' that rule covers the same range as 'AllowAllAzureServices'.
@description('Public IP address to allow access to the cluster')
param publicIp string = '0.0.0.0'

// Resource name of the local-access firewall rule.
@description('Public IP address rule name for local access to the cluster')
param publicIpRuleName string = 'labMachineIPAccessRule'

// Passed to the cluster as administratorLoginPassword. Marked @secure().
// Only the maximum length is enforced here; the minimum-length check is commented out.
@secure()
@description('Password for admin user')
//@minLength(8)
@maxLength(128)
param adminPassword string = ''

// The cluster itself: one node group of kind 'Shard' with a single node,
// SKU 'M30', a 128 GB disk and high availability disabled.
resource cluster 'Microsoft.DocumentDB/mongoClusters@2023-03-01-preview' = {
name: clusterName
location: location
properties: {
administratorLogin: adminUsername
administratorLoginPassword: adminPassword
nodeGroupSpecs: [
{
kind: 'Shard'
nodeCount: 1
sku: 'M30'
diskSizeGB: 128
enableHa: false
}
]
}
}



// Firewall rule spanning 0.0.0.0 - 0.0.0.0.
// NOTE(review): judging by the rule name, this range is meant to admit traffic
// from Azure services rather than a literal address -- confirm against the service docs.
resource firewallRules 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = {
parent: cluster
name: 'AllowAllAzureServices'
properties: {
startIpAddress: '0.0.0.0'
endIpAddress: '0.0.0.0'
}
}

// Firewall rule that admits exactly one address (publicIp), named by publicIpRuleName.
resource firewallRules_local_access 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = {
parent: cluster
name: publicIpRuleName
properties: {
startIpAddress: publicIp
endIpAddress: publicIp
}
}
40 changes: 40 additions & 0 deletions 05-deploy-with-aks/node.js/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
randomIdentifier=
location=
changeSubscription=
subscriptionName=
skipCreatingResourceGroup=
resourceGroup=

skipCreatingCosmosDBCluster=
skipCreatingCosmosDBPublicIPFirewallRule=
cosmosCluster=
cosmosClusterLocation=
cosmosDbEndpoint=
cosmosClusterAdmin=
cosmosClusterPassword=
cosmosdbDatabase="cosmicworks"

skipCreatingAzureOpenAIAccount=
cognitiveServicesKind="OpenAI"
OpenAIAccount=
OpenAIAccountLocation=
OpenAIAccountSKU="s0"
OpenAIEndpoint=
OpenAIKey1=
OpenAIVersion="2023-05-15"

skipCreatingAzureOpenAIDeployment=
OpenAIDeploymentName=
OpenAIDeploymentModel="text-embedding-ada-002"
OpenAIDeploymentModelFormat="OpenAI"
OpenAIDeploymentModelVersion="2"
OpenAIDeploymentSKU="Standard"
OpenAIDeploymentSKUCapacity=100

skipCreatingAzureOpenAICompletionDeployment=
OpenAICompletionDeploymentName=
OpenAICompletionDeploymentModel="gpt-35-turbo"
OpenAICompletionDeploymentModelFormat="OpenAI"
OpenAICompletionDeploymentModelVersion="0301"
OpenAICompletionDeploymentSKU="Standard"
OpenAICompletionDeploymentSKUCapacity=100
1 change: 1 addition & 0 deletions 05-deploy-with-aks/node.js/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
node_modules/
124 changes: 124 additions & 0 deletions 05-deploy-with-aks/node.js/Blobs/loadAndVectorize.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Importing required modules
const fs = require('fs');
const path = require('path');
const mongodb = require('mongodb');
const { MongoClient, updateOne } = require('mongodb');

// Importing custom modules
const Customers = require('../Collections/customers');
const Products = require('../Collections/products');
const SalesOrders = require('../Collections/salesOrders');
const Indexes = require('../SearchComponents/indexes');

/**
 * Loads every `*.json` file found in `dataFolder` into the MongoDB collection
 * named after the file (e.g. `products.json` -> collection `products`),
 * optionally adding embedding vectors to each document and creating the
 * matching vector indexes afterwards.
 *
 * Documents are upserted by `_id` through unordered `bulkWrite` batches, so
 * re-running the load overwrites existing documents instead of duplicating them.
 *
 * @param {object} client - Connected MongoDB client (only `client.db(name)` is used).
 * @param {string} dataFolder - Local folder containing the JSON files.
 * @param {string} cosmosDbMongoDbDatabase - Name of the target database.
 * @param {number|string} batchSize - Number of upserts per `bulkWrite` call. Numeric
 *   strings are accepted; a non-positive or non-numeric value results in a single
 *   batch per file.
 * @param {string} embeddingsDeployment - Forwarded unchanged to the embedding generators.
 * @param {object} AzureOpenAIClient - Forwarded unchanged to the embedding generators.
 * @param {boolean} processCustomersVector - Vectorize and index the `customers` collection.
 * @param {boolean} processProductsVector - Vectorize and index the `products` collection.
 * @param {boolean} processSalesOrdersVector - Vectorize and index the `salesOrders` collection.
 * @returns {Promise<void>} Resolves once every file has been written and indexed.
 * @throws Propagates file-system, JSON parsing, embedding and database errors.
 */
async function loadAndVectorizeLocalBlobDataToMongoDBCluster(client, dataFolder, cosmosDbMongoDbDatabase, batchSize, embeddingsDeployment, AzureOpenAIClient, processCustomersVector, processProductsVector, processSalesOrdersVector) {
    // One entry per collection that can be vectorized: whether it is enabled,
    // how to embed a document, and which [indexName, vectorField] pairs to create.
    // A Map (not a plain object) so that odd file names can never hit Object.prototype keys.
    const vectorSettings = new Map([
        ["customers", {
            enabled: processCustomersVector,
            embed: (doc) => Customers.generateCustomerEmbedding(doc, embeddingsDeployment, AzureOpenAIClient),
            indexList: [
                ["customerTypeVectorSearchIndex", "customerTypeVector"],
                ["customerTitleVectorSearchIndex", "customerTitleVector"],
                ["customerNameVectorSearchIndex", "customerNameVector"],
                ["customerEmailAddressVectorSearchIndex", "customerEmailAddressVector"],
                ["customerPhoneNumberVectorSearchIndex", "customerPhoneNumberVector"],
                ["customerAddressesVectorSearchIndex", "customerAddressesVector"]
            ]
        }],
        ["products", {
            enabled: processProductsVector,
            embed: (doc) => Products.generateProductEmbedding(doc, embeddingsDeployment, AzureOpenAIClient),
            indexList: [
                ["productVectorSearchIndex", "productVector"]
            ]
        }],
        ["salesOrders", {
            enabled: processSalesOrdersVector,
            embed: (doc) => SalesOrders.generateSalesOrderEmbedding(doc, embeddingsDeployment, AzureOpenAIClient),
            indexList: [
                ["salesOrderDetailVectorSearchIndex", "salesOrderDetailVector"]
            ]
        }]
    ]);

    // Accept numeric strings (e.g. values read from an env file). With an unusable
    // batch size everything is written in the single final batch.
    const flushThreshold = Number(batchSize) > 0 ? Number(batchSize) : Infinity;

    const db = client.db(cosmosDbMongoDbDatabase);

    // Loop through each file in the data folder
    for (const blobFile of fs.readdirSync(dataFolder)) {
        // Process only files whose extension is exactly ".json"
        // (a substring test would also pick up names such as "x.json.bak").
        if (path.extname(blobFile) !== '.json') {
            continue;
        }

        console.log(`\n(${new Date().toISOString()}) ${blobFile}`);

        // Read the content of the file and parse it as JSON
        const fileContent = fs.readFileSync(path.join(dataFolder, blobFile), 'utf-8');
        const jsonData = JSON.parse(fileContent);

        // Process only if the file holds a non-empty array of documents
        if (!Array.isArray(jsonData) || jsonData.length === 0) {
            continue;
        }

        const totalNumberOfDocuments = jsonData.length;

        // The collection is named after the file, without its extension
        const collectionName = path.basename(blobFile, '.json');
        const collection = db.collection(collectionName);

        const settings = vectorSettings.get(collectionName);
        const vectorize = Boolean(settings && settings.enabled);

        let batchNumber = 1;
        let currentDocIdx = 0;
        let operations = [];

        // Loop through each document in the JSON file
        for (let doc of jsonData) {
            currentDocIdx++;

            if (vectorize) {
                // Embeddings are generated one document at a time, in file order
                doc = await settings.embed(doc);

                // Log the progress for every 100 documents processed
                if (currentDocIdx % 100 === 0) {
                    console.log(`\t${currentDocIdx} out of ${totalNumberOfDocuments} docs vectorized.`);
                }
            }

            // Prepare the upsert for the document, keyed by its _id
            operations.push({
                updateOne: {
                    filter: { "_id": doc["_id"] },
                    update: { "$set": doc },
                    upsert: true
                }
            });

            // Write the operations to the database in batches
            if (operations.length >= flushThreshold) {
                console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`);
                await collection.bulkWrite(operations, { ordered: false });
                operations = [];
                batchNumber++;
            }
        }

        // Log the completion of vectorization
        if (vectorize) {
            console.log(`\t${totalNumberOfDocuments} out of ${totalNumberOfDocuments} docs vectorized.`);
        }

        // Write any remaining operations to the database
        if (operations.length > 0) {
            console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`);
            await collection.bulkWrite(operations, { ordered: false });
        }

        console.log(`(${new Date().toISOString()}) Collection ${collectionName}, total number of documents processed ${currentDocIdx} .\n`);

        // Create the vector indexes only after all documents have been written
        if (vectorize) {
            await Indexes.createVectorIndexes(collection, settings.indexList, db, collectionName);
        }
    }
}

// Export the function
module.exports.loadAndVectorizeLocalBlobDataToMongoDBCluster = loadAndVectorizeLocalBlobDataToMongoDBCluster;
46 changes: 46 additions & 0 deletions 05-deploy-with-aks/node.js/Blobs/webDownload.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Import the BlobServiceClient from the Azure Storage Blob package
const { BlobServiceClient } = require('@azure/storage-blob');

// Import the file system module
const fs = require('fs');

// Import the path module
const path = require('path');

/**
 * Downloads every blob of an Azure Blob Storage container into `dataFolder`,
 * skipping blobs for which a local file with the same relative name already exists.
 *
 * The returned promise settles only after each downloaded file has been fully
 * written, so callers can read the files as soon as it resolves.
 *
 * @param {string} accountUrl - URL passed to the `BlobServiceClient` constructor.
 * @param {string} containerName - Name of the container to list and download from.
 * @param {string} dataFolder - Local destination folder.
 * @returns {Promise<void>}
 * @throws Propagates listing, download and file-write errors.
 */
async function downloadFilesFromBlobIfTheyDontExist(accountUrl, containerName, dataFolder) {
    // Create a new BlobServiceClient and get a ContainerClient for the container
    const blobServiceClient = new BlobServiceClient(accountUrl);
    const containerClient = blobServiceClient.getContainerClient(containerName);

    // Iterate over each blob in the container
    for await (const blob of containerClient.listBlobsFlat()) {
        // Construct the local file path
        const filePath = path.join(dataFolder, blob.name);

        // Nothing to do when the file already exists locally
        if (fs.existsSync(filePath)) {
            continue;
        }

        // A blob name may contain path separators; make sure the target folder exists
        fs.mkdirSync(path.dirname(filePath), { recursive: true });

        // Download the blob starting at offset 0
        const blobClient = containerClient.getBlobClient(blob.name);
        const downloadBlockBlobResponse = await blobClient.download(0);

        // Pipe the downloaded blob into the local file and wait until the write
        // has finished; stream errors reject instead of being emitted unhandled.
        await new Promise((resolve, reject) => {
            const blobStream = downloadBlockBlobResponse.readableStreamBody;
            const fileStream = fs.createWriteStream(filePath);

            blobStream.on('error', (err) => {
                // pipe() does not close the destination when the source fails
                fileStream.destroy();
                reject(err);
            });
            fileStream.on('error', reject);
            fileStream.on('finish', resolve);

            blobStream.pipe(fileStream);
        });
    }
}

// Export the function
module.exports.downloadFilesFromBlobIfTheyDontExist = downloadFilesFromBlobIfTheyDontExist;
65 changes: 65 additions & 0 deletions 05-deploy-with-aks/node.js/Collections/customers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Import the Embeddings module from the SearchComponents directory
const Embeddings = require('../SearchComponents/embeddings');

/**
 * Converts a list of address objects into a single human-readable string.
 *
 * Each address contributes only the fields that are present (truthy); addresses
 * are separated by "; ".
 *
 * @param {Array<object>} [addresses] - Address objects with optional `addressLine1`,
 *   `addressLine2`, `city`, `state`, `country`, `zipCode` and `location` fields.
 * @returns {string} The concatenated addresses, or "" when `addresses` is missing,
 *   not an array, or empty.
 */
function getCustomerAddresses(addresses) {
    // A customer record without an address list has nothing to describe
    if (!Array.isArray(addresses)) {
        return "";
    }

    return addresses
        .map((address) => [
            address.addressLine1 ? "Address Line - " + address.addressLine1 : "",
            address.addressLine2 ? " " + address.addressLine2 : "",
            address.city ? ", city - " + address.city : "",
            address.state ? ", state - " + address.state : "",
            address.country ? ", country - " + address.country : "",
            address.zipCode ? ", zipcode - " + address.zipCode : "",
            // NOTE(review): `location` is concatenated as-is; if it is an object rather
            // than a string it will render as "[object Object]" -- confirm the data shape.
            address.location ? ", location - " + address.location : ""
        ].join(""))
        .join("; ");
}

/**
 * Adds embedding vectors to a customer document, one per populated field.
 *
 * Fields are handled in a fixed order (type, title, name, email address, phone
 * number, addresses); each embedding request is awaited before the next one
 * starts, and fields that are empty or missing get no vector property.
 *
 * @param {object} customer - Customer document; it is modified in place.
 * @param {string} embeddingsDeployment - Forwarded to `Embeddings.generateEmbeddings`.
 * @param {object} AzureOpenAIClient - Forwarded to `Embeddings.generateEmbeddings`.
 * @returns {Promise<object>} The same `customer` object with the added vectors.
 */
async function generateCustomerEmbedding(customer, embeddingsDeployment, AzureOpenAIClient) {
    // [target vector property, function producing the text to embed].
    // The text is read lazily so each field is evaluated only when its turn comes.
    const vectorPlan = [
        ["customerTypeVector", () => customer.type],
        ["customerTitleVector", () => customer.title],
        // The name is embedded only when both first and last name are present
        ["customerNameVector", () => (customer.firstName && customer.lastName ? customer.firstName + " " + customer.lastName : "")],
        ["customerEmailAddressVector", () => customer.emailAddress],
        ["customerPhoneNumberVector", () => customer.phoneNumber],
        // All addresses are flattened into one string before embedding
        ["customerAddressesVector", () => getCustomerAddresses(customer.addresses)]
    ];

    for (const [vectorProperty, readText] of vectorPlan) {
        const text = readText();

        // Skip fields with nothing to embed
        if (text) {
            customer[vectorProperty] = await Embeddings.generateEmbeddings(text, embeddingsDeployment, AzureOpenAIClient);
        }
    }

    // Return the customer object with the added embeddings
    return customer;
}

// Export the generateCustomerEmbedding function
module.exports.generateCustomerEmbedding = generateCustomerEmbedding;
23 changes: 23 additions & 0 deletions 05-deploy-with-aks/node.js/Collections/products.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Import the Embeddings module from the SearchComponents directory
const Embeddings = require('../SearchComponents/embeddings');

/**
 * Adds a `productVector` embedding to a product document.
 *
 * The embedded text combines the product's category and name in the form
 * "Category - <categoryName>, Name -<name>".
 *
 * @param {object} product - Product document; it is modified in place.
 * @param {string} embeddingsDeployment - Forwarded to `Embeddings.generateEmbeddings`.
 * @param {object} AzureOpenAIClient - Forwarded to `Embeddings.generateEmbeddings`.
 * @returns {Promise<object>} The same `product` object with `productVector` set.
 */
async function generateProductEmbedding(product, embeddingsDeployment, AzureOpenAIClient) {
    const { categoryName, name } = product;

    // The text always starts with a fixed prefix, so it is never empty and an
    // embedding is requested for every product, even when a field is missing.
    const embeddingText = "Category - " + categoryName + ", Name -" + name;

    product.productVector = await Embeddings.generateEmbeddings(embeddingText, embeddingsDeployment, AzureOpenAIClient);

    return product;
}

// Export the generateProductEmbedding function
module.exports.generateProductEmbedding = generateProductEmbedding;
Loading

0 comments on commit cea2e3f

Please sign in to comment.