generated from GraemeMalcolm/Jekyll-Theme
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding Guided Project modules to GitHub lab repo
- Loading branch information
Showing
64 changed files
with
6,367 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
|
||
// Bicep template that declares one Microsoft.DocumentDB/mongoClusters resource
// (symbolic name `cluster`) and two child firewall rules.
// Several parameters default to an empty string; presumably the deploying
// script supplies real values — TODO confirm against the caller.
@description('Azure Cosmos DB MongoDB vCore cluster name') | ||
@maxLength(40) | ||
param clusterName string = '' //= 'msdocs-${uniqueString(resourceGroup().id)}' | ||
|
||
@description('Location for the cluster.') | ||
param location string = '' //= resourceGroup().location | ||
|
||
@description('Username for admin user') | ||
param adminUsername string = '' | ||
|
||
@description('Public IP address to allow access to the cluster') | ||
param publicIp string = '0.0.0.0' | ||
|
||
@description('Public IP address rule name for local access to the cluster') | ||
param publicIpRuleName string = 'labMachineIPAccessRule' | ||
|
||
// The password is marked @secure(). The @minLength(8) decorator below is
// commented out, so this template itself only enforces the 128-character maximum.
@secure() | ||
@description('Password for admin user') | ||
//@minLength(8) | ||
@maxLength(128) | ||
param adminPassword string = '' | ||
|
||
// The cluster: a single node group of kind 'Shard' with one node, SKU 'M30',
// a 128 GB disk and high availability disabled.
resource cluster 'Microsoft.DocumentDB/mongoClusters@2023-03-01-preview' = { | ||
name: clusterName | ||
location: location | ||
properties: { | ||
administratorLogin: adminUsername | ||
administratorLoginPassword: adminPassword | ||
nodeGroupSpecs: [ | ||
{ | ||
kind: 'Shard' | ||
nodeCount: 1 | ||
sku: 'M30' | ||
diskSizeGB: 128 | ||
enableHa: false | ||
} | ||
] | ||
} | ||
} | ||
|
||
|
||
|
||
// Child firewall rule whose start and end address are both 0.0.0.0.
// NOTE(review): going by the rule name this is presumably the Azure convention
// for "allow Azure services" — confirm against the service documentation.
resource firewallRules 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { | ||
parent: cluster | ||
name: 'AllowAllAzureServices' | ||
properties: { | ||
startIpAddress: '0.0.0.0' | ||
endIpAddress: '0.0.0.0' | ||
} | ||
} | ||
|
||
// Child firewall rule for a single address (start == end == publicIp).
// NOTE(review): with the default publicIp of '0.0.0.0' this rule covers the
// same range as the 'AllowAllAzureServices' rule above.
resource firewallRules_local_access 'Microsoft.DocumentDB/mongoClusters/firewallRules@2023-03-01-preview' = { | ||
parent: cluster | ||
name: publicIpRuleName | ||
properties: { | ||
startIpAddress: publicIp | ||
endIpAddress: publicIp | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
randomIdentifier= | ||
location= | ||
changeSubscription= | ||
subscriptionName= | ||
skipCreatingResourceGroup= | ||
resourceGroup= | ||
|
||
skipCreatingCosmosDBCluster= | ||
skipCreatingCosmosDBPublicIPFirewallRule= | ||
cosmosCluster= | ||
cosmosClusterLocation= | ||
cosmosDbEndpoint= | ||
cosmosClusterAdmin= | ||
cosmosClusterPassword= | ||
cosmosdbDatabase="cosmicworks" | ||
|
||
skipCreatingAzureOpenAIAccount= | ||
cognitiveServicesKind="OpenAI" | ||
OpenAIAccount= | ||
OpenAIAccountLocation= | ||
OpenAIAccountSKU="s0" | ||
OpenAIEndpoint= | ||
OpenAIKey1= | ||
OpenAIVersion="2023-05-15" | ||
|
||
skipCreatingAzureOpenAIDeployment= | ||
OpenAIDeploymentName= | ||
OpenAIDeploymentModel="text-embedding-ada-002" | ||
OpenAIDeploymentModelFormat="OpenAI" | ||
OpenAIDeploymentModelVersion="2" | ||
OpenAIDeploymentSKU="Standard" | ||
OpenAIDeploymentSKUCapacity=100 | ||
|
||
skipCreatingAzureOpenAICompletionDeployment= | ||
OpenAICompletionDeploymentName= | ||
OpenAICompletionDeploymentModel="gpt-35-turbo" | ||
OpenAICompletionDeploymentModelFormat="OpenAI" | ||
OpenAICompletionDeploymentModelVersion="0301" | ||
OpenAICompletionDeploymentSKU="Standard" | ||
OpenAICompletionDeploymentSKUCapacity=100 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
node_modules/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
// Importing required modules | ||
const fs = require('fs'); | ||
const path = require('path'); | ||
const mongodb = require('mongodb'); | ||
const { MongoClient, updateOne } = require('mongodb'); | ||
|
||
// Importing custom modules | ||
const Customers = require('../Collections/customers'); | ||
const Products = require('../Collections/products'); | ||
const SalesOrders = require('../Collections/salesOrders'); | ||
const Indexes = require('../SearchComponents/indexes'); | ||
|
||
/**
 * Loads every `.json` file found in a local folder into a MongoDB collection named
 * after the file, optionally adding embedding vectors and vector indexes.
 *
 * For each file the documents are upserted by `_id` with unordered bulk writes of
 * at most `batchSize` operations. When vectorization is enabled for the collection
 * ("customers", "products" or "salesOrders"), every document is first passed through
 * the matching embedding generator, and the collection's vector indexes are created
 * once all documents have been written.
 *
 * Files that do not end in `.json`, and files whose content is not a non-empty
 * JSON array, are skipped.
 *
 * @param {object} client - Connected MongoDB client (must expose `db(name)`).
 * @param {string} dataFolder - Folder containing the JSON files.
 * @param {string} cosmosDbMongoDbDatabase - Target database name.
 * @param {number} batchSize - Maximum number of operations per bulk write.
 * @param {*} embeddingsDeployment - Passed through to the embedding generators.
 * @param {*} AzureOpenAIClient - Passed through to the embedding generators.
 * @param {boolean} processCustomersVector - Vectorize and index the "customers" collection.
 * @param {boolean} processProductsVector - Vectorize and index the "products" collection.
 * @param {boolean} processSalesOrdersVector - Vectorize and index the "salesOrders" collection.
 * @returns {Promise<void>}
 */
async function loadAndVectorizeLocalBlobDataToMongoDBCluster(client, dataFolder, cosmosDbMongoDbDatabase, batchSize, embeddingsDeployment, AzureOpenAIClient, processCustomersVector, processProductsVector, processSalesOrdersVector) {
    // Everything that differs between the vectorized collections lives here, so the
    // "is this collection vectorized?" decision is made in exactly one place.
    const vectorSettings = new Map([
        ["customers", {
            enabled: processCustomersVector,
            embed: (doc) => Customers.generateCustomerEmbedding(doc, embeddingsDeployment, AzureOpenAIClient),
            indexList: [
                ["customerTypeVectorSearchIndex", "customerTypeVector"],
                ["customerTitleVectorSearchIndex", "customerTitleVector"],
                ["customerNameVectorSearchIndex", "customerNameVector"],
                ["customerEmailAddressVectorSearchIndex", "customerEmailAddressVector"],
                ["customerPhoneNumberVectorSearchIndex", "customerPhoneNumberVector"],
                ["customerAddressesVectorSearchIndex", "customerAddressesVector"]
            ]
        }],
        ["products", {
            enabled: processProductsVector,
            embed: (doc) => Products.generateProductEmbedding(doc, embeddingsDeployment, AzureOpenAIClient),
            indexList: [
                ["productVectorSearchIndex", "productVector"]
            ]
        }],
        ["salesOrders", {
            enabled: processSalesOrdersVector,
            embed: (doc) => SalesOrders.generateSalesOrderEmbedding(doc, embeddingsDeployment, AzureOpenAIClient),
            indexList: [
                ["salesOrderDetailVectorSearchIndex", "salesOrderDetailVector"]
            ]
        }]
    ]);

    // Read the list of files from the data folder
    for (const blobFile of fs.readdirSync(dataFolder)) {
        // Match on the real extension: a substring test would also pick up
        // names such as "products.json.bak".
        if (path.extname(blobFile) !== '.json') {
            continue;
        }

        console.log(`\n(${new Date().toISOString()}) ${blobFile}`);

        // Read the content of the file and parse it as JSON
        const fileContent = fs.readFileSync(path.join(dataFolder, blobFile), 'utf-8');
        const jsonData = JSON.parse(fileContent);

        // Process only if the file actually holds documents
        if (!Array.isArray(jsonData) || jsonData.length === 0) {
            continue;
        }

        const totalNumberOfDocuments = jsonData.length;

        // The collection is named after the file, minus its extension
        const collectionName = path.basename(blobFile, '.json');
        const db = client.db(cosmosDbMongoDbDatabase);
        const collection = db.collection(collectionName);

        const settings = vectorSettings.get(collectionName);
        const vectorize = Boolean(settings && settings.enabled);

        let batchNumber = 1;
        let currentDocIdx = 0;
        let operations = [];

        // Sends the pending operations as one unordered bulk write
        const flushBatch = async () => {
            console.log(`\tWriting collection ${collectionName}, batch size ${batchSize}, batch ${batchNumber}, number of documents processed so far ${currentDocIdx}.`);
            await collection.bulkWrite(operations, { ordered: false });
            operations = [];
            batchNumber++;
        };

        for (let doc of jsonData) {
            currentDocIdx++;

            if (vectorize) {
                // Generate the embeddings for this collection's documents
                doc = await settings.embed(doc);

                // Log the progress for every 100 documents processed
                if (currentDocIdx % 100 === 0) {
                    console.log(`\t${currentDocIdx} out of ${totalNumberOfDocuments} docs vectorized.`);
                }
            }

            // Upsert by _id so that re-running the loader does not duplicate documents
            operations.push({
                updateOne: {
                    filter: { "_id": doc["_id"] },
                    update: { "$set": doc },
                    upsert: true
                }
            });

            // `>=` rather than `===` so a non-integer batch size still flushes
            if (batchSize > 0 && operations.length >= batchSize) {
                await flushBatch();
            }
        }

        // Log the completion of vectorization
        if (vectorize) {
            console.log(`\t${totalNumberOfDocuments} out of ${totalNumberOfDocuments} docs vectorized.`);
        }

        // Write any remaining operations to the database
        if (operations.length > 0) {
            await flushBatch();
        }

        console.log(`(${new Date().toISOString()}) Collection ${collectionName}, total number of documents processed ${currentDocIdx} .\n`);

        // Create the vector indexes for the collection
        if (vectorize) {
            await Indexes.createVectorIndexes(collection, settings.indexList, db, collectionName);
        }
    }
}
|
||
// Export the function | ||
module.exports.loadAndVectorizeLocalBlobDataToMongoDBCluster = loadAndVectorizeLocalBlobDataToMongoDBCluster; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
// Import the BlobServiceClient from the Azure Storage Blob package | ||
const { BlobServiceClient } = require('@azure/storage-blob'); | ||
|
||
// Import the file system module | ||
const fs = require('fs'); | ||
|
||
// Import the path module | ||
const path = require('path'); | ||
|
||
/**
 * Downloads every blob of a container into a local folder, skipping blobs whose
 * local file already exists.
 *
 * The returned promise resolves only after each downloaded file has been completely
 * written, so callers can read the files immediately afterwards.
 *
 * @param {string} accountUrl - URL passed to the BlobServiceClient constructor.
 * @param {string} containerName - Name of the container to list.
 * @param {string} dataFolder - Local folder the blobs are written to.
 * @returns {Promise<void>}
 * @throws Rejects with the underlying error if listing, downloading or writing fails.
 */
async function downloadFilesFromBlobIfTheyDontExist(accountUrl, containerName, dataFolder) {
    // Create a new BlobServiceClient and get a ContainerClient for the specified container
    const blobServiceClient = new BlobServiceClient(accountUrl);
    const containerClient = blobServiceClient.getContainerClient(containerName);

    // Iterate over all blobs in the container
    for await (const blob of containerClient.listBlobsFlat()) {
        // Construct the local file path
        const filePath = path.join(dataFolder, blob.name);

        // Nothing to do when the file is already present locally
        if (fs.existsSync(filePath)) {
            continue;
        }

        // The data folder (or a sub-folder implied by the blob name) may not exist yet
        fs.mkdirSync(path.dirname(filePath), { recursive: true });

        // Download the blob
        const blobClient = containerClient.getBlobClient(blob.name);
        const downloadBlockBlobResponse = await blobClient.download(0);
        const blobStream = downloadBlockBlobResponse.readableStreamBody;

        // Create a write stream for the local file
        const fileStream = fs.createWriteStream(filePath);

        try {
            // pipe() returns immediately; wait for 'finish' so the file is complete
            // before moving on, and surface errors from either side of the pipe.
            await new Promise((resolve, reject) => {
                blobStream.on('error', reject);
                fileStream.on('error', reject);
                fileStream.on('finish', resolve);
                blobStream.pipe(fileStream);
            });
        } catch (err) {
            // Remove the partial file, otherwise the next run would treat the
            // truncated download as "already exists" and never retry it.
            fileStream.destroy();
            try {
                if (fs.existsSync(filePath)) {
                    fs.unlinkSync(filePath);
                }
            } catch (cleanupErr) {
                // Best-effort cleanup only; the original failure is what matters.
            }
            throw err;
        }
    }
}
|
||
// Export the function | ||
module.exports.downloadFilesFromBlobIfTheyDontExist = downloadFilesFromBlobIfTheyDontExist; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// Import the Embeddings module from the SearchComponents directory | ||
const Embeddings = require('../SearchComponents/embeddings'); | ||
|
||
/**
 * Flattens a list of address objects into a single descriptive string.
 *
 * Each address contributes its non-empty fields in a fixed order (address line 1,
 * address line 2, city, state, country, zip code, location); the descriptions of
 * consecutive addresses are separated by "; ".
 *
 * @param {Array<object>} [addresses] - Address records. A missing or non-array
 *   value is treated as "no addresses" instead of throwing.
 * @returns {string} The concatenated description, or "" when there are no addresses.
 */
function getCustomerAddresses(addresses) {
    // Customers without an `addresses` array must not crash the caller
    if (!Array.isArray(addresses)) {
        return "";
    }

    return addresses
        .map((entry) => {
            // Tolerate null/undefined entries inside the list
            const address = entry || {};
            return (address.addressLine1 ? "Address Line - " + address.addressLine1 : "") +
                (address.addressLine2 ? " " + address.addressLine2 : "") +
                (address.city ? ", city - " + address.city : "") +
                (address.state ? ", state - " + address.state : "") +
                (address.country ? ", country - " + address.country : "") +
                (address.zipCode ? ", zipcode - " + address.zipCode : "") +
                (address.location ? ", location - " + address.location : "");
        })
        .join("; ");
}
|
||
/**
 * Adds embedding vectors to a customer document for each field that is present.
 *
 * Vectors are written onto the passed-in object under `customerTypeVector`,
 * `customerTitleVector`, `customerNameVector` (needs both first and last name),
 * `customerEmailAddressVector`, `customerPhoneNumberVector` and
 * `customerAddressesVector`. Fields that are missing or empty are skipped.
 * The embedding requests are issued one after another, in that order.
 *
 * @param {object} customer - Customer document; it is mutated and returned.
 * @param {*} embeddingsDeployment - Passed through to Embeddings.generateEmbeddings.
 * @param {*} AzureOpenAIClient - Passed through to Embeddings.generateEmbeddings.
 * @returns {Promise<object>} The same customer object with the vectors added.
 */
async function generateCustomerEmbedding(customer, embeddingsDeployment, AzureOpenAIClient) {
    // Every field uses the same deployment and client, so bind them once
    const embed = (text) => Embeddings.generateEmbeddings(text, embeddingsDeployment, AzureOpenAIClient);

    if (customer.type) {
        customer.customerTypeVector = await embed(customer.type);
    }

    if (customer.title) {
        customer.customerTitleVector = await embed(customer.title);
    }

    // The name vector is only generated when both parts of the name are known
    if (customer.firstName && customer.lastName) {
        customer.customerNameVector = await embed(customer.firstName + " " + customer.lastName);
    }

    if (customer.emailAddress) {
        customer.customerEmailAddressVector = await embed(customer.emailAddress);
    }

    if (customer.phoneNumber) {
        customer.customerPhoneNumberVector = await embed(customer.phoneNumber);
    }

    // A customer without an `addresses` array is treated as having no addresses
    // rather than failing the whole document.
    const addressList = Array.isArray(customer.addresses) ? customer.addresses : [];
    const address = getCustomerAddresses(addressList);
    if (address.length > 0) {
        customer.customerAddressesVector = await embed(address);
    }

    // Return the customer object with the added embeddings
    return customer;
}
|
||
// Export the generateCustomerEmbedding function | ||
module.exports.generateCustomerEmbedding = generateCustomerEmbedding; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
// Import the Embeddings module from the SearchComponents directory | ||
const Embeddings = require('../SearchComponents/embeddings'); | ||
|
||
/**
 * Adds an embedding vector for a product's category and name.
 *
 * The embedded text has the form "Category - <categoryName>, Name -<name>"; a part
 * whose field is missing or empty is left out. When neither field is present no
 * embedding is requested and `productVector` is not set.
 *
 * @param {object} product - Product document; it is mutated and returned.
 * @param {*} embeddingsDeployment - Passed through to Embeddings.generateEmbeddings.
 * @param {*} AzureOpenAIClient - Passed through to Embeddings.generateEmbeddings.
 * @returns {Promise<object>} The same product object, with `productVector` added when applicable.
 */
async function generateProductEmbedding(product, embeddingsDeployment, AzureOpenAIClient) {
    // Build the text only from fields that exist. Concatenating blindly would
    // embed the literal word "undefined" and make the emptiness check below dead code.
    const parts = [];
    if (product["categoryName"]) {
        parts.push("Category - " + product["categoryName"]);
    }
    if (product["name"]) {
        // The label text is kept exactly as before so the embedded text is unchanged
        // for products that have both fields.
        parts.push("Name -" + product["name"]);
    }
    const productName = parts.join(", ");

    // Only request an embedding when there is something to describe
    if (productName) {
        product["productVector"] = await Embeddings.generateEmbeddings(productName, embeddingsDeployment, AzureOpenAIClient);
    }

    // Return the product object with the added embedding
    return product;
}
|
||
// Export the generateProductEmbedding function | ||
module.exports.generateProductEmbedding = generateProductEmbedding; |
Oops, something went wrong.