Merge branch 'main' into pdf-upload-error
bigadsoleiman authored Apr 21, 2024
2 parents 690f487 + d0edf9b commit 14de25d
Showing 6 changed files with 65 additions and 19 deletions.
19 changes: 19 additions & 0 deletions cli/magic-config.ts
@@ -72,6 +72,7 @@ const cfCountries = getCountryCodesAndNames();

const iamRoleRegExp = RegExp(/arn:aws:iam::\d+:role\/[\w-_]+/);
const kendraIdRegExp = RegExp(/^\w{8}-\w{4}-\w{4}-\w{4}-\w{12}$/);
const secretManagerArnRegExp = RegExp(/arn:aws:secretsmanager:[\w-_]+:\d+:secret:[\w-_]+/);

const embeddingModels = [
{
@@ -144,6 +145,7 @@ const embeddingModels = [
options.enableSagemakerModels = config.llms?.sagemaker
? config.llms?.sagemaker.length > 0
: false;
options.huggingfaceApiSecretArn = config.llms?.huggingfaceApiSecretArn;
options.enableSagemakerModelsSchedule = config.llms?.sagemakerSchedule?.enabled;
options.timezonePicker = config.llms?.sagemakerSchedule?.timezonePicker;
options.enableCronFormat = config.llms?.sagemakerSchedule?.enableCronFormat;
@@ -356,6 +358,22 @@ async function processCreateOptions(options: any): Promise<void> {
return !(this as any).state.answers.enableSagemakerModels;
},
},
{
type: "input",
name: "huggingfaceApiSecretArn",
message:
"Some HuggingFace models including mistral now require an API key, Please enter an Secrets Manager Secret ARN (see docs: Model Requirements)",
validate: (v: string) => {
const valid = secretManagerArnRegExp.test(v);
return v.length === 0 || valid
? true
: "If you are supplying a HF API key it needs to be a reference to a secrets manager secret ARN"
},
initial: options.huggingfaceApiSecretArn || "",
skip(): boolean {
return !(this as any).state.answers.enableSagemakerModels;
},
},
{
type: "confirm",
name: "enableSagemakerModelsSchedule",
@@ -724,6 +742,7 @@ async function processCreateOptions(options: any): Promise<void> {
: undefined,
llms: {
sagemaker: answers.sagemakerModels,
huggingfaceApiSecretArn: answers.huggingfaceApiSecretArn,
sagemakerSchedule: answers.enableSagemakerModelsSchedule
? {
enabled: answers.enableSagemakerModelsSchedule,
12 changes: 12 additions & 0 deletions docs/documentation/model-requirements.md
@@ -19,6 +19,18 @@ Note:

![sample](./assets/enable-models.gif "AWS GenAI Chatbot")

## HuggingFace Authentication

Some models hosted on HuggingFace require an API key for access. For example, MistralAI and Meta models are now gated behind acceptance of their EULA.

If you wish to continue using these models, or to access other HuggingFace models that require authentication, you can now supply a HF token as part of the installer.

When you enable SageMaker models in the installer, it will now ask for the ARN of a Secrets Manager secret containing the HF API token.

You can read more about setting up access tokens on the [HF website](https://huggingface.co/docs/hub/en/security-tokens). Once you have a token, you may also need to visit a model's page, such as Mistral 7B, and accept its terms before the token can be used to access that model.

The secret you create in Secrets Manager should be a plaintext secret containing just the HF token itself.
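
As a minimal sketch (using the AWS SDK for JavaScript v3; the secret name and token value below are placeholders), the secret could be created programmatically and the returned ARN pasted into the installer prompt:

```ts
import { SecretsManagerClient, CreateSecretCommand } from "@aws-sdk/client-secrets-manager";

// Hypothetical name and token value - substitute your own.
const client = new SecretsManagerClient({});
const result = await client.send(
  new CreateSecretCommand({
    Name: "hf-api-token",
    SecretString: "hf_xxxxxxxxxxxxxxxx", // just the raw HF token, nothing else
  })
);

console.log(result.ARN); // supply this ARN when the installer asks for it
```

You could equally create the secret in the AWS console or with the AWS CLI; the only requirement is that the secret value is the bare token string.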

## Third-party models requirements

You can also interact with external providers via their API, such as AI21 Labs, Cohere, OpenAI, etc.
20 changes: 11 additions & 9 deletions lib/models/index.ts
@@ -2,6 +2,7 @@ import * as cdk from "aws-cdk-lib";
import * as ssm from "aws-cdk-lib/aws-ssm";
import { Construct } from "constructs";
import * as iam from "aws-cdk-lib/aws-iam";
import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager';
import { Shared } from "../shared";
import {
Modality,
@@ -34,6 +35,10 @@ export class Models extends Construct {

const models: SageMakerModelEndpoint[] = [];

let hfTokenSecret: secretsmanager.Secret | undefined;
if (props.config.llms.huggingfaceApiSecretArn) {
hfTokenSecret = secretsmanager.Secret.fromSecretCompleteArn(
  this,
  "HFTokenSecret",
  props.config.llms.huggingfaceApiSecretArn
) as secretsmanager.Secret;
}
if (
props.config.llms?.sagemaker.includes(SupportedSageMakerModels.FalconLite)
) {
@@ -106,11 +111,12 @@
),
},
container:
DeepLearningContainerImage.HUGGINGFACE_PYTORCH_TGI_INFERENCE_2_0_1_TGI1_1_0_GPU_PY39_CU118_UBUNTU20_04,
DeepLearningContainerImage.HUGGINGFACE_PYTORCH_TGI_INFERENCE_2_1_1_TGI2_0_0_GPU_PY310_CU121_UBUNTU22_04,
instanceType: SageMakerInstanceType.ML_G5_2XLARGE,
startupHealthCheckTimeoutInSeconds: 300,
endpointName: MISTRAL_7B_ENDPOINT_NAME,
environment: {
HF_TOKEN: hfTokenSecret?.secretValue.unsafeUnwrap().toString() || "",
SM_NUM_GPUS: JSON.stringify(1),
MAX_INPUT_LENGTH: JSON.stringify(2048),
MAX_TOTAL_TOKENS: JSON.stringify(4096),
@@ -152,14 +158,12 @@ export class Models extends Construct {
(subnet) => subnet.subnetId
),
},
container: DeepLearningContainerImage.fromDeepLearningContainerImage(
"huggingface-pytorch-tgi-inference",
"2.1.1-tgi1.3.3-gpu-py310-cu121-ubuntu20.04"
),
container: DeepLearningContainerImage.HUGGINGFACE_PYTORCH_TGI_INFERENCE_2_1_1_TGI2_0_0_GPU_PY310_CU121_UBUNTU22_04,
instanceType: SageMakerInstanceType.ML_G5_2XLARGE,
startupHealthCheckTimeoutInSeconds: 300,
endpointName: MISTRAL_7B_INSTRUCT2_ENDPOINT_NAME,
environment: {
HF_TOKEN: hfTokenSecret?.secretValue.unsafeUnwrap().toString() || "",
SM_NUM_GPUS: JSON.stringify(1),
MAX_INPUT_LENGTH: JSON.stringify(2048),
MAX_TOTAL_TOKENS: JSON.stringify(4096),
@@ -205,14 +209,12 @@ export class Models extends Construct {
(subnet) => subnet.subnetId
),
},
container: DeepLearningContainerImage.fromDeepLearningContainerImage(
"huggingface-pytorch-tgi-inference",
"2.1.1-tgi1.3.3-gpu-py310-cu121-ubuntu20.04"
),
container: DeepLearningContainerImage.HUGGINGFACE_PYTORCH_TGI_INFERENCE_2_1_1_TGI2_0_0_GPU_PY310_CU121_UBUNTU22_04,
instanceType: SageMakerInstanceType.ML_G5_48XLARGE,
startupHealthCheckTimeoutInSeconds: 300,
endpointName: MISTRAL_8x7B_INSTRUCT2_ENDPOINT_NAME,
environment: {
HF_TOKEN: hfTokenSecret?.secretValue.unsafeUnwrap().toString() || "",
SM_NUM_GPUS: JSON.stringify(8),
MAX_INPUT_LENGTH: JSON.stringify(24576),
MAX_TOTAL_TOKENS: JSON.stringify(32768),
1 change: 1 addition & 0 deletions lib/shared/types.ts
@@ -88,6 +88,7 @@ export interface SystemConfig {
};
llms: {
sagemaker: SupportedSageMakerModels[];
huggingfaceApiSecretArn?: string;
sagemakerSchedule?: {
enabled?: boolean;
timezonePicker?: string;
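
For context, a hypothetical excerpt of the resulting `llms` config block (field names follow the `SystemConfig` interface above; the values are placeholders, not taken from the repository's defaults):

```ts
const llms = {
  sagemaker: [], // whichever SupportedSageMakerModels values were enabled
  huggingfaceApiSecretArn:
    "arn:aws:secretsmanager:us-east-1:123456789012:secret:hf-api-token-AbC123",
};
```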
30 changes: 21 additions & 9 deletions package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
@@ -43,7 +43,7 @@
"@aws-cdk/aws-apigatewayv2-authorizers-alpha": "^2.114.1-alpha.0",
"@aws-cdk/aws-apigatewayv2-integrations-alpha": "^2.114.1-alpha.0",
"@aws-cdk/aws-cognito-identitypool-alpha": "^2.114.1-alpha.0",
"@cdklabs/generative-ai-cdk-constructs": "^0.1.50",
"@cdklabs/generative-ai-cdk-constructs": "^0.1.122",
"aws-cdk-lib": "2.126.0",
"cdk-nag": "2.27.170",
"commander": "^11.0.0",
