diff --git a/cspell.json b/cspell.json
index 80ac1464f23..43de9d0c9e8 100644
--- a/cspell.json
+++ b/cspell.json
@@ -1610,7 +1610,8 @@
"esac",
"voteField",
"ampx",
- "autodetection"
+ "autodetection",
+ "jamba"
],
"flagWords": ["hte", "full-stack", "Full-stack", "Full-Stack", "sudo"],
"patterns": [
diff --git a/src/pages/[platform]/ai/concepts/architecture/index.mdx b/src/pages/[platform]/ai/concepts/architecture/index.mdx
new file mode 100644
index 00000000000..044e8c12374
--- /dev/null
+++ b/src/pages/[platform]/ai/concepts/architecture/index.mdx
@@ -0,0 +1,52 @@
+import { getCustomStaticPath } from "@/utils/getCustomStaticPath";
+
+export const meta = {
+ title: "Architecture",
+ description:
+ "Amplify AI Kit fullstack architecture",
+ platforms: [
+ "javascript",
+ "react-native",
+ "angular",
+ "nextjs",
+ "react",
+ "vue",
+ ],
+};
+
+export const getStaticPaths = async () => {
+ return getCustomStaticPath(meta.platforms);
+};
+
+export function getStaticProps(context) {
+ return {
+ props: {
+ platform: context.params.platform,
+ meta,
+ },
+ };
+}
+
+
+
+The Amplify AI kit is built around the idea of routes. An AI route is like an API endpoint for interacting with backend AI functionality. AI routes are configured in an Amplify backend, where you define the authorization rules, the type of route (generation or conversation), the AI model and inference configuration (like temperature), the inputs and outputs, and what data the route has access to. There are currently two types of AI routes:
+
+* **Conversation:** A conversation route is an asynchronous, multi-turn API. Conversations and messages are automatically stored in DynamoDB. Examples include chat-based AI experiences and conversational UIs.
+* **Generation:** A single synchronous request-response API. A generation route is an AppSync query that generates structured data according to your route definition. Common uses include generating structured data from unstructured input and summarization. Both route types are sketched in the example after this list.
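+
+As a rough sketch, defining one of each route type in an Amplify data schema can look something like the following. The route names are illustrative, and `a.conversation()` is assumed here as the conversation counterpart to the `a.generation()` examples shown elsewhere in these pages.
+
+```ts
+import { a } from "@aws-amplify/backend";
+
+const schema = a.schema({
+  // Conversation route: multi-turn chat; history is stored in DynamoDB.
+  chat: a.conversation({
+    aiModel: a.ai.model("Claude 3 Haiku"),
+    systemPrompt: "You are a helpful assistant",
+  })
+  .authorization((allow) => allow.owner()),
+
+  // Generation route: a single request/response that returns structured data
+  // matching the declared return type.
+  summarize: a.generation({
+    aiModel: a.ai.model("Claude 3 Haiku"),
+    systemPrompt: "Summarize the provided text",
+  })
+  .arguments({ input: a.string() })
+  .returns(a.customType({ summary: a.string() }))
+  .authorization((allow) => allow.authenticated()),
+});
+```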
+
+
+## Cloud infrastructure
+
+When you create an AI route with the Amplify AI kit, it uses these services:
+
+### AWS AppSync
+Serverless API layer to authorize and route requests from the browser to AWS services.
+
+### Amazon DynamoDB
+Serverless database for storing conversation history.
+
+### AWS Lambda
+Serverless execution for conversations.
+
+### Amazon Bedrock
+Serverless foundation models.
diff --git a/src/pages/[platform]/ai/concepts/index.mdx b/src/pages/[platform]/ai/concepts/index.mdx
new file mode 100644
index 00000000000..1977d28d033
--- /dev/null
+++ b/src/pages/[platform]/ai/concepts/index.mdx
@@ -0,0 +1,33 @@
+import { getChildPageNodes } from '@/utils/getChildPageNodes';
+import { getCustomStaticPath } from "@/utils/getCustomStaticPath";
+
+export const meta = {
+ title: "Concepts",
+ description:
+ "Learn about what Amplify AI provisions and get an overview about generative AI concepts and terminology.",
+  route: '/[platform]/ai/concepts',
+ platforms: [
+ "javascript",
+ "react-native",
+ "angular",
+ "nextjs",
+ "react",
+ "vue",
+ ],
+};
+
+export const getStaticPaths = async () => {
+ return getCustomStaticPath(meta.platforms);
+};
+
+export function getStaticProps(context) {
+ const childPageNodes = getChildPageNodes(meta.route);
+ return {
+ props: {
+ meta,
+ childPageNodes
+ }
+ };
+}
+
+
diff --git a/src/pages/[platform]/ai/concepts/inference-configuration/index.mdx b/src/pages/[platform]/ai/concepts/inference-configuration/index.mdx
new file mode 100644
index 00000000000..303203fc3e1
--- /dev/null
+++ b/src/pages/[platform]/ai/concepts/inference-configuration/index.mdx
@@ -0,0 +1,95 @@
+import { getCustomStaticPath } from "@/utils/getCustomStaticPath";
+
+export const meta = {
+ title: "Inference Configuration",
+ description:
+ "Learn about inference configuration",
+ platforms: [
+ "javascript",
+ "react-native",
+ "angular",
+ "nextjs",
+ "react",
+ "vue",
+ ],
+};
+
+export const getStaticPaths = async () => {
+ return getCustomStaticPath(meta.platforms);
+};
+
+export function getStaticProps(context) {
+ return {
+ props: {
+ platform: context.params.platform,
+ meta,
+ },
+ };
+}
+
+
+
+
+LLMs have parameters that can be configured to change how the model behaves. This is called inference configuration, or inference parameters. LLMs are actually *predicting* text based on the text input. This prediction is probabilistic and can be tweaked by adjusting the inference configuration to allow for more creative or more deterministic outputs. The right configuration depends on your use case.
+
+[Bedrock documentation on inference configuration](https://docs.aws.amazon.com/bedrock/latest/userguide/inference-parameters.html)
+
+
+
+Inference refers to the process of using a model to generate or predict output based on input data. In other words, inference is using a model after it has been trained on a dataset.
+
+
+
+
+
+
+
+## Setting inference configuration
+
+All generative AI routes in Amplify accept inference configuration as optional parameters. If you do not provide any inference configuration options, Bedrock will use the [default values for that particular model](#default-values).
+
+```ts
+a.generation({
+ aiModel: a.ai.model("Claude 3 Haiku"),
+ systemPrompt: `You are a helpful assistant`,
+ inferenceConfiguration: {
+ temperature: 0.2,
+ topP: 0.2,
+ maxTokens: 1000,
+ }
+})
+```
+
+## Definitions
+
+### Temperature
+
+Affects the shape of the probability distribution for the predicted output and influences the likelihood of the model selecting lower-probability outputs. Temperature is usually\* a number from 0 to 1, where a lower value influences the model to select higher-probability options. Another way to think about temperature is creativity: a low number (close to zero) produces the least creative and most deterministic response.
+
+\* AI21 Labs Jamba models use a temperature range of 0–2.0.
+
+### Top P
+
+Top p limits the pool of candidate tokens the model can choose from for the next token in the response: only the most likely tokens whose cumulative probability falls within the top p value are considered. A lower value decreases the size of the pool and limits the options to more likely outputs; a higher value increases the size of the pool and allows lower-probability tokens to be chosen.
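+
+To build intuition for how temperature and top p reshape the token distribution, here is an illustrative sketch of the sampling math. This is purely conceptual and not how Bedrock or any particular model implements sampling internally.
+
+```ts
+// Illustrative only: shows how temperature and top p narrow the pool of
+// candidate tokens. Real models do this internally over huge vocabularies.
+function candidatePool(
+  logits: Record<string, number>, // raw model scores per candidate token
+  temperature: number,            // lower (but > 0) => sharper, more deterministic
+  topP: number                    // cumulative probability cutoff
+): string[] {
+  // Temperature rescales the scores before converting them to probabilities.
+  const weights = Object.entries(logits).map(
+    ([token, logit]) => [token, Math.exp(logit / temperature)] as const
+  );
+  const total = weights.reduce((sum, [, w]) => sum + w, 0);
+  const probs = weights
+    .map(([token, w]) => [token, w / total] as const)
+    .sort((a, b) => b[1] - a[1]);
+
+  // Top p keeps the most likely tokens until their cumulative probability
+  // reaches the cutoff; everything else is excluded from sampling.
+  const pool: string[] = [];
+  let cumulative = 0;
+  for (const [token, p] of probs) {
+    pool.push(token);
+    cumulative += p;
+    if (cumulative >= topP) break;
+  }
+  return pool;
+}
+
+// With sharper settings, only the most likely tokens survive:
+// candidatePool({ sunny: 2.0, cloudy: 1.0, rain: 0.2 }, 0.5, 0.9) => ["sunny", "cloudy"]
+```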
+
+
+### Max Tokens
+
+This parameter limits the maximum number of tokens the model can generate in its response.
+
+
+## Default values
+
+
+| Model | Temperature | Top P | Max Tokens |
+| ----- | ----------- | ----- | ---------- |
+| [AI21 Labs Jamba](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-jamba.html#model-parameters-jamba-request-response) | 1.0* | 0.5 | 4096 |
+| [Meta Llama](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-meta.html#model-parameters-meta-request-response) | 0.5 | 0.9 | 512 |
+| [Amazon Titan](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-titan-text.html) | 0.7 | 0.9 | 512 |
+| [Anthropic Claude](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-anthropic-claude-messages.html#model-parameters-anthropic-claude-messages-request-response) | 1 | 0.999 | 512 |
+| [Cohere Command R](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-cohere-command-r-plus.html#model-parameters-cohere-command-request-response) | 0.3 | 0.75 | 512 |
+| [Mistral Large](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters-mistral-chat-completion.html#model-parameters-mistral-chat-completion-request-response) | 0.7 | 1 | 8192 |
+
+[Bedrock documentation on model default inference configuration](https://docs.aws.amazon.com/bedrock/latest/userguide/model-parameters.html)
+
+\* AI21 Labs Jamba models use a temperature range of 0–2.0.
diff --git a/src/pages/[platform]/ai/concepts/models/index.mdx b/src/pages/[platform]/ai/concepts/models/index.mdx
new file mode 100644
index 00000000000..fd531c7d4aa
--- /dev/null
+++ b/src/pages/[platform]/ai/concepts/models/index.mdx
@@ -0,0 +1,135 @@
+import { getCustomStaticPath } from "@/utils/getCustomStaticPath";
+
+export const meta = {
+ title: "Models",
+ description:
+ "Learn about foundation models provided by Amazon Bedrock used for generative AI",
+ platforms: [
+ "javascript",
+ "react-native",
+ "angular",
+ "nextjs",
+ "react",
+ "vue",
+ ],
+};
+
+export const getStaticPaths = async () => {
+ return getCustomStaticPath(meta.platforms);
+};
+
+export function getStaticProps(context) {
+ return {
+ props: {
+ platform: context.params.platform,
+ meta,
+ },
+ };
+}
+
+
+A foundation model is a large, general-purpose machine learning model that has been pre-trained on a vast amount of data. These models are trained in an unsupervised or self-supervised manner, meaning they learn patterns and representations from the unlabeled training data without being given specific instructions or labels.
+
+Foundation models are useful because they are general-purpose: you don't need to train them yourself, yet they are powerful enough to take on a wide range of applications.
+
+Foundation models, which include large language models (LLMs), are inherently stateless. They take input in the form of text or images and generate text or images in response. They are also inherently non-deterministic: providing the same input can generate different output.
+
+
+
+## Getting model access
+
+Before you can invoke a foundation model on Bedrock you will need to [request access to the models in the AWS console](https://console.aws.amazon.com/bedrock/home#/modelaccess).
+
+Be sure to request access in the same region you are building your Amplify app in!
+
+## Pricing and Limits
+
+Each foundation model in Amazon Bedrock has its own pricing and throughput limits for on-demand use. On-demand use is serverless: you don't need to provision any AWS resources, and you only pay for what you use. The Amplify AI kit uses on-demand inference with Bedrock.
+
+The cost of using foundation models is calculated by token usage. A token in generative AI is a chunk of data: both the input you send and the output the model generates are measured in tokens. A token is roughly equal to a word, though the exact tokenization depends on the model being used. Each foundation model in Bedrock has its own pricing based on input and output tokens used.
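+
+As a rough illustration of how token-based pricing works, here is a simple calculation. The prices and token counts below are hypothetical and chosen only to show the arithmetic; always use the Bedrock pricing page for real numbers.
+
+```ts
+// Hypothetical prices, for illustration only -- not real Bedrock pricing.
+const pricePer1kInputTokens = 0.003;  // USD, assumed
+const pricePer1kOutputTokens = 0.015; // USD, assumed
+
+// Suppose a request sends ~2,000 input tokens and generates ~500 output tokens.
+const inputTokens = 2_000;
+const outputTokens = 500;
+
+const requestCost =
+  (inputTokens / 1_000) * pricePer1kInputTokens +
+  (outputTokens / 1_000) * pricePer1kOutputTokens;
+
+console.log(requestCost.toFixed(4)); // "0.0135" USD for this single request
+```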
+
+When you use the Amplify AI Kit, inference requests are charged to your AWS account based on Bedrock pricing. There is no Amplify markup; you are just using AWS resources in your own account.
+
+Always refer to [Bedrock pricing](https://aws.amazon.com/bedrock/pricing/) for the most up-to-date information on running generative AI with Amplify AI Kit.
+
+
+## Supported Providers and Models
+
+The Amplify AI Kit uses Bedrock's [Converse API](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference.html) to provide a unified API across models. Each model provider has its own native request and response format for prompts, message history, and output; the Converse API normalizes these differences so you can interact with different models in a consistent way.
+
+### AI21 Labs
+* [Jamba 1.5 Large](https://aws.amazon.com/blogs/aws/jamba-1-5-family-of-models-by-ai21-labs-is-now-available-in-amazon-bedrock/)
+* [Jamba 1.5 Mini](https://aws.amazon.com/blogs/aws/jamba-1-5-family-of-models-by-ai21-labs-is-now-available-in-amazon-bedrock/)
+
+
+### Anthropic
+* Claude 3 Haiku
+* Claude 3 Sonnet
+* Claude 3 Opus
+* Claude 3.5 Sonnet
+[Anthropic Claude model documentation](https://docs.anthropic.com/en/docs/about-claude/models)
+
+### Cohere
+* Command R
+* Command R+
+
+### Meta
+* Llama 3.1
+
+### Mistral
+* Large
+* Large 2
+
+
+The Amplify AI Kit makes use of ["tools"](/[platform]/ai/concepts/tools) for both generation and conversation routes. Models used with the Amplify AI Kit must [support tool use in the Converse API](https://docs.aws.amazon.com/bedrock/latest/userguide/conversation-inference-supported-models-features.html).
+
+Using the Converse API makes it easy to swap different models without having to drastically change how you interact with them.
+
+## Choosing a model
+
+Each model and model provider has its own strengths and weaknesses. We encourage you to try different models for different use cases to find the right fit. Things to consider when choosing a model:
+
+### Context window
+
+Each model has its own context window size. The context window is how much information you can send to the model in a single request, defined by the number of tokens the model can receive. FMs are stateless, but conversation routes manage message history for you, so the amount of context sent to the model grows as you "chat" with it.
+
+### Latency
+
+Smaller models tend to have a lower latency than larger models, but can also sometimes be less powerful.
+
+### Cost
+
+Each model has its own price and throughput.
+
+### Use-case fit
+
+Some models are trained to be better at certain tasks or with certain languages.
+
+Choosing the right model for your use case means balancing latency, cost, and performance.
+
+
+## Using different models
+
+With the Amplify AI Kit you can easily use different models for different functionality in your application. Each AI route definition has an `aiModel` attribute you define in your schema. To use different foundation models in your Amplify AI backend, set the `aiModel` using `a.ai.model()`:
+
+```ts
+const schema = a.schema({
+ summarizer: a.generation({
+ aiModel: a.ai.model("Claude 3 Haiku")
+ })
+})
+```
+
+The `a.ai.model()` function gives you access to friendly names for the Bedrock models. We will keep this function up to date as new models are added to Bedrock. If a new model has not yet been added, you can always use the model ID, which can be found in the Bedrock console or documentation:
+
+```ts
+const schema = a.schema({
+ summarizer: a.generation({
+ aiModel: {
+ resourcePath: 'meta.llama3-1-405b-instruct-v1:0'
+ }
+ })
+})
+```
+
+
diff --git a/src/pages/[platform]/ai/concepts/prompting/index.mdx b/src/pages/[platform]/ai/concepts/prompting/index.mdx
new file mode 100644
index 00000000000..0a5ce6e67b8
--- /dev/null
+++ b/src/pages/[platform]/ai/concepts/prompting/index.mdx
@@ -0,0 +1,80 @@
+import { getCustomStaticPath } from "@/utils/getCustomStaticPath";
+
+export const meta = {
+ title: "Prompting",
+ description:
+ "Amplify AI Concepts: Prompting",
+ platforms: [
+ "javascript",
+ "react-native",
+ "angular",
+ "nextjs",
+ "react",
+ "vue",
+ ],
+};
+
+export const getStaticPaths = async () => {
+ return getCustomStaticPath(meta.platforms);
+};
+
+export function getStaticProps(context) {
+ return {
+ props: {
+ platform: context.params.platform,
+ meta,
+ },
+ };
+}
+
+
+
+
+
+LLM prompting refers to the process of providing a language model, such as Claude or Amazon Titan, with a specific input or "prompt" in order to generate a desired output. The prompt can be a sentence, a paragraph, or even a more complex sequence of instructions that guides the model to produce content that aligns with the user's intent.
+
+The way the prompt is structured and worded can significantly influence the model's response. By crafting the prompt carefully, users can leverage the LLM's extensive knowledge and language understanding capabilities to generate high-quality and relevant text, code, or other types of output.
+
+Effective prompting involves understanding the model's strengths and limitations, as well as experimenting with different prompt formats, styles, and techniques to elicit the desired responses. This can include using specific keywords, providing context, breaking down tasks into steps, and incorporating formatting elements like bullet points or code blocks.
+
+Model APIs have evolved beyond taking a single string as input and returning a string as output. Newer models have a more structured API where you define a system prompt, message history, and tool configurations. The Amplify AI kit uses Bedrock's [Converse API](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_Converse.html), which has structured input and output rather than only text in and text out.
+
+## Prompt structure
+
+* **System prompt:** High-level instructions to the LLM about its role and how it should respond.
+* **Messages:** The conversation history you want the model to respond to. The Amplify AI kit handles saving conversation history and providing it to the model.
+* **Tool configuration:** Information about the tools the model can choose to invoke. The Amplify AI kit creates the tool configuration for you, invokes the tools, and re-prompts the model with the results. A simplified sketch of this request structure is shown after this list.
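+
+For reference, this is roughly what that structure looks like in a Bedrock Converse API request. This is a simplified sketch; the Amplify AI kit assembles and sends the request for you, and the tool shown here (`listReviews`) is purely hypothetical.
+
+```ts
+// Simplified shape of a Bedrock Converse API request body.
+const converseRequest = {
+  modelId: "anthropic.claude-3-haiku-20240307-v1:0",
+  // System prompt: high-level instructions about the model's role.
+  system: [{ text: "You are a helpful assistant for an ecommerce site." }],
+  // Messages: the conversation history the model should respond to.
+  messages: [
+    {
+      role: "user",
+      content: [{ text: "Summarize the latest reviews for product 42." }],
+    },
+  ],
+  // Tool configuration: tools the model may choose to invoke.
+  toolConfig: {
+    tools: [
+      {
+        toolSpec: {
+          name: "listReviews", // hypothetical tool
+          description: "List recent reviews for a product",
+          inputSchema: {
+            json: {
+              type: "object",
+              properties: { productId: { type: "string" } },
+            },
+          },
+        },
+      },
+    ],
+  },
+};
+```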
+
+
+## How to customize the system prompt
+
+All AI routes in the Amplify AI kit require a system prompt. This will be used in all requests to the LLM.
+
+```ts
+reviewSummarizer: a.generation({
+ aiModel: a.ai.model("Claude 3.5 Sonnet"),
+ systemPrompt: `
+ You are a helpful assistant that summarizes reviews
+ for an ecommerce site.
+ `
+})
+```
+
+## Tips
+
+**Be as detailed as possible.** Try to give as much background and context as you can. Giving the LLM a role and scope typically helps focus the model's responses.
+
+**Say what it should and shouldn't do.** LLMs can sometimes be verbose or go off on tangents. Give the model specific guardrails, like "Never use placeholder data."
+
+**Use multiple routes.** You can define as many conversation and generation routes as you like, so you don't need to try to fit all the context and functionality you need in a single route.
+
+**You don't need to put everything into the system prompt.** The message history or even just a single user message can contain a lot of dynamic information.
+
+**Prompting strategies differ based on the model.** Always read up on what does and doesn't work well with the particular model you are using.
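+
+Putting a few of these tips together, a more detailed system prompt for a generation route might look something like the following. The route and wording are illustrative only.
+
+```ts
+reviewSummarizer: a.generation({
+  aiModel: a.ai.model("Claude 3.5 Sonnet"),
+  systemPrompt: `
+    You are an assistant that summarizes customer reviews
+    for an ecommerce site.
+
+    - Summarize only the reviews you are given; never use
+      placeholder or invented data.
+    - Keep summaries to 2-3 sentences and note the overall
+      sentiment (positive, mixed, or negative).
+    - Do not include personal information about reviewers.
+  `
+})
+```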
+
+## Prompting resources
+
+* [What is a prompt](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-a-prompt.html)
+* [What is prompt engineering](https://docs.aws.amazon.com/bedrock/latest/userguide/what-is-prompt-engineering.html)
+* [Design a prompt](https://docs.aws.amazon.com/bedrock/latest/userguide/design-a-prompt.html)
+* [Anthropic prompt library](https://docs.anthropic.com/en/prompt-library/library)
diff --git a/src/pages/[platform]/ai/concepts/streaming/index.mdx b/src/pages/[platform]/ai/concepts/streaming/index.mdx
new file mode 100644
index 00000000000..ca02874bcb3
--- /dev/null
+++ b/src/pages/[platform]/ai/concepts/streaming/index.mdx
@@ -0,0 +1,41 @@
+import { getCustomStaticPath } from "@/utils/getCustomStaticPath";
+
+export const meta = {
+ title: "Streaming",
+ description:
+ "Learn about how streaming works with LLMs and the Amplify AI kit",
+ platforms: [
+ "javascript",
+ "react-native",
+ "angular",
+ "nextjs",
+ "react",
+ "vue",
+ ],
+};
+
+export const getStaticPaths = async () => {
+ return getCustomStaticPath(meta.platforms);
+};
+
+export function getStaticProps(context) {
+ return {
+ props: {
+ platform: context.params.platform,
+ meta,
+ },
+ };
+}
+
+
+When an LLM generates a large amount of text, like over 100 words, it can take a while for the full response to finish. Rather than waiting for the entire response to come back, we can send text back to the client as it is generated.
+
+Foundation model providers like Amazon Bedrock will typically have an HTTP streaming API which can send back the response in chunks.
+
+## How Amplify AI kit streaming works
+
+The Amplify AI kit does not stream HTTP responses from the backend to the frontend the way some other AI frameworks do. Instead, streaming updates are sent to the browser over a WebSocket connection to AWS AppSync.
+
+The Lambda function that the Amplify AI kit provisions calls Bedrock with a streaming API request. The Lambda receives the chunks from the HTTP streaming response and sends updates to AppSync, which the client subscribes to.
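+
+Inside the Lambda, the streaming call to Bedrock looks roughly like the following sketch, which uses the AWS SDK's ConverseStream API. This is a simplified illustration; the Amplify-provisioned Lambda also handles publishing each chunk to AppSync for you.
+
+```ts
+import {
+  BedrockRuntimeClient,
+  ConverseStreamCommand,
+} from "@aws-sdk/client-bedrock-runtime";
+
+const client = new BedrockRuntimeClient();
+
+// Call Bedrock with a streaming request and handle chunks as they arrive
+// instead of waiting for the full response.
+const response = await client.send(
+  new ConverseStreamCommand({
+    modelId: "anthropic.claude-3-haiku-20240307-v1:0",
+    messages: [{ role: "user", content: [{ text: "Tell me a long story" }] }],
+  })
+);
+
+for await (const event of response.stream ?? []) {
+  const text = event.contentBlockDelta?.delta?.text;
+  if (text) {
+    // In the Amplify AI kit, each chunk is forwarded to AppSync here,
+    // which pushes it to subscribed clients over WebSockets.
+    console.log(text);
+  }
+}
+```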
+
+If you are using the provided React hook, `useAIConversation`, you don't need to worry about any of this: the hook handles the subscription for you and provides the conversation messages as React state that updates as chunks are received.
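+
+For example, a rough sketch of that usage, assuming a conversation route named `chat` in your schema and hooks created with `createAIHooks` from `@aws-amplify/ui-react-ai`:
+
+```tsx
+import { generateClient } from "aws-amplify/api";
+import { createAIHooks } from "@aws-amplify/ui-react-ai";
+import type { Schema } from "../amplify/data/resource"; // assumed backend path
+
+const client = generateClient<Schema>({ authMode: "userPool" });
+const { useAIConversation } = createAIHooks(client);
+
+export function Chat() {
+  const [
+    {
+      data: { messages }, // re-renders as streamed chunks arrive
+      isLoading,
+    },
+    sendMessage,
+  ] = useAIConversation("chat"); // "chat" is the conversation route name
+
+  return (
+    <button
+      disabled={isLoading}
+      onClick={() => sendMessage({ content: [{ text: "Hello!" }] })}
+    >
+      Send ({messages.length} messages so far)
+    </button>
+  );
+}
+```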
diff --git a/src/pages/[platform]/ai/concepts/tools/index.mdx b/src/pages/[platform]/ai/concepts/tools/index.mdx
new file mode 100644
index 00000000000..2559467e923
--- /dev/null
+++ b/src/pages/[platform]/ai/concepts/tools/index.mdx
@@ -0,0 +1,49 @@
+import { getCustomStaticPath } from "@/utils/getCustomStaticPath";
+
+export const meta = {
+ title: "Tools",
+ description:
+ "Amplify AI Concepts: Tool use",
+ platforms: [
+ "javascript",
+ "react-native",
+ "angular",
+ "nextjs",
+ "react",
+ "vue",
+ ],
+};
+
+export const getStaticPaths = async () => {
+ return getCustomStaticPath(meta.platforms);
+};
+
+export function getStaticProps(context) {
+ return {
+ props: {
+ platform: context.params.platform,
+ meta,
+ },
+ };
+}
+
+
+
+Large language models (LLMs) are stateless text generators: they have no awareness of the current state of the world and can't access data on their own. For example, if you asked an LLM "what is the weather in San Jose?" it would not be able to tell you because it does not know what the weather is today. Tools (sometimes referred to as function calling) are functions or APIs that an LLM can choose to invoke to get information about the world. This allows the LLM to answer questions with information not included in its training data, like the weather, application-specific data, and even user-specific data.
+
+When an LLM is prompted with tools, it can respond by saying that it wants to call a tool to get some data or take an action on the user's behalf. The tool's result is then added to the conversation history so the LLM can see what was returned. Here is a simplified flow of what happens:
+
+1. User: "what is the weather in san jose?"
+2. Code: Call LLM with this message: "what is the weather in san jose?", and let it know it has access to a tool called `getWeather` that takes an input like `{ city: string }`
+3. LLM: "I want to call the 'getWeather' tool with the input `{city: 'san jose'}`"
+4. Code: Run `getWeather({city: 'san jose'})` and append the results to the conversation history so far and call the LLM again
+5. LLM: "In San Jose it is 72 degrees and sunny"
+
+
+
+Note: the LLM itself does not execute any function or code. It responds with a special message saying that it wants to call a tool with specific input. That tool then needs to be called, and the results returned to the LLM in the message history. For more information on tools, see the [Bedrock docs on tool use](https://docs.aws.amazon.com/bedrock/latest/userguide/tool-use.html). A simplified sketch of this loop is shown below.
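+
+To make the flow concrete, here is a heavily simplified sketch of that loop using the Bedrock Converse API directly. The Amplify AI kit runs this loop for you inside the conversation handler; `getWeather` is a hypothetical tool used only for illustration.
+
+```ts
+import {
+  BedrockRuntimeClient,
+  ConverseCommand,
+  type Message,
+} from "@aws-sdk/client-bedrock-runtime";
+
+const client = new BedrockRuntimeClient();
+const modelId = "anthropic.claude-3-haiku-20240307-v1:0";
+
+// Describe the hypothetical getWeather tool so the model knows it exists.
+const toolConfig = {
+  tools: [
+    {
+      toolSpec: {
+        name: "getWeather",
+        description: "Get the current weather for a city",
+        inputSchema: {
+          json: {
+            type: "object",
+            properties: { city: { type: "string" } },
+            required: ["city"],
+          },
+        },
+      },
+    },
+  ],
+};
+
+const messages: Message[] = [
+  { role: "user", content: [{ text: "what is the weather in san jose?" }] },
+];
+
+// 1. Ask the model; it may respond by requesting a tool call.
+let response = await client.send(new ConverseCommand({ modelId, messages, toolConfig }));
+messages.push(response.output!.message!);
+
+if (response.stopReason === "tool_use") {
+  const toolUse = response.output!.message!.content!.find((block) => block.toolUse)!.toolUse!;
+
+  // 2. Run the tool ourselves -- the model never executes code.
+  const result = { temperatureF: 72, condition: "sunny" }; // e.g. getWeather(toolUse.input)
+
+  // 3. Send the tool result back so the model can finish its answer.
+  messages.push({
+    role: "user",
+    content: [
+      { toolResult: { toolUseId: toolUse.toolUseId, content: [{ json: result }] } },
+    ],
+  });
+  response = await client.send(new ConverseCommand({ modelId, messages, toolConfig }));
+}
+```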
+
+
+
+
+