Handle "TooLarge" models (#935)
* Handle "TooLarge" models

* Rm `status` altogether to use `state`

* fix merge issue

* lint

* Only show the widgets when the `modelLoadInfo` is available

* fix check

* better syntax

* lint

* prettier

* wait until `modelLoadInfo` is determined

* fix race condition of onMount
mishig25 authored Sep 22, 2023
1 parent 0e3c9aa commit 9f73af5
Showing 5 changed files with 48 additions and 25 deletions.
@@ -1,34 +1,41 @@
 <script lang="ts">

-  import type { WidgetProps, ModelLoadInfo, LoadingStatus } from "../types";
+  import type { WidgetProps, ModelLoadInfo, LoadState } from "../types";
   import IconAzureML from "../../../Icons/IconAzureML.svelte";

   export let model: WidgetProps["model"];
   export let computeTime: string;
   export let error: string;
-  export let modelLoadInfo: ModelLoadInfo = { status: "unknown" };
+  export let modelLoadInfo: ModelLoadInfo | undefined = undefined;

-  const status = {
+  const state = {
+    Loadable: "This model can be loaded on the Inference API on-demand.",
+    Loaded: "This model is currently loaded and running on the Inference API.",
+    TooBig:
+      "Model is too large to load onto the free Inference API. To try the model, launch it on Inference Endpoints instead.",
     error: "⚠️ This model could not be loaded by the inference API. ⚠️",
-    loaded: "This model is currently loaded and running on the Inference API.",
-    unknown: "This model can be loaded on the Inference API on-demand.",
   } as const;

-  const azureStatus = {
+  const azureState = {
+    Loadable: "This model can be loaded on AzureML Managed Endpoint",
+    Loaded: "This model is loaded and running on AzureML Managed Endpoint",
+    TooBig:
+      "Model is too large to load onto the free Inference API. To try the model, launch it on Inference Endpoints instead.",
     error: "⚠️ This model could not be loaded.",
-    loaded: "This model is loaded and running on AzureML Managed Endpoint",
-    unknown: "This model can be loaded loaded on AzureML Managed Endpoint",
   } as const;

   function getStatusReport(
-    modelLoadInfo: ModelLoadInfo,
-    statuses: Record<LoadingStatus, string>,
+    modelLoadInfo: ModelLoadInfo | undefined,
+    statuses: Record<LoadState, string>,
     isAzure = false
   ): string {
-    if (modelLoadInfo.compute_type === "cpu" && modelLoadInfo.status === "loaded" && !isAzure) {
+    if (!modelLoadInfo) {
+      return "Model state unknown";
+    }
+    if (modelLoadInfo.compute_type === "cpu" && modelLoadInfo.state === "Loaded" && !isAzure) {
       return `The model is loaded and running on <a class="hover:underline" href="https://huggingface.co/intel" target="_blank">Intel Xeon 3rd Gen Scalable CPU</a>`;
     }
-    return statuses[modelLoadInfo.status];
+    return statuses[modelLoadInfo.state];
   }

   function getComputeTypeMsg(): string {
@@ -54,13 +61,13 @@
   </div>
   <div class="border-dotter mx-2 flex flex-1 -translate-y-px border-b border-gray-100" />
   <div>
-    {@html getStatusReport(modelLoadInfo, azureStatus, true)}
+    {@html getStatusReport(modelLoadInfo, azureState, true)}
   </div>
 </div>
{:else if computeTime}
  Computation time on {getComputeTypeMsg()}: {computeTime}
{:else}
-  {@html getStatusReport(modelLoadInfo, status)}
+  {@html getStatusReport(modelLoadInfo, state)}
{/if}
</div>
{#if error}
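Worth noting in the hunk above: `getStatusReport` types its message tables as `Record<LoadState, string>`, so the compiler itself guarantees a message exists for every state, including the new `TooBig`. A minimal standalone sketch of that guarantee (identifiers local to this example, not taken from the repo):

```ts
// Record<LoadState, string> makes the compiler demand one message per state,
// so introducing "TooBig" fails to compile until a message is added.
type LoadState = "Loadable" | "Loaded" | "TooBig" | "error";

const messages: Record<LoadState, string> = {
  Loadable: "This model can be loaded on demand.",
  Loaded: "This model is currently loaded.",
  TooBig: "This model is too large for the free tier.",
  error: "This model could not be loaded.",
};

function report(state: LoadState): string {
  return messages[state]; // exhaustive by construction; no runtime fallback needed
}
```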
@@ -29,7 +29,7 @@
   export let previewInputSample: (sample: Record<string, any>) => void = () => {};

   let isMaximized = false;
-  let modelLoadInfo: ModelLoadInfo = { status: "unknown" };
+  let modelLoadInfo: ModelLoadInfo | undefined = undefined;
   let selectedInputGroup: string;

   const inputSamples: WidgetInputSample[] = (model?.widgetData ?? [])
@@ -53,9 +53,9 @@
     inputGroups.length === 1 ? inputGroups[0] : inputGroups.find(({ group }) => group === selectedInputGroup);

   onMount(() => {
-    getModelLoadInfo(apiUrl, model.id, includeCredentials).then(info => {
-      modelLoadInfo = info;
-    });
+    (async () => {
+      modelLoadInfo = await getModelLoadInfo(apiUrl, model.id, includeCredentials);
+    })();
   });

   function onClickMaximizeBtn() {
@@ -65,8 +65,19 @@

<div
  class="flex w-full max-w-full flex-col
-  {isMaximized ? 'fixed inset-0 z-20 bg-white p-12' : ''}"
+  {isMaximized ? 'fixed inset-0 z-20 bg-white p-12' : ''}
+  {!modelLoadInfo ? 'hidden' : ''}"
>
+  {#if modelLoadInfo?.state === "TooBig"}
+    <p class="text-sm text-gray-500">
+      Model is too large to load onto the free Inference API. To try the model, launch it on <a
+        class="underline"
+        href="https://ui.endpoints.huggingface.co/mishig/new?repository={encodeURIComponent(model.id)}"
+        >Inference Endpoints</a
+      >
+      instead.
+    </p>
+  {/if}
  {#if isMaximized}
    <button class="absolute top-6 right-12" on:click={onClickMaximizeBtn}>
      <IconCross classNames="text-xl text-gray-500 hover:text-black" />
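The `onMount` change above swaps a `.then()` chain for an async IIFE, plausibly related to the "fix race condition of onMount" commit message. The pattern matters in Svelte because a function returned from `onMount` is treated as a cleanup callback, so the callback itself must stay synchronous; making it `async` would return a Promise instead. A minimal sketch, with a stand-in `getModelLoadInfo` whose signature and arguments are assumed for illustration:

```ts
import { onMount } from "svelte";

// Stand-in for the real helper in helpers.ts; this signature is assumed.
declare function getModelLoadInfo(url: string, modelId: string): Promise<{ state: string }>;

let modelLoadInfo: { state: string } | undefined;

onMount(() => {
  // Keep the callback synchronous: Svelte treats a function returned from
  // onMount as a cleanup handler, and an `async` callback would instead
  // return a Promise. The async IIFE lets us await without changing that.
  (async () => {
    modelLoadInfo = await getModelLoadInfo("https://api-inference.huggingface.co", "gpt2");
  })();
});
```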
8 changes: 4 additions & 4 deletions js/src/lib/components/InferenceWidget/shared/helpers.ts
@@ -184,12 +184,12 @@ export async function getModelLoadInfo(
  });
  const output = await response.json();
  if (response.ok && typeof output === "object" && output.loaded !== undefined) {
-    const status = output.loaded ? "loaded" : "unknown";
-    const computeType = output.compute_type;
-    return { status, compute_type: computeType };
+    // eslint-disable-next-line @typescript-eslint/naming-convention
+    const { state, compute_type } = output;
+    return { compute_type, state };
  } else {
    console.warn(response.status, output.error);
-    return { status: "error" };
+    return { state: "error" };
  }
}
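After this change, `getModelLoadInfo` passes the API's `state` field through rather than deriving a status from a boolean `loaded` flag. A hedged sketch of just the narrowing step, assuming the response shape implied by the diff (everything beyond `state` and `compute_type` is illustrative):

```ts
type LoadState = "Loadable" | "Loaded" | "TooBig" | "error";

interface ModelLoadInfo {
  state: LoadState;
  compute_type?: "cpu" | "gpu";
}

// Assumed narrowing step only; the real function also performs the fetch and
// logs non-OK responses before falling back to the "error" state.
function parseLoadInfo(output: unknown): ModelLoadInfo {
  if (typeof output === "object" && output !== null && "state" in output) {
    const { state, compute_type } = output as ModelLoadInfo;
    return { state, compute_type };
  }
  return { state: "error" };
}
```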
5 changes: 3 additions & 2 deletions js/src/lib/components/InferenceWidget/shared/types.ts
@@ -11,12 +11,13 @@ export interface WidgetProps {
  isLoggedIn?: boolean;
}

-export type LoadingStatus = "error" | "loaded" | "unknown";
+export type LoadState = "Loadable" | "Loaded" | "TooBig" | "error";
+
export type ComputeType = "cpu" | "gpu";

export interface ModelLoadInfo {
-  status: LoadingStatus;
+  state: LoadState;
  compute_type?: ComputeType;
}
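Because `LoadState` is a closed string-literal union, control flow over it can be made exhaustive too, complementing the `Record<LoadState, string>` tables earlier in the diff. A sketch with illustrative names only, using a `never`-typed default to catch future additions at compile time:

```ts
type LoadState = "Loadable" | "Loaded" | "TooBig" | "error";

// Illustrative mapping to a hypothetical badge color, not repo code.
function badgeColor(state: LoadState): string {
  switch (state) {
    case "Loadable":
      return "blue";
    case "Loaded":
      return "green";
    case "TooBig":
      return "orange";
    case "error":
      return "red";
    default: {
      // If LoadState ever grows, this assignment stops compiling and flags
      // every switch that needs a new case.
      const unreachable: never = state;
      return unreachable;
    }
  }
}
```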
4 changes: 4 additions & 0 deletions js/src/routes/index.svelte
@@ -5,6 +5,10 @@
import ModeSwitcher from "../lib/components/DemoThemeSwitcher/DemoThemeSwitcher.svelte";

const models: ModelData[] = [
+  {
+    id: "WizardLM/WizardLM-70B-V1.0",
+    pipeline_tag: "text-generation",
+  },
  {
    id: "openai/clip-vit-base-patch16",
    pipeline_tag: "zero-shot-image-classification",
