Handle "TooLarge" models (#935)
* Handle "TooLarge" models

* Rm `status` altogether to use `state`

* fix merge issue

* lint

* Only show the widgets when the `modelLoadInfo` is available

* fix check

* better syntax

* lint

* prettier

* wait until `modelLoadInfo` is determined

* fix race condition of onMount
mishig25 authored Sep 22, 2023
1 parent 0e3c9aa commit 9f73af5
Showing 5 changed files with 48 additions and 25 deletions.
@@ -1,34 +1,41 @@
 <script lang="ts">

-  import type { WidgetProps, ModelLoadInfo, LoadingStatus } from "../types";
+  import type { WidgetProps, ModelLoadInfo, LoadState } from "../types";
   import IconAzureML from "../../../Icons/IconAzureML.svelte";

   export let model: WidgetProps["model"];
   export let computeTime: string;
   export let error: string;
-  export let modelLoadInfo: ModelLoadInfo = { status: "unknown" };
+  export let modelLoadInfo: ModelLoadInfo | undefined = undefined;

-  const status = {
+  const state = {
+    Loadable: "This model can be loaded on the Inference API on-demand.",
+    Loaded: "This model is currently loaded and running on the Inference API.",
+    TooBig:
+      "Model is too large to load onto the free Inference API. To try the model, launch it on Inference Endpoints instead.",
     error: "⚠️ This model could not be loaded by the inference API. ⚠️",
-    loaded: "This model is currently loaded and running on the Inference API.",
-    unknown: "This model can be loaded on the Inference API on-demand.",
   } as const;

-  const azureStatus = {
+  const azureState = {
+    Loadable: "This model can be loaded on AzureML Managed Endpoint",
+    Loaded: "This model is loaded and running on AzureML Managed Endpoint",
+    TooBig:
+      "Model is too large to load onto the free Inference API. To try the model, launch it on Inference Endpoints instead.",
     error: "⚠️ This model could not be loaded.",
-    loaded: "This model is loaded and running on AzureML Managed Endpoint",
-    unknown: "This model can be loaded loaded on AzureML Managed Endpoint",
   } as const;

   function getStatusReport(
-    modelLoadInfo: ModelLoadInfo,
-    statuses: Record<LoadingStatus, string>,
+    modelLoadInfo: ModelLoadInfo | undefined,
+    statuses: Record<LoadState, string>,
     isAzure = false
   ): string {
-    if (modelLoadInfo.compute_type === "cpu" && modelLoadInfo.status === "loaded" && !isAzure) {
+    if (!modelLoadInfo) {
+      return "Model state unknown";
+    }
+    if (modelLoadInfo.compute_type === "cpu" && modelLoadInfo.state === "Loaded" && !isAzure) {
       return `The model is loaded and running on <a class="hover:underline" href="https://huggingface.co/intel" target="_blank">Intel Xeon 3rd Gen Scalable CPU</a>`;
     }
-    return statuses[modelLoadInfo.status];
+    return statuses[modelLoadInfo.state];
   }

   function getComputeTypeMsg(): string {
@@ -54,13 +61,13 @@
   </div>
   <div class="border-dotter mx-2 flex flex-1 -translate-y-px border-b border-gray-100" />
   <div>
-    {@html getStatusReport(modelLoadInfo, azureStatus, true)}
+    {@html getStatusReport(modelLoadInfo, azureState, true)}
   </div>
 </div>
{:else if computeTime}
  Computation time on {getComputeTypeMsg()}: {computeTime}
{:else}
-  {@html getStatusReport(modelLoadInfo, status)}
+  {@html getStatusReport(modelLoadInfo, state)}
{/if}
</div>
{#if error}
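Worth noting in the hunk above: `getStatusReport` types its message tables as `Record<LoadState, string>`, so the compiler itself guarantees a message exists for every state, including the new `TooBig`. A minimal standalone sketch of that guarantee (identifiers local to this example, not taken from the repo):

```ts
// Record<LoadState, string> makes the compiler demand one message per state,
// so introducing "TooBig" fails to compile until a message is added.
type LoadState = "Loadable" | "Loaded" | "TooBig" | "error";

const messages: Record<LoadState, string> = {
  Loadable: "This model can be loaded on demand.",
  Loaded: "This model is currently loaded.",
  TooBig: "This model is too large for the free tier.",
  error: "This model could not be loaded.",
};

function report(state: LoadState): string {
  return messages[state]; // exhaustive by construction; no runtime fallback needed
}
```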
@@ -29,7 +29,7 @@
   export let previewInputSample: (sample: Record<string, any>) => void = () => {};

   let isMaximized = false;
-  let modelLoadInfo: ModelLoadInfo = { status: "unknown" };
+  let modelLoadInfo: ModelLoadInfo | undefined = undefined;
   let selectedInputGroup: string;

   const inputSamples: WidgetInputSample[] = (model?.widgetData ?? [])
@@ -53,9 +53,9 @@
     inputGroups.length === 1 ? inputGroups[0] : inputGroups.find(({ group }) => group === selectedInputGroup);

   onMount(() => {
-    getModelLoadInfo(apiUrl, model.id, includeCredentials).then(info => {
-      modelLoadInfo = info;
-    });
+    (async () => {
+      modelLoadInfo = await getModelLoadInfo(apiUrl, model.id, includeCredentials);
+    })();
   });

   function onClickMaximizeBtn() {
@@ -65,8 +65,19 @@

<div
  class="flex w-full max-w-full flex-col
-  {isMaximized ? 'fixed inset-0 z-20 bg-white p-12' : ''}"
+  {isMaximized ? 'fixed inset-0 z-20 bg-white p-12' : ''}
+  {!modelLoadInfo ? 'hidden' : ''}"
>
+  {#if modelLoadInfo?.state === "TooBig"}
+    <p class="text-sm text-gray-500">
+      Model is too large to load onto the free Inference API. To try the model, launch it on <a
+        class="underline"
+        href="https://ui.endpoints.huggingface.co/mishig/new?repository={encodeURIComponent(model.id)}"
+        >Inference Endpoints</a
+      >
+      instead.
+    </p>
+  {/if}
  {#if isMaximized}
    <button class="absolute top-6 right-12" on:click={onClickMaximizeBtn}>
      <IconCross classNames="text-xl text-gray-500 hover:text-black" />
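The `onMount` change above swaps a `.then()` chain for an async IIFE, plausibly related to the "fix race condition of onMount" commit message. The pattern matters in Svelte because a function returned from `onMount` is treated as a cleanup callback, so the callback itself must stay synchronous; making it `async` would return a Promise instead. A minimal sketch, with a stand-in `getModelLoadInfo` whose signature and arguments are assumed for illustration:

```ts
import { onMount } from "svelte";

// Stand-in for the real helper in helpers.ts; this signature is assumed.
declare function getModelLoadInfo(url: string, modelId: string): Promise<{ state: string }>;

let modelLoadInfo: { state: string } | undefined;

onMount(() => {
  // Keep the callback synchronous: Svelte treats a function returned from
  // onMount as a cleanup handler, and an `async` callback would instead
  // return a Promise. The async IIFE lets us await without changing that.
  (async () => {
    modelLoadInfo = await getModelLoadInfo("https://api-inference.huggingface.co", "gpt2");
  })();
});
```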
8 changes: 4 additions & 4 deletions js/src/lib/components/InferenceWidget/shared/helpers.ts
@@ -184,12 +184,12 @@ export async function getModelLoadInfo(
  });
  const output = await response.json();
  if (response.ok && typeof output === "object" && output.loaded !== undefined) {
-    const status = output.loaded ? "loaded" : "unknown";
-    const computeType = output.compute_type;
-    return { status, compute_type: computeType };
+    // eslint-disable-next-line @typescript-eslint/naming-convention
+    const { state, compute_type } = output;
+    return { compute_type, state };
  } else {
    console.warn(response.status, output.error);
-    return { status: "error" };
+    return { state: "error" };
  }
}
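After this change, `getModelLoadInfo` passes the API's `state` field through rather than deriving a status from a boolean `loaded` flag. A hedged sketch of just the narrowing step, assuming the response shape implied by the diff (everything beyond `state` and `compute_type` is illustrative):

```ts
type LoadState = "Loadable" | "Loaded" | "TooBig" | "error";

interface ModelLoadInfo {
  state: LoadState;
  compute_type?: "cpu" | "gpu";
}

// Assumed narrowing step only; the real function also performs the fetch and
// logs non-OK responses before falling back to the "error" state.
function parseLoadInfo(output: unknown): ModelLoadInfo {
  if (typeof output === "object" && output !== null && "state" in output) {
    const { state, compute_type } = output as ModelLoadInfo;
    return { state, compute_type };
  }
  return { state: "error" };
}
```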
5 changes: 3 additions & 2 deletions js/src/lib/components/InferenceWidget/shared/types.ts
@@ -11,12 +11,13 @@ export interface WidgetProps {
  isLoggedIn?: boolean;
}

-export type LoadingStatus = "error" | "loaded" | "unknown";
+export type LoadState = "Loadable" | "Loaded" | "TooBig" | "error";
+
export type ComputeType = "cpu" | "gpu";

export interface ModelLoadInfo {
-  status: LoadingStatus;
+  state: LoadState;
  compute_type?: ComputeType;
}
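Because `LoadState` is a closed string-literal union, control flow over it can be made exhaustive too, complementing the `Record<LoadState, string>` tables earlier in the diff. A sketch with illustrative names only, using a `never`-typed default to catch future additions at compile time:

```ts
type LoadState = "Loadable" | "Loaded" | "TooBig" | "error";

// Illustrative mapping to a hypothetical badge color, not repo code.
function badgeColor(state: LoadState): string {
  switch (state) {
    case "Loadable":
      return "blue";
    case "Loaded":
      return "green";
    case "TooBig":
      return "orange";
    case "error":
      return "red";
    default: {
      // If LoadState ever grows, this assignment stops compiling and flags
      // every switch that needs a new case.
      const unreachable: never = state;
      return unreachable;
    }
  }
}
```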
4 changes: 4 additions & 0 deletions js/src/routes/index.svelte
@@ -5,6 +5,10 @@
import ModeSwitcher from "../lib/components/DemoThemeSwitcher/DemoThemeSwitcher.svelte";

const models: ModelData[] = [
+  {
+    id: "WizardLM/WizardLM-70B-V1.0",
+    pipeline_tag: "text-generation",
+  },
  {
    id: "openai/clip-vit-base-patch16",
    pipeline_tag: "zero-shot-image-classification",
