first working version

GoogleChrome · Sep 4, 2024 · 2656434 · 2656434
1 parent 81b4d30
commit 2656434
Show file tree

Hide file tree

Showing 6 changed files with 169 additions and 13 deletions.
diff --git a/functional-samples/ai.gemini-in-the-cloud/manifest.json b/functional-samples/ai.gemini-in-the-cloud/manifest.json
@@ -6,7 +6,7 @@
   "background": {
     "service_worker": "background.js"
   },
-  "permissions": ["sidePanel", "tabs"],
+  "permissions": ["activeTab", "scripting", "sidePanel"],
   "host_permissions": ["<all_urls>"],
   "side_panel": {
     "default_path": "sidepanel/index.html"

diff --git a/functional-samples/ai.gemini-in-the-cloud/sidepanel/index.css b/functional-samples/ai.gemini-in-the-cloud/sidepanel/index.css
@@ -31,13 +31,8 @@ button.primary {
   color: white;
 }
 
-button.img {
-  background: #333;
-  color: white;
-  padding: 0 4px;
-  width: 32px;
-  height: 32px;
-  min-width: 32px;
+/* Keep the icon vertically aligned with the button's text label. */
+button > img {
+  vertical-align: middle;
 }
 
 button.secondary {

diff --git a/functional-samples/ai.gemini-in-the-cloud/sidepanel/index.html b/functional-samples/ai.gemini-in-the-cloud/sidepanel/index.html
@@ -4,7 +4,6 @@
     <link rel="stylesheet" type="text/css" href="index.css" />
   </head>
   <body>
-    <h1>Google Gemini</h1>
     <textarea
       id="input-prompt"
       placeholder='Type something, e.g. "Write a haiku about Chrome Extensions"'
@@ -33,27 +32,50 @@ <h1>Google Gemini</h1>
         accept="image/*"
         style="display: none"
       />
-      <button id="button-add-image" class="primary img">
+      <button id="button-add-image" class="primary">
         <img
           src="/images/screenshot-monitor-24.svg"
           width="24"
           height="24"
           alt="Add an image file"
         />
+        Add image
+      </button>
+      <button id="button-capture-image" class="primary">
+        <img
+          src="/images/screenshot-monitor-24.svg"
+          width="24"
+          height="24"
+          alt="capture screenshot"
+        />
+        Take Screenshot
       </button>
       <input
         type="file"
         id="audio-file"
         accept="audio/wav, audio/mp3"
         style="display: none"
       />
-      <button id="button-record-audio" class="primary img">
+      <button id="button-record-audio" class="primary" disabled>
         <img
           src="/images/record-audio-24.svg"
           width="24"
           height="24"
-          alt="take screenshot"
+          alt="Upload an audio file"
         />
+        Add audio file
+      </button>
+      <button id="button-capture-audio" class="primary">
+        <img
+          src="/images/record-audio-24.svg"
+          width="24"
+          height="24"
+          alt="Capture tab audio"
+        />
+        Record audio
+      </button>
+      <button id="button-capture-audio-stop" class="primary" hidden>
+        Stop recording
       </button>
     </div>
     <div>

diff --git a/functional-samples/ai.gemini-in-the-cloud/sidepanel/index.js b/functional-samples/ai.gemini-in-the-cloud/sidepanel/index.js
@@ -4,6 +4,9 @@ import {
   HarmCategory
 } from '../node_modules/@google/generative-ai/dist/index.mjs';
 
+// Browsers resolve module specifiers verbatim, so the file extension is
+// spelled out for these local modules.
+import TabAudioRecorder from './tab-audio-recorder.js';
+import getScreenshot from './screenshot.js';
+
 // Important! Do not expose your API key in your extension code. You have two
 // options:
 //
@@ -14,14 +17,18 @@ import {
 //
 // It is only OK to put your API key into this file if you're the only
 // user of your extension or for testing.
-const apiKey = 'AIzaSyA0tBKTK1ieL1Dyb2AsYk7q_tGsb6jN4o8';
+// Placeholder only: paste your own key here for local testing and never
+// commit a real API key to source control.
+const apiKey = 'YOUR_API_KEY';
 
 let genAI = null;
 let model = null;
 let generationConfig = {
   temperature: 1
 };
 let promptFiles = [];
+let screenshotCount = 0;
+let audioRecordingCount = 0;
+
+const tabAudioRecorder = new TabAudioRecorder();
 
 const inputPrompt = document.body.querySelector('#input-prompt');
 const buttonPrompt = document.body.querySelector('#button-prompt');
@@ -36,6 +43,11 @@ const buttonAudioFile = document.getElementById('button-record-audio');
 const elementImageFile = document.getElementById('image-file');
 const buttonImageFile = document.getElementById('button-add-image');
 const listFiles = document.getElementById('files-list');
+const buttonCaptureAudio = document.getElementById('button-capture-audio');
+const buttonCaptureAudioStop = document.getElementById(
+  'button-capture-audio-stop'
+);
+const buttonCaptureImage = document.getElementById('button-capture-image');
 
 buttonImageFile.addEventListener(
   'click',
@@ -55,10 +67,51 @@ buttonAudioFile.addEventListener(
   },
   false
 );
+
 elementAudioFile.addEventListener('change', async () => {
   uploadFiles(elementAudioFile.files, 'audio');
 });
 
+// Takes a screenshot, shows it in the file list and queues it as an inline
+// PNG part for the next prompt. Any failure is logged and shown in an alert.
+buttonCaptureImage.addEventListener('click', async () => {
+  try {
+    const capture = await getScreenshot();
+    console.log('screenshot', capture);
+
+    // The first screenshot has no suffix; later ones are numbered from 2.
+    let label = 'Screenshot';
+    if (screenshotCount > 0) {
+      label = `Screenshot_${screenshotCount + 1}`;
+    }
+    renderUploadedFile(label, 'image', capture.data);
+
+    const mimeType = 'image/png';
+    promptFiles.push({
+      name: label,
+      preview: capture,
+      mimeType,
+      imagePart: { inlineData: { data: capture.base64, mimeType } }
+    });
+    screenshotCount += 1;
+  } catch (err) {
+    console.log(err);
+    alert(err.message);
+  }
+});
+
+// Starts an audio capture. The buttons are swapped only after the recorder
+// has started; a failure is logged and shown in an alert instead.
+buttonCaptureAudio.addEventListener('click', async () => {
+  try {
+    await tabAudioRecorder.start();
+    hide(buttonCaptureAudio);
+    show(buttonCaptureAudioStop);
+  } catch (err) {
+    console.error(err);
+    alert(err.message);
+  }
+});
+
+// Ends the running capture, then brings the "start" button back in place of
+// the "stop" button.
+buttonCaptureAudioStop.addEventListener('click', async () => {
+  tabAudioRecorder.stop();
+  hide(buttonCaptureAudioStop);
+  show(buttonCaptureAudio);
+});
+
 async function uploadFiles(files, type) {
   for (const file of files) {
     // getting base64 from file to render in DOM
@@ -168,6 +221,8 @@ buttonReset.addEventListener('click', async () => {
   model = null;
   elementAudioFile.value = null;
   elementImageFile.value = null;
+  screenshotCount = 0;
+  audioRecordingCount = 0;
 });
 
 function showLoading() {

diff --git a/functional-samples/ai.gemini-in-the-cloud/sidepanel/screenshot.js b/functional-samples/ai.gemini-in-the-cloud/sidepanel/screenshot.js
@@ -0,0 +1,24 @@
+/**
+ * Takes a PNG screenshot through chrome.tabs.captureVisibleTab() and returns
+ * it both as raw bytes and as a base64 string.
+ *
+ * @returns {Promise<{data: Uint8Array, base64: string}>} `data` holds the
+ *   decoded PNG bytes; `base64` is the payload of the captured data URL
+ *   (everything after the first comma).
+ * @throws {Error} If the capture does not produce a data URL with a payload.
+ */
+const getScreenshot = async () => {
+  const imageDataUri = await chrome.tabs.captureVisibleTab({ format: 'png' });
+
+  // The payload follows the first comma of the data URL. It is already
+  // base64, so it is used directly instead of being decoded and re-encoded.
+  const separatorIndex =
+    typeof imageDataUri === 'string' ? imageDataUri.indexOf(',') : -1;
+  if (separatorIndex === -1) {
+    throw new Error('Could not capture a screenshot of the current tab.');
+  }
+  const base64 = imageDataUri.slice(separatorIndex + 1);
+
+  // atob() yields one character per byte, so each char code is a byte value.
+  const data = Uint8Array.from(atob(base64), (char) => char.charCodeAt(0));
+
+  return { data, base64 };
+};
+
+export default getScreenshot;
diff --git a/functional-samples/ai.gemini-in-the-cloud/sidepanel/tab-audio-recorder.js b/functional-samples/ai.gemini-in-the-cloud/sidepanel/tab-audio-recorder.js
@@ -0,0 +1,60 @@
+/**
+ * Records audio from a source the user picks in the browser's sharing prompt
+ * and opens the finished recording in a new tab.
+ *
+ * An earlier draft of this class obtained the stream through
+ * chrome.tabCapture.getMediaStreamId(); the current implementation uses
+ * navigator.mediaDevices.getDisplayMedia() instead.
+ */
+export default class TabAudioRecorder {
+  constructor() {
+    // Active MediaRecorder, or null while idle.
+    this.recorder = null;
+    // Chunks collected from the recorder's `dataavailable` events.
+    this.data = [];
+    // The full captured stream, kept so that all of its tracks can be stopped.
+    this.stream = null;
+    // AudioContext that plays the captured audio back to the user.
+    this.output = null;
+  }
+
+  /**
+   * Picks the first recording format MediaRecorder reports as supported.
+   *
+   * @returns {string|undefined} A supported MIME type, or undefined when none
+   *   of the candidates is supported (the browser default is used then).
+   */
+  static pickMimeType() {
+    const candidates = [
+      'audio/webm;codecs=opus',
+      'audio/webm',
+      'audio/ogg;codecs=opus'
+    ];
+    return candidates.find((type) => MediaRecorder.isTypeSupported(type));
+  }
+
+  /**
+   * Prompts the user for a source and starts recording its audio.
+   *
+   * @returns {Promise<void>} Resolves once the recorder has started.
+   * @throws {Error} If a recording is already running, if the chosen source
+   *   shares no audio, or if the capture or recorder setup fails.
+   */
+  async start() {
+    if (this.recorder) {
+      throw new Error('A recording is already in progress.');
+    }
+
+    // getDisplayMedia() rejects requests with `video: false`, so video is
+    // requested as well and only the audio tracks are recorded.
+    const media = await navigator.mediaDevices.getDisplayMedia({
+      audio: true,
+      video: true
+    });
+    this.stream = media;
+
+    try {
+      const audioTracks = media.getAudioTracks();
+      if (audioTracks.length === 0) {
+        throw new Error('The selected source is not sharing any audio.');
+      }
+      const audioStream = new MediaStream(audioTracks);
+
+      // Continue to play the captured audio to the user.
+      // NOTE(review): this was written for the chrome.tabCapture variant;
+      // confirm it does not double the sound with getDisplayMedia().
+      this.output = new AudioContext();
+      this.output
+        .createMediaStreamSource(audioStream)
+        .connect(this.output.destination);
+
+      // Start recording. The MediaRecorder constructor throws for MIME types
+      // it cannot produce, so a supported one is looked up first.
+      const mimeType = TabAudioRecorder.pickMimeType();
+      const recorder = new MediaRecorder(
+        audioStream,
+        mimeType ? { mimeType } : {}
+      );
+      recorder.ondataavailable = (event) => this.data.push(event.data);
+      recorder.onstop = () => {
+        // Label the blob with the format the recorder actually used.
+        const blob = new Blob(this.data, { type: recorder.mimeType });
+        window.open(URL.createObjectURL(blob), '_blank');
+
+        // Clear state ready for next recording. The recorder can also stop
+        // without stop() being called, so resources are released here too.
+        this.recorder = null;
+        this.data = [];
+        this.release();
+      };
+      recorder.start();
+      this.recorder = recorder;
+    } catch (e) {
+      // Do not leave the capture running when setup fails half-way.
+      this.release();
+      throw e;
+    }
+  }
+
+  /**
+   * Stops the running recording, if any. The recording itself is delivered
+   * from the recorder's `stop` event.
+   */
+  stop() {
+    if (!this.recorder) {
+      return;
+    }
+    if (this.recorder.state !== 'inactive') {
+      this.recorder.stop();
+    }
+    this.release();
+  }
+
+  /**
+   * Stops all captured tracks and closes the playback context. Safe to call
+   * more than once.
+   */
+  release() {
+    if (this.stream) {
+      // Stopping the tracks makes sure the recording icon in the tab is
+      // removed.
+      this.stream.getTracks().forEach((t) => t.stop());
+      this.stream = null;
+    }
+    if (this.output) {
+      if (this.output.state !== 'closed') {
+        this.output.close().catch((e) => console.error(e));
+      }
+      this.output = null;
+    }
+  }
+}