Skip to content

Commit

Permalink
first working version
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastianbenz committed Sep 4, 2024
1 parent 81b4d30 commit 2656434
Show file tree
Hide file tree
Showing 6 changed files with 169 additions and 13 deletions.
2 changes: 1 addition & 1 deletion functional-samples/ai.gemini-in-the-cloud/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"background": {
"service_worker": "background.js"
},
"permissions": ["sidePanel", "tabs"],
"permissions": ["activeTab", "scripting", "sidePanel"],
"host_permissions": ["<all_urls>"],
"side_panel": {
"default_path": "sidepanel/index.html"
Expand Down
9 changes: 2 additions & 7 deletions functional-samples/ai.gemini-in-the-cloud/sidepanel/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,8 @@ button.primary {
color: white;
}

button.img {
background: #333;
color: white;
padding: 0 4px;
width: 32px;
height: 32px;
min-width: 32px;
button > img {
vertical-align: middle;
}

button.secondary {
Expand Down
30 changes: 26 additions & 4 deletions functional-samples/ai.gemini-in-the-cloud/sidepanel/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
<link rel="stylesheet" type="text/css" href="index.css" />
</head>
<body>
<h1>Google Gemini</h1>
<textarea
id="input-prompt"
placeholder='Type something, e.g. "Write a haiku about Chrome Extensions"'
Expand Down Expand Up @@ -33,27 +32,50 @@ <h1>Google Gemini</h1>
accept="image/*"
style="display: none"
/>
<button id="button-add-image" class="primary img">
<button id="button-add-image" class="primary">
<img
src="/images/screenshot-monitor-24.svg"
width="24"
height="24"
alt="take screenshot"
/>
Add image
</button>
<button id="button-capture-image" class="primary">
<img
src="/images/screenshot-monitor-24.svg"
width="24"
height="24"
alt="capture screenshot"
/>
Take Screenshot
</button>
<input
type="file"
id="audio-file"
accept="audio/wav, audio/mp3"
style="display: none"
/>
<button id="button-record-audio" class="primary img">
<!-- Audio file upload button; its label now matches the alt text of its icon. -->
<button id="button-record-audio" class="primary" disabled>
<img
src="/images/record-audio-24.svg"
width="24"
height="24"
alt="Upload an audio file"
/>
Add audio file
</button>
<!-- Starts capturing tab audio; its label now matches the alt text of its icon. -->
<button id="button-capture-audio" class="primary">
<img
src="/images/record-audio-24.svg"
width="24"
height="24"
alt="Capture tab audio"
/>
Record audio
</button>
<button id="button-capture-audio-stop" class="primary" hidden>
Stop recording
</button>
</div>
<div>
Expand Down
57 changes: 56 additions & 1 deletion functional-samples/ai.gemini-in-the-cloud/sidepanel/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ import {
HarmCategory
} from '../node_modules/@google/generative-ai/dist/index.mjs';

import TabAudioRecorder from './tab-audio-recorder';
import getScreenshot from './screenshot';

// Important! Do not expose your API key in your extension code. You have two
// options:
//
Expand All @@ -14,14 +17,18 @@ import {
//
// It is only OK to put your API key into this file if you're the only
// user of your extension or for testing.
// Placeholder only: never commit a real API key to source control. Replace
// this value locally for testing, or load the key from one of the safer
// options described above.
const apiKey = 'YOUR_API_KEY';

// Module-level state of the side panel.
let genAI = null;
let model = null;
// Generation settings; temperature starts at 1.
let generationConfig = {
temperature: 1
};
// Files attached to the prompt (captured screenshots are pushed here).
let promptFiles = [];
// Number of screenshots captured so far; used to name them ("Screenshot",
// "Screenshot_2", ...) and set back to 0 by the reset button.
let screenshotCount = 0;
// Set back to 0 by the reset button. NOTE(review): nothing in the visible code
// increments or reads this counter yet; confirm whether it is still needed.
let audioRecordingCount = 0;

// Recorder instance driven by the capture-audio start/stop buttons.
const tabAudioRecorder = new TabAudioRecorder();

const inputPrompt = document.body.querySelector('#input-prompt');
const buttonPrompt = document.body.querySelector('#button-prompt');
Expand All @@ -36,6 +43,11 @@ const buttonAudioFile = document.getElementById('button-record-audio');
const elementImageFile = document.getElementById('image-file');
const buttonImageFile = document.getElementById('button-add-image');
const listFiles = document.getElementById('files-list');
// Buttons that start audio capture, stop audio capture, and take a screenshot.
const buttonCaptureAudio = document.getElementById('button-capture-audio');
const buttonCaptureAudioStop = document.getElementById(
'button-capture-audio-stop'
);
const buttonCaptureImage = document.getElementById('button-capture-image');

buttonImageFile.addEventListener(
'click',
Expand All @@ -55,10 +67,51 @@ buttonAudioFile.addEventListener(
},
false
);

// Hand the audio files selected through elementAudioFile to uploadFiles().
elementAudioFile.addEventListener(
  'change',
  async function onAudioFilesSelected() {
    uploadFiles(elementAudioFile.files, 'audio');
  }
);

// Take a screenshot via getScreenshot(), show it in the file list and attach
// it to the prompt as an inline PNG part.
buttonCaptureImage.addEventListener('click', async () => {
  try {
    const screenshot = await getScreenshot();
    // The first screenshot is named "Screenshot", later ones "Screenshot_2",
    // "Screenshot_3", ...
    const fileName =
      screenshotCount > 0 ? `Screenshot_${screenshotCount + 1}` : 'Screenshot';
    renderUploadedFile(fileName, 'image', screenshot.data);
    const imagePart = {
      inlineData: { data: screenshot.base64, mimeType: 'image/png' }
    };
    promptFiles.push({
      name: fileName,
      preview: screenshot,
      mimeType: 'image/png',
      imagePart
    });
    screenshotCount++;
  } catch (e) {
    // Report failures the same way as the audio capture handler.
    console.error(e);
    alert(e.message);
  }
});

// Start recording audio; once the recorder has started, replace the start
// button with the stop button. Failures are logged and shown to the user.
buttonCaptureAudio.addEventListener(
  'click',
  async function onStartAudioCapture() {
    try {
      await tabAudioRecorder.start();
      hide(buttonCaptureAudio);
      show(buttonCaptureAudioStop);
    } catch (error) {
      console.error(error);
      alert(error.message);
    }
  }
);

// Stop the current recording and bring the start button back.
buttonCaptureAudioStop.addEventListener('click', () => {
  try {
    tabAudioRecorder.stop();
  } catch (e) {
    console.error(e);
    alert(e.message);
  } finally {
    // Always restore the buttons so a failing stop() cannot leave the UI
    // stuck on "Stop recording".
    show(buttonCaptureAudio);
    hide(buttonCaptureAudioStop);
  }
});

async function uploadFiles(files, type) {
for (const file of files) {
// getting base64 from file to render in DOM
Expand Down Expand Up @@ -168,6 +221,8 @@ buttonReset.addEventListener('click', async () => {
model = null;
elementAudioFile.value = null;
elementImageFile.value = null;
screenshotCount = 0;
audioRecordingCount = 0;
});

function showLoading() {
Expand Down
24 changes: 24 additions & 0 deletions functional-samples/ai.gemini-in-the-cloud/sidepanel/screenshot.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
 * Captures the visible tab as a PNG via chrome.tabs.captureVisibleTab().
 *
 * @returns {Promise<{data: Uint8Array, base64: string}>} The raw PNG bytes
 *   (`data`) and the same bytes encoded as base64 without the data URL
 *   prefix (`base64`).
 * @throws {Error} If the capture result does not contain a base64 payload.
 */
const getScreenshot = async () => {
  const imageDataUri = await chrome.tabs.captureVisibleTab({ format: 'png' });
  // The result is a data URL ("data:image/png;base64,<payload>"). The payload
  // already is the base64 string, so there is no need to re-encode the bytes.
  const base64 =
    typeof imageDataUri === 'string' ? imageDataUri.split(',')[1] : undefined;
  if (!base64) {
    throw new Error('Could not capture a screenshot of the visible tab.');
  }
  const data = Uint8Array.from(atob(base64), (char) => char.charCodeAt(0));
  return { data, base64 };
};

/**
 * Reads a Blob with a FileReader and encodes its contents as base64.
 *
 * @param {Blob} blob - The blob to encode.
 * @returns {Promise<string>} The base64 payload of the blob's data URL
 *   (everything after the first comma).
 */
const blobToBase64 = (blob) => {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // Resolve from `load`, not `loadend`: `loadend` also fires after a failed
    // read, when `reader.result` is null and splitting it would throw.
    reader.onload = () => resolve(reader.result.split(',')[1]);
    // Reject with the reader's error object rather than the event, so callers
    // get a usable `message`.
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(blob);
  });
};

export default getScreenshot;
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/**
 * Records the audio of a surface (e.g. a tab) the user picks in the
 * getDisplayMedia() prompt.
 *
 * Call start() to prompt the user and begin recording, and stop() to finish.
 * When the recording stops, the recorded audio is opened in a new window as a
 * blob URL.
 */
export default class TabAudioRecorder {
  constructor() {
    // Active MediaRecorder, or null while idle.
    this.recorder = null;
    // Chunks delivered by the recorder for the current recording.
    this.data = [];
    // Stream returned by getDisplayMedia(); kept so all tracks can be stopped.
    this.stream = null;
    // AudioContext used to keep playing the captured audio to the user.
    this.audioContext = null;
  }

  /**
   * Prompts the user for a capture source and starts recording its audio.
   *
   * @returns {Promise<void>}
   * @throws {Error} If a recording is already running, if the user denies the
   *   capture, or if the shared source has no audio track.
   */
  async start() {
    if (this.recorder) {
      throw new Error('An audio recording is already in progress.');
    }

    // getDisplayMedia() is rejected with a TypeError when `video` is false,
    // so video is requested as well; only the audio tracks are recorded.
    const media = await navigator.mediaDevices.getDisplayMedia({
      audio: true,
      video: true
    });
    this.stream = media;

    try {
      const audioTracks = media.getAudioTracks();
      if (audioTracks.length === 0) {
        throw new Error(
          'The shared source has no audio. Share a tab and enable audio sharing.'
        );
      }
      const audioStream = new MediaStream(audioTracks);

      // Continue to play the captured audio to the user.
      this.audioContext = new AudioContext();
      const source = this.audioContext.createMediaStreamSource(audioStream);
      source.connect(this.audioContext.destination);

      // MediaRecorder throws for MIME types it cannot produce, so only ask
      // for WAV when it is supported; otherwise use the browser's default.
      const options = MediaRecorder.isTypeSupported('audio/wav')
        ? { mimeType: 'audio/wav' }
        : {};
      const recorder = new MediaRecorder(audioStream, options);
      recorder.ondataavailable = (event) => this.data.push(event.data);
      recorder.onstop = () => {
        // Label the blob with the type the recorder actually produced.
        const blob = new Blob(this.data, { type: recorder.mimeType });
        window.open(URL.createObjectURL(blob), '_blank');

        // Clear state ready for next recording
        this.recorder = null;
        this.data = [];
        // Also covers recordings that end without stop() being called.
        this.releaseCapture();
      };
      recorder.start();
      this.recorder = recorder;
    } catch (e) {
      // Do not keep capturing if the recorder could not be set up.
      this.releaseCapture();
      throw e;
    }
  }

  /**
   * Stops the current recording. Does nothing when no recording is running.
   */
  stop() {
    const recorder = this.recorder;
    if (!recorder) {
      return;
    }
    recorder.stop();
    // Stopping the tracks makes sure the recording icon in the tab is removed.
    this.releaseCapture();
  }

  /**
   * Stops all captured tracks and closes the playback AudioContext.
   * Safe to call more than once.
   */
  releaseCapture() {
    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
      this.stream = null;
    }
    if (this.audioContext) {
      this.audioContext.close().catch((e) => console.warn(e));
      this.audioContext = null;
    }
  }
}

0 comments on commit 2656434

Please sign in to comment.