-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinal_project
404 lines (352 loc) · 14.4 KB
/
final_project
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
!pip install folium
!pip install geopy
!pip install SpeechRecognition
!sudo apt-get install portaudio19-dev
!pip install PyAudio
!pip install librosa
!pip install soundfile
!apt-get install -y ffmpeg
!pip install branca
#-------------------------------------------------------
# Import necessary libraries
from IPython.display import display, Javascript
import base64
import speech_recognition as sr
from geopy.geocoders import Nominatim
import librosa
import soundfile as sf
import IPython.display as ipd
import subprocess
import os
import time
# Browser-side JavaScript that records microphone audio under voice control.
# It defines recordAudio(), which resolves to the recording encoded as a
# base64 string. Recording starts when the word "google" is recognized and
# stops when the word "siri" is recognized.
js = """
async function recordAudio() {
  const synth = window.speechSynthesis;
  const utterance1 = new SpeechSynthesisUtterance('Speak slowly the city, state, and country you want to geolocate.');
  synth.speak(utterance1);
  await new Promise(resolve => setTimeout(resolve, 7000)); // Wait for the first message to complete
  const utterance2 = new SpeechSynthesisUtterance('Then say "google" to begin and "siri" to end the recording.');
  synth.speak(utterance2);

  const div = document.createElement('div');
  const audio = document.createElement('audio');
  const statusLabel = document.createElement('p');
  const instructionLabel = document.createElement('p');
  const recognitionStatusLabel = document.createElement('p');
  instructionLabel.textContent = 'Speak slowly the city, state, and country you want to geolocate:';
  instructionLabel.style.color = 'green';
  instructionLabel.style.marginBottom = '10px';
  statusLabel.textContent = 'Ready to record...';
  statusLabel.style.color = 'blue';
  recognitionStatusLabel.style.color = 'red';
  document.body.appendChild(div);
  div.appendChild(instructionLabel);
  div.appendChild(statusLabel);
  div.appendChild(recognitionStatusLabel);
  div.appendChild(audio);

  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  let recorder = new MediaRecorder(stream);
  let chunks = [];
  audio.style.display = 'block';
  audio.srcObject = stream;
  audio.controls = true;
  audio.muted = true;

  // Set up speech recognition for start/stop commands.
  // Prefer the standard constructor and fall back to the prefixed one.
  const SpeechRecognitionImpl = window.SpeechRecognition || window.webkitSpeechRecognition;
  if (!SpeechRecognitionImpl) {
    // Release the microphone and the UI before reporting the problem.
    stream.getTracks().forEach(track => track.stop());
    div.remove();
    throw new Error('Speech recognition is not supported in this browser.');
  }
  const recognition = new SpeechRecognitionImpl();
  recognition.lang = 'en-US';
  recognition.continuous = true;
  recognition.interimResults = false;
  let isRecording = false;
  recognition.onresult = (event) => {
    const transcript = event.results[event.results.length - 1][0].transcript.trim().toLowerCase();
    if (transcript.includes('google') && !isRecording) {
      recorder.start();
      isRecording = true;
      statusLabel.textContent = 'Recording...';
      recognitionStatusLabel.textContent = 'Command recognized: starting recording';
      console.log('starting recording');
    } else if (transcript.includes('siri') && isRecording) {
      recorder.stop();
      recognition.stop();
      isRecording = false;
      statusLabel.textContent = 'Recording stopped. Processing...';
      recognitionStatusLabel.textContent = 'Command recognized: stopping recording';
      console.log('stopping recording');
    } else if (!isRecording) {
      // While recording, ordinary speech is the dictated location, not a
      // failed command, so the warning is only shown when idle.
      recognitionStatusLabel.textContent = 'Command not recognized. Please say "google" or "siri".';
      console.log('command not recognized');
    }
  };
  recognition.start();

  recorder.ondataavailable = e => chunks.push(e.data);
  await new Promise(resolve => recorder.onstop = resolve);
  stream.getTracks().forEach(track => track.stop());
  div.remove();

  // Use the chunks for the actual recorded audio
  const audioBlob = new Blob(chunks);
  const bytes = new Uint8Array(await audioBlob.arrayBuffer());

  // Build the binary string in slices: passing the whole buffer to
  // String.fromCharCode.apply() at once exceeds the engine's argument
  // limit for longer recordings and throws a RangeError.
  const SLICE = 0x8000;
  let binary = '';
  for (let offset = 0; offset < bytes.length; offset += SLICE) {
    binary += String.fromCharCode.apply(null, bytes.subarray(offset, offset + SLICE));
  }
  return btoa(binary);
}
"""
# Function to call audio recording
def record_audio():
    """Inject the recorder script into the notebook and run it.

    Displays the JavaScript held in ``js`` and then evaluates
    ``recordAudio()`` through ``google.colab.output.eval_js``.

    Returns:
        Whatever ``eval_js`` returns for the ``recordAudio()`` call.
    """
    display(Javascript(js))
    from google.colab import output as colab_output
    recording = colab_output.eval_js("recordAudio()")
    return recording
# Function to check and remove existing file
def remove_file_if_exists(file_path):
    """Delete ``file_path`` if it is present; do nothing when it is absent.

    The removal is attempted directly instead of testing for existence
    first, so there is no window in which the file can disappear between
    the check and the delete.

    Args:
        file_path: Path of the file to delete.

    Raises:
        OSError: If the path exists but cannot be removed (for example it
            is a directory or permissions are insufficient).
    """
    try:
        os.remove(file_path)
    except FileNotFoundError:
        # Nothing to clean up.
        return
    print(f"Removed existing file: {file_path}")
# Function to convert audio using ffmpeg
def convert_audio(input_file, output_file):
    """Convert ``input_file`` to ``output_file`` by running ffmpeg.

    Any existing ``output_file`` is removed first. ffmpeg's stderr output is
    always printed, and a non-zero exit status is reported explicitly.

    Args:
        input_file: Path of the audio file to read.
        output_file: Path of the file to create; its extension is what
            ffmpeg sees as the requested output.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be launched.
    """
    # Check and remove existing output file
    remove_file_if_exists(output_file)
    # Command to convert audio
    ffmpeg_command = ['ffmpeg', '-i', input_file, output_file]
    result = subprocess.run(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Decode leniently: the log may contain bytes that are not valid UTF-8,
    # and a decoding error here must not abort the conversion.
    print(result.stderr.decode(errors="replace"))
    if result.returncode != 0:
        print(f"ffmpeg exited with status {result.returncode}; "
              f"{output_file} may be missing or incomplete.")
# Audio recording and processing: capture in the browser, save as WebM,
# convert to WAV and play the result back in the notebook.
audio_base64 = record_audio()
if audio_base64 is None:
    raise ValueError("The recorded audio is empty. Please try recording again.")

audio_data = base64.b64decode(audio_base64)
with open("audio.webm", "wb") as file:
    file.write(audio_data)
    print("Audio recorded to:", file.name)

# A zero-byte file means nothing was captured.
if not os.path.getsize("audio.webm"):
    raise ValueError("The recorded audio is empty. Please try recording again.")

# Convert audio to PCM WAV using ffmpeg, then make sure the output exists.
convert_audio('audio.webm', 'audio.wav')
if not os.path.exists("audio.wav"):
    raise FileNotFoundError("audio.wav not found. Conversion might have failed.")

# Playback is best-effort: a failure is reported but does not stop the script.
try:
    audio, sample_rate = librosa.load('audio.wav', sr=None)
    ipd.display(ipd.Audio(data=audio, rate=sample_rate))
except Exception as e:
    print(f"Error loading or displaying audio: {e}")
# Speech recognition for transcription
recognizer = sr.Recognizer()
with sr.AudioFile('audio.wav') as source:
    audio_recorded = recognizer.record(source)

# List of languages to try recognizing, in order; the first language that
# yields a non-empty location wins.
languages = ['pt-BR', 'en-US', 'es-ES']
location_input = None
for language in languages:
    try:
        print(f"Trying to recognize speech in {language}...")
        location_input = recognizer.recognize_google(audio_recorded, language=language)
    except sr.UnknownValueError:
        print(f"Could not understand audio in {language}")
        continue
    except sr.RequestError:
        print(f"Failed to retrieve results for {language}; check the network connection")
        continue

    # Remove the spoken 'siri' stop command from the transcription.
    # Only whole words are dropped, so place names that merely contain the
    # letters "siri" are left intact and the original casing is preserved.
    kept_words = [
        word for word in location_input.split()
        if word.strip('.,;:!?').lower() != 'siri'
    ]
    location_input = ' '.join(kept_words).strip(' ,')

    if not location_input:
        # Nothing but the stop command was heard; treat it as unrecognized.
        print(f"Could not understand audio in {language}")
        location_input = None
        continue

    print(f"Transcribed location input in {language}: {location_input}")
    break  # If recognition is successful, exit the loop

if location_input is None:
    print("Failed to recognize speech in all provided languages.")
# Geocoding and map updating with geopy
geolocator = Nominatim(user_agent="geopy_example")
# Only query the geocoder when there is text to look up: after a failed
# transcription location_input is None, which would otherwise be sent to
# the service as a query.
location = geolocator.geocode(location_input) if location_input else None
if location:
    print("Location found:", location.address)
    print("Latitude:", location.latitude, "\nLongitude:", location.longitude)
else:
    print("No location found for the input:", location_input)
#-------------------------------------------------------------------------------
# Import libraries for Flask server
from flask import Flask, render_template_string, request
from werkzeug.serving import make_server
import threading
import IPython.display
# Create the Flask application object; the '/' route that serves the map
# page is registered on it.
app = Flask(__name__)
# Function to render HTML page with controls and the map
def render_page(lat, lon, address):
    """Render the Leaflet map page centred on the given coordinates.

    The page shows a marker at ``(lat, lon)`` with ``address`` in its popup,
    announces the zoom level by speech whenever the zoom changes, and
    offers buttons to toggle or interrupt the voice reading.

    Args:
        lat: Latitude used for the map centre and the marker.
        lon: Longitude used for the map centre and the marker.
        address: Text shown in the marker popup. It is emitted as a JSON
            string literal and inserted as plain text, so quotes,
            backslashes, newlines or markup in it cannot break the script
            or be interpreted as HTML.

    Returns:
        The rendered HTML document as a string.
    """
    return render_template_string('''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Map with Voice Reading Control</title>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.7.1/leaflet.css" />
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 0;
            display: flex;
            flex-direction: column;
            align-items: center;
            height: 100vh;
            overflow: hidden;
        }
        #controls {
            display: flex;
            justify-content: center;
            padding: 10px;
            width: 100%;
            background-color: #f4f4f4;
            border: 1px solid #ddd;
            border-radius: 5px;
            z-index: 1000;
        }
        button {
            padding: 10px 20px;
            font-size: 16px;
            margin: 5px;
            border: none;
            border-radius: 5px;
            cursor: pointer;
        }
        #activateButton {
            background-color: #f44336;
            color: white;
        }
        #stopButton {
            background-color: #2196F3;
            color: white;
        }
        #map-container {
            width: 100%;
            height: calc(100vh - 70px); /* Adjusts the map height */
            max-width: 800px;
            position: relative;
        }
        #map {
            width: 100%;
            height: 100%;
        }
    </style>
</head>
<body>
    <div id="controls">
        <button id="activateButton">🚫🔊 Disable voice reading</button>
        <button id="stopButton">⏹️ Stop voice reading</button>
    </div>
    <div id="map-container">
        <div id="map"></div>
    </div>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/leaflet/1.7.1/leaflet.js"></script>
    <script>
        // JavaScript code for controlling voice reading and monitoring map zoom level
        var synth = window.speechSynthesis;
        var msg = new SpeechSynthesisUtterance();
        msg.text = "";
        // Whether zoom announcements are currently allowed
        var voiceEnabled = true;
        // Function to speak the text (silent while voice reading is disabled)
        function speak(text) {
            if (!voiceEnabled) {
                return;
            }
            msg.text = text;
            synth.speak(msg);
        }
        // Event to disable (or re-enable) voice reading
        var activateButton = document.getElementById('activateButton');
        activateButton.onclick = function() {
            voiceEnabled = !voiceEnabled;
            if (voiceEnabled) {
                activateButton.textContent = '🚫🔊 Disable voice reading';
                alert('Voice reading enabled');
            } else {
                synth.cancel();
                activateButton.textContent = '🔊 Enable voice reading';
                alert('Voice reading disabled');
            }
        }
        // Event to stop voice reading (only interrupts the current speech)
        document.getElementById('stopButton').onclick = function() {
            synth.cancel();
            alert('Voice reading stopped');
        }
        // Function to get the map zoom level and read it out loud
        function readZoomLevel() {
            var zoomLevel = map.getZoom();
            speak('Zoom level ' + zoomLevel);
        }
        // Add zoom change event to the map
        var map = L.map('map').setView([{{ lat }}, {{ lon }}], 13);
        L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', {
            maxZoom: 19,
        }).addTo(map);
        map.on('zoomend', function() {
            readZoomLevel();
        });
        // Add marker to the map; the address is inserted as plain text
        var popupContent = document.createElement('span');
        popupContent.textContent = {{ address|tojson }};
        var marker = L.marker([{{ lat }}, {{ lon }}]).addTo(map)
            .bindPopup(popupContent)
            .openPopup();
    </script>
</body>
</html>
''', lat=lat, lon=lon, address=address)
@app.route('/')
def index():
    """Serve the map page for the location given in the query string.

    Reads ``lat``, ``lon`` and ``address`` from the request arguments,
    falling back to 45.5236, -122.6750 and 'Default Location' when an
    argument is missing, and returns the rendered page.
    """
    query = request.args
    return render_page(
        query.get('lat', default=45.5236, type=float),
        query.get('lon', default=-122.6750, type=float),
        query.get('address', default='Default Location', type=str),
    )
class ServerThread(threading.Thread):
    """Thread that serves a WSGI application on 127.0.0.1.

    The listening server is created in the constructor, so a port that is
    already in use is reported when the object is built rather than when
    the thread starts.
    """

    def __init__(self, app, port):
        """Create the server for ``app`` on ``port`` and push an app context.

        Args:
            app: The Flask application to serve.
            port: TCP port on 127.0.0.1 to listen on.
        """
        super().__init__()
        self.port = port
        self.srv = make_server('127.0.0.1', port, app)
        self.ctx = app.app_context()
        self.ctx.push()

    def run(self):
        """Serve requests until shutdown() is called."""
        print('Starting server on port:', self.port)
        self.srv.serve_forever()

    def shutdown(self):
        """Stop serving and release the listening socket.

        The serve loop is only asked to stop when the thread is running,
        because the server's shutdown() waits for that loop to finish and
        would never return otherwise. The socket is then closed explicitly
        so the port can be bound again immediately.
        """
        if self.is_alive():
            self.srv.shutdown()
        self.srv.server_close()
def start_server(port=6060):
    """Start the Flask server thread, replacing any server already running.

    Args:
        port: Port passed to ``ServerThread``; defaults to 6060.
    """
    global server
    # A previous server, if any, is stopped before the new one is created.
    if globals().get('server'):
        print('Stopping server')
        stop_server()
    server = ServerThread(app, port)
    server.start()
def stop_server():
    """Shut down the running server, if there is one.

    Safe to call at any time: when no server has been started yet (the
    module-level ``server`` name does not exist or is ``None``) the call
    is a no-op. Afterwards ``server`` is always ``None``.
    """
    global server
    # Look the name up through globals() so that calling this before any
    # start_server() does not raise NameError.
    running = globals().get('server')
    if running:
        running.shutdown()
    server = None
# Start the Flask server in a background thread on the default port (6060)
start_server()
#-------------------------------------------------------------------------------
# Geocoding and map updating with geopy
import json
from urllib.parse import urlencode

geolocator = Nominatim(user_agent="geopy_example")
# Skip the lookup when transcription produced nothing to search for.
location = geolocator.geocode(location_input) if location_input else None
if location:
    print("Location found:", location.address)
    print("Latitude:", location.latitude, "\nLongitude:", location.longitude)
    # Open Flask server with specified location
    params = {
        'lat': location.latitude,
        'lon': location.longitude,
        'address': location.address
    }
    # URL-encode the query so spaces, commas and non-ASCII characters in the
    # address produce a valid URL.
    url = "http://127.0.0.1:6060/?" + urlencode(params)
    # Wait for the server to start
    time.sleep(2)

    # Display the Flask app in the notebook
    def display_map(port, height):
        """Embed the map page served on ``port`` as an iframe in the output.

        The location shown is taken from the module-level ``params`` dict.

        Args:
            port: Local port of the Flask server to proxy through Colab.
            height: Height of the iframe in pixels.
        """
        shell = """
        (async () => {
            const url = await google.colab.kernel.proxyPort(%PORT%, {"cache": true});
            const iframe = document.createElement('iframe');
            iframe.src = url + '?' + %QUERY%;
            iframe.setAttribute('width', '100%');
            iframe.setAttribute('height', '%HEIGHT%');
            iframe.setAttribute('frameborder', 0);
            document.body.appendChild(iframe);
        })();
        """
        # The query string is encoded in Python and embedded as a JSON
        # string literal, so an apostrophe or other special character in
        # the address cannot terminate the JavaScript string early.
        replacements = [
            ("%PORT%", "%d" % port),
            ("%QUERY%", json.dumps(urlencode(params))),
            ("%HEIGHT%", "%d" % height),
        ]
        for (k, v) in replacements:
            shell = shell.replace(k, v)
        script = IPython.display.Javascript(shell)
        IPython.display.display(script)

    display_map(6060, 600)
else:
    print("No location found for the input:", location_input)