Test and explore Korean STT APIs; modify server.py
phi-mint committed Oct 31, 2023
1 parent 19a27ab commit 33f6738
Showing 2 changed files with 67 additions and 49 deletions.
97 changes: 57 additions & 40 deletions fastapi_backend/server.py
@@ -579,49 +579,66 @@ async def transcribe_api_endpoint(client_id: str = Form(...),
file: UploadFile = File(...)):

# Save temporary audio file
extension = os.path.splitext(file.filename)[1] # Get the file extension
temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
audio_file = await file.read()
temp_audio_file.write(audio_file)
temp_audio_file.close()

wav_path = convert_audio_to_wav(temp_audio_file.name)

resp_token = requests.post(
'https://openapi.vito.ai/v1/authenticate',
data={'client_id': client_id,
'client_secret': client_secret}
)
resp_token.raise_for_status()
access_token = resp_token.json()['access_token']

time.sleep(5)

config = {
"diarization": {
"use_verification": False
},
"use_multi_channel": False
}
################## VITO API #################################
# Save temporary audio file
# extension = os.path.splitext(file.filename)[1] # Get the file extension
# temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=extension)
# audio_file = await file.read()

# temp_audio_file.write(audio_file)
# temp_audio_file.close()

# print(temp_audio_file)
# wav_path = convert_audio_to_wav(temp_audio_file.name)
# print(wav_path)

# resp_token = requests.post(
# 'https://openapi.vito.ai/v1/authenticate',
# data={'client_id': client_id,
# 'client_secret': client_secret}
# )
# resp_token.raise_for_status()
# access_token = resp_token.json()['access_token']

# time.sleep(5)

# config = {
# "diarization": {
# "use_verification": False
# },
# "use_multi_channel": False
# }

# resp_api = requests.post(
# 'https://openapi.vito.ai/v1/transcribe',
# headers={'Authorization': 'bearer '+ access_token},
# data={'config': json.dumps(config)},
# files={'file': open(wav_path, 'rb')}
# )

# resp_api.raise_for_status()
# api_id = resp_api.json()['id']

# resp_msg = requests.get(
# 'https://openapi.vito.ai/v1/transcribe/'+ api_id,
# headers={'Authorization': 'bearer '+ access_token},
# )

# transcription = resp_msg.json()['results']['utterances'][0]['msg']

################## VITO API Code End #################################

# add OpenAI Whisper API code
openai.api_key = api_key

resp_api = requests.post(
'https://openapi.vito.ai/v1/transcribe',
headers={'Authorization': 'bearer '+ access_token},
data={'config': json.dumps(config)},
files={'file': open(wav_path, 'rb')}
)

resp_api.raise_for_status()
api_id = resp_api.json()['id']
audio_file = await file.read()

time.sleep(20)
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
temp_file.write(audio_file)
temp_file_path = temp_file.name

resp_msg = requests.get(
'https://openapi.vito.ai/v1/transcribe/'+ api_id,
headers={'Authorization': 'bearer '+ access_token},
)

transcription = resp_msg.json()['results']['utterances'][0]['msg']
with open(temp_file_path, 'rb') as open_audio_file:
transcription = openai.Audio.transcribe(model="whisper-1", file=open_audio_file, response_format="text", language='ko')

return {"transcription": transcription}
########### Speech2Text End #############
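
Note: below is a minimal, self-contained sketch of the new Whisper-based flow, not the committed code. The api_key form field and the /speech_to_text_api route are inferred from the frontend change in this commit; the try/finally temp-file cleanup is an added assumption (the committed code leaves the temp file behind). It uses the same pre-1.0 openai SDK call, openai.Audio.transcribe, that the commit uses.

import os
import tempfile

import openai
from fastapi import FastAPI, File, Form, UploadFile

app = FastAPI()

@app.post("/speech_to_text_api")
async def transcribe_api_endpoint(api_key: str = Form(...),
                                  file: UploadFile = File(...)):
    # Hand the user-supplied key to the pre-1.0 openai SDK, as in the commit.
    openai.api_key = api_key

    # Buffer the upload into a temporary .wav file so it can be reopened by path.
    audio_bytes = await file.read()
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
        temp_file.write(audio_bytes)
        temp_file_path = temp_file.name

    try:
        with open(temp_file_path, "rb") as audio_file:
            # Synchronous Whisper call; no fixed sleep or status polling is
            # needed, unlike the commented-out VITO flow above.
            transcription = openai.Audio.transcribe(
                model="whisper-1",
                file=audio_file,
                response_format="text",
                language="ko",
            )
    finally:
        # Assumed cleanup step, not present in the committed code.
        os.remove(temp_file_path)

    return {"transcription": transcription}
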
19 changes: 10 additions & 9 deletions streamlit_frontend/speech_to_text.py
@@ -19,7 +19,7 @@ def main():

left_column, right_column = st.columns(2)
left_column.caption(translate('warning', st.session_state.ko_en))
uploaded_file = left_column.file_uploader(translate('choose_audio', st.session_state.ko_en), type=["wav", "mp3", "flac", "ogg"])
uploaded_file = left_column.file_uploader(translate('choose_audio', st.session_state.ko_en), type=["wav", "mp3", "flac", "ogg", "wma", "aac"])

mode = st.radio(translate('radio', st.session_state.ko_en), (translate('korean', st.session_state.ko_en),
translate('english', st.session_state.ko_en)), horizontal=True)
@@ -33,36 +33,37 @@ def main():
unsafe_allow_html=True,
)

client_id_input, client_secret_input, _, _ = st.columns(4)
api_key_input, _, _, _ = st.columns(4)

client_id = client_id_input.text_input(translate('Client_ID', st.session_state.ko_en),value="")
api_key = api_key_input.text_input(translate('api_key', st.session_state.ko_en), value="", type='password')

client_secret = client_secret_input.text_input(translate('Client_Secret', st.session_state.ko_en),value="", type='password')
# client_secret = client_secret_input.text_input(translate('Client_Secret', st.session_state.ko_en),value="", type='password')

_,_,_,l4_column,_,_,_,_ = st.columns(8)
submit_button = l4_column.button(translate('transcribe_button', st.session_state.ko_en), use_container_width=True)

if uploaded_file is not None :
st.session_state.transcription = None

right_column.markdown('#')
right_column.audio(uploaded_file)

if len(client_id) > 0 and len(client_secret) > 0 :
if len(api_key) > 0 :

wav_data = io.BytesIO(uploaded_file.read())

if submit_button:
response = requests.post(f"{BACKEND_URL}/speech_to_text_api", files={"file": wav_data}, data={'client_id': client_id,
'client_secret': client_secret})
response = requests.post(f"{BACKEND_URL}/speech_to_text_api", files={"file": wav_data}, data={'api_key': api_key})
if response.status_code != 200:
st.error(response.status_code)
else:
transcription = response.json()["transcription"]

st.session_state.transcription = ''.join(transcription)
st.text_area(label = "Transcription:", value = st.session_state.transcription, disabled=True)

_, right_column = st.columns(2)
right_column.caption('<div style="text-align: right;">Model Api: https://developers.vito.ai/</div>', unsafe_allow_html=True)
right_column.caption('<div style="text-align: right;">Model Api: https://platform.openai.com/docs/guides/speech-to-text</div>', unsafe_allow_html=True)

elif mode == 'english' or mode == '영어':
_,_,_,l4_column,_,_,_,_ = st.columns(8)
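
Note: a minimal sketch of how the reworked Streamlit page now calls the backend. BACKEND_URL and the helper name are illustrative assumptions; the api_key form field replaces the old client_id / client_secret pair used with the VITO API.

import io
import requests

BACKEND_URL = "http://localhost:8000"  # assumed backend address; not part of this diff

def request_transcription(api_key: str, audio_bytes: bytes) -> str:
    # The form field name must match the backend endpoint: 'api_key' replaces
    # the old 'client_id' / 'client_secret' pair.
    wav_data = io.BytesIO(audio_bytes)
    response = requests.post(
        f"{BACKEND_URL}/speech_to_text_api",
        files={"file": wav_data},
        data={"api_key": api_key},
    )
    response.raise_for_status()
    return response.json()["transcription"]
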
