ASR_demo/wav2text.py
2024-12-17 15:47:24 +08:00

21 lines
506 B
Python

from transformers import pipeline
# Build the automatic-speech-recognition pipeline from the local model directory.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="./models--BELLE-2--Belle-whisper-large-v3-turbo-zh",
)

# Ask the tokenizer for the decoder prompt ids matching language "zh" and the
# "transcribe" task, then store them on the model config as forced decoder ids.
zh_transcribe_prompt_ids = transcriber.tokenizer.get_decoder_prompt_ids(
    language="zh",
    task="transcribe",
)
transcriber.model.config.forced_decoder_ids = zh_transcribe_prompt_ids

# Run recognition on audio.wav; return_timestamps=True was added to the call so
# that timestamps are requested along with the transcription.
transcription = transcriber("audio.wav", return_timestamps=True)

# Show the raw result on stdout, then save its string form to audio.txt (UTF-8).
print(transcription)
with open("audio.txt", "w", encoding="utf-8") as out_file:
    out_file.write(str(transcription))