import time

import soundfile
from funasr import AutoModel

from src.pydantic_models import VADResponse


def vad_model_use_online(file_path: str) -> VADResponse:
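    """Run streaming VAD over an audio file with FunASR's fsmn-vad model.

    Sketch of the flow: read the file with soundfile, split it into 100 ms
    chunks, and feed each chunk to the model while sharing a cache dict so
    state carries across calls. Detected segments are accumulated into a
    VADResponse (project-specific class; its interface is assumed from the
    way it is used below).
    """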
    chunk_size = 100  # length of each streaming chunk in ms
    model = AutoModel(model="fsmn-vad", model_revision="v2.0.4", disable_update=True)

    vad_result = VADResponse()
    vad_result.time_chunk_index_callback = lambda index: print(f"Callback: {index}")
    items = []  # raw segment entries, kept only for the debug print below

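    # soundfile.read returns the waveform as a NumPy float array plus its sample rate.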
    speech, sample_rate = soundfile.read(file_path)
    chunk_stride = int(chunk_size * sample_rate / 1000)  # samples per chunk

    cache = {}  # streaming state shared across generate() calls
    # Number of chunks needed to cover the signal (the last one may be shorter).
    total_chunk_num = int((len(speech) - 1) / chunk_stride + 1)
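
    # Streaming protocol as used in the FunASR examples: pass the same `cache`
    # dict on every call so the model keeps its internal state, and set
    # is_final=True on the last chunk so any pending segment is flushed.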
    for i in range(total_chunk_num):
        time.sleep(0.1)  # crude stand-in for audio arriving in real time
        speech_chunk = speech[i * chunk_stride:(i + 1) * chunk_stride]
        is_final = i == total_chunk_num - 1
        res = model.generate(input=speech_chunk, cache=cache, is_final=is_final, chunk_size=chunk_size)
        if len(res[0]["value"]):  # non-empty when new segment boundaries were detected
            vad_result += VADResponse.from_raw(res)
            for item in res[0]["value"]:
                items.append(item)
    vad_result.process_time_chunk()

    # for item in items:
    #     print(item)
    return vad_result


if __name__ == "__main__":
    vad_result = vad_model_use_online("tests/vad_example.wav")
    # print(vad_result)