import time
from typing import Any, Dict, List

from funasr import AutoModel

from src.pydantic_models import VADResponse


def vad_model_use_online(file_path: str) -> VADResponse:
    """Run the FSMN-VAD model over an audio file in fixed-size chunks.

    The file is read with ``soundfile``, split into consecutive 100 ms
    chunks, and each chunk is passed to ``model.generate`` together with
    one ``cache`` dict that is reused for every call. Whenever a call
    reports a non-empty ``value``, the raw result is converted with
    ``VADResponse.from_raw`` and accumulated into the returned object.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        The accumulated ``VADResponse``. If the file contains no samples,
        the model is never invoked and a fresh ``VADResponse`` is returned.
    """
    # Kept function-local, as in the original, so importing this module
    # does not itself require soundfile.
    import soundfile

    chunk_size = 100  # ms of audio handed to the model per call
    model = AutoModel(model="fsmn-vad", model_revision="v2.0.4", disable_update=True)

    vad_result = VADResponse()
    vad_result.time_chunk_index_callback = lambda index: print(f"回调: {index}")

    speech, sample_rate = soundfile.read(file_path)
    chunk_stride = int(chunk_size * sample_rate / 1000)  # samples per chunk
    cache = {}

    # Ceiling division of len(speech) by chunk_stride. The previous
    # expression, int(len((speech)-1)/chunk_stride+1), took the length of
    # `speech - 1` (which equals len(speech)) and therefore produced one
    # extra, empty chunk whenever the sample count was an exact multiple of
    # chunk_stride; that empty chunk was also the one flagged as final.
    total_chunk_num = (len(speech) - 1) // chunk_stride + 1

    for i in range(total_chunk_num):
        # Presumably paces the loop to mimic audio arriving in real time
        # (one 100 ms chunk per 0.1 s) -- TODO confirm.
        time.sleep(0.1)
        speech_chunk = speech[i * chunk_stride:(i + 1) * chunk_stride]
        is_final = i == total_chunk_num - 1
        res = model.generate(
            input=speech_chunk,
            cache=cache,
            is_final=is_final,
            chunk_size=chunk_size,
        )
        if len(res[0]["value"]) > 0:
            vad_result += VADResponse.from_raw(res)
        # NOTE(review): assumed to run once per chunk, whether or not the
        # chunk produced new segments -- confirm against the intended flow.
        vad_result.process_time_chunk()

    return vad_result


if __name__ == "__main__":
    vad_result = vad_model_use_online("tests/vad_example.wav")