from typing import Any, Callable, Dict, List, Optional

from funasr import AutoModel

from src.functor.audiochunk import AudioChunk
from src.models import AudioBinary_Chunk, AudioBinary_Config, VADResponse


class VAD:
    """Streaming voice-activity-detection front end.

    Each pushed audio buffer is stored in an ``AudioChunk`` and forwarded to
    the VAD model; non-empty model results are accumulated in a
    ``VADResponse``.  ``process_vad_result`` then hands the audio belonging to
    each detected time range to a callback as an ``AudioBinary_Chunk``.
    """

    def __init__(
        self,
        VAD_model=None,
        audio_config: Optional[AudioBinary_Config] = None,
        callback: Optional[Callable] = None,
    ):
        """Set up the model, the audio buffer and the result accumulator.

        Args:
            VAD_model: Object exposing ``generate(input=..., cache=...,
                chunk_size=..., is_final=...)``.  When ``None``, the FunASR
                ``fsmn-vad`` model (revision ``v2.0.4``) is loaded.
            audio_config: Audio configuration.  Its ``chunk_size`` attribute
                is read on every ``push_binary_data`` call, and the object is
                passed to ``AudioChunk``.
            callback: Initial value for the result object's
                ``time_chunk_index_callback``; also the fallback callback of
                ``process_vad_result``.
        """
        # VAD model: fall back to the default FunASR model when none is given.
        self.VAD_model = VAD_model
        if self.VAD_model is None:
            self.VAD_model = AutoModel(
                model="fsmn-vad",
                model_revision="v2.0.4",
                disable_update=True,
            )

        # Audio configuration.
        self.audio_config = audio_config

        # Accumulated VAD results.
        self.vad_result = VADResponse(time_chunk_index_callback=callback)

        # Buffer of the raw audio pushed so far.
        self.audio_chunk = AudioChunk(audio_config=self.audio_config)

        # State dict handed to every ``generate`` call of the model.
        self.cache = {}

    def push_binary_data(self, binary_data: bytes, *, is_final: bool = False):
        """Buffer one piece of audio and run the VAD model on it.

        Args:
            binary_data: Raw audio bytes; stored in the audio buffer and
                passed unchanged to the model as ``input``.
            is_final: Forwarded to the model's ``is_final`` argument.  Pass
                ``True`` for the last piece of a stream.  Defaults to
                ``False``, which matches the previous hard-coded behaviour.
        """
        # Store the raw bytes so detected ranges can be cut out later.
        self.audio_chunk.add_chunk(binary_data)

        # Run the model on this piece, sharing the cache across calls.
        res = self.VAD_model.generate(
            input=binary_data,
            cache=self.cache,
            chunk_size=self.audio_config.chunk_size,
            is_final=is_final,
        )

        # Accumulate only non-empty results; an empty/None result list is
        # skipped instead of failing on ``res[0]``.
        if res and len(res[0]["value"]):
            self.vad_result += VADResponse.from_raw(res)

    def set_callback(self, callback: Callable):
        """Replace the callback stored on the VAD result object."""
        self.vad_result.time_chunk_index_callback = callback

    def process_vad_result(self, callback: Optional[Callable] = None):
        """Pass the audio of each VAD time chunk to a callback.

        Args:
            callback: Called with one ``AudioBinary_Chunk`` per time chunk.
                When ``None``, the result object's
                ``time_chunk_index_callback`` is used.

        Raises:
            TypeError: If a time chunk has to be delivered but no callback is
                available (neither passed here nor registered earlier).
        """
        handler = (
            callback
            if callback is not None
            else self.vad_result.time_chunk_index_callback
        )

        def _emit(time_chunk):
            # Checked lazily so that calling this method while there is
            # nothing to deliver keeps working without a callback.
            if handler is None:
                raise TypeError(
                    "VAD.process_vad_result: no callback available; pass "
                    "`callback` or register one with set_callback()"
                )
            start_time = time_chunk["start_time"]
            end_time = time_chunk["end_time"]
            return handler(
                AudioBinary_Chunk(
                    start_time=start_time,
                    end_time=end_time,
                    chunk=self.audio_chunk.get_chunk(start_time, end_time),
                )
            )

        self.vad_result.process_time_chunk(_emit)