# Source listing metadata: 60 lines, 2.0 KiB, Python
from typing import Any, Callable, Dict, List, Optional

from funasr import AutoModel

from src.functor.audiochunk import AudioChunk
from src.models import AudioBinary_Chunk
from src.models import AudioBinary_Config
from src.models import VADResponse


class VAD:
    """Streaming voice-activity-detection front end built on a FunASR model.

    Each pushed piece of audio is stored in an ``AudioChunk`` buffer and fed
    to the VAD model; non-empty detection results are accumulated in a
    ``VADResponse``.  ``process_vad_result`` then turns accumulated time
    chunks into ``AudioBinary_Chunk`` objects and passes them to a callback.
    """

    def __init__(
        self,
        VAD_model=None,
        audio_config: Optional["AudioBinary_Config"] = None,
        callback: Optional[Callable] = None,
    ):
        """Set up the model, the audio buffer and the result accumulator.

        Args:
            VAD_model: Object exposing ``generate(input=..., cache=...,
                chunk_size=..., is_final=...)``.  When ``None``, the FunASR
                ``fsmn-vad`` model (revision ``v2.0.4``) is loaded.
            audio_config: Audio configuration; it is handed to ``AudioChunk``
                and its ``chunk_size`` is read on every ``push_binary_data``
                call.  NOTE(review): the default ``None`` makes
                ``push_binary_data`` fail on ``chunk_size`` -- callers
                presumably always pass a config; confirm.
            callback: Stored on the ``VADResponse`` as
                ``time_chunk_index_callback``; used by ``process_vad_result``
                when no explicit callback is given there.
        """
        # VAD model: fall back to the default FunASR model only when the
        # caller did not inject one.
        if VAD_model is None:
            VAD_model = AutoModel(
                model="fsmn-vad",
                model_revision="v2.0.4",
                disable_update=True,
            )
        self.VAD_model = VAD_model

        # Audio configuration shared with the chunk buffer.
        self.audio_config = audio_config

        # Accumulated VAD results.
        self.vad_result = VADResponse(time_chunk_index_callback=callback)

        # Buffer holding the pushed binary audio.
        self.audio_chunk = AudioChunk(audio_config=self.audio_config)

        # State dict passed to the model on every generate() call so it can
        # carry streaming context between calls.
        self.cache = {}

    def push_binary_data(self, binary_data: bytes, *, is_final: bool = False):
        """Buffer one piece of audio and run the VAD model on it.

        Args:
            binary_data: Raw audio bytes for this step.  They are appended to
                the internal ``AudioChunk`` buffer and passed to the model
                unchanged.
            is_final: Forwarded to the model's ``generate`` call.  Pass
                ``True`` for the last piece of a stream so the model can
                finish its streaming state; defaults to ``False``, which is
                what every call used previously.
        """
        # Push the binary data into the buffer.
        self.audio_chunk.add_chunk(binary_data)

        # Run the model on this piece of audio.
        res = self.VAD_model.generate(
            input=binary_data,
            cache=self.cache,
            chunk_size=self.audio_config.chunk_size,
            is_final=is_final,
        )

        # An empty result list means there is nothing to accumulate; guard it
        # so indexing the first entry cannot raise IndexError.
        segments = res[0]["value"] if res else []
        if len(segments):
            self.vad_result += VADResponse.from_raw(res)

    def set_callback(self, callback: Callable):
        """Replace the default callback stored on the VAD result object."""
        self.vad_result.time_chunk_index_callback = callback

    def process_vad_result(self, callback: Optional[Callable] = None):
        """Hand accumulated VAD time chunks to a callback as audio chunks.

        Args:
            callback: Callable receiving one ``AudioBinary_Chunk`` per time
                chunk.  When ``None``, the callback stored on the VAD result
                (see ``set_callback``) is used.

        Raises:
            TypeError: If a time chunk has to be delivered but no callback is
                available.  Raised before the audio buffer is queried.
        """
        if callback is None:
            callback = self.vad_result.time_chunk_index_callback

        def _emit(time_chunk):
            # Fail with a readable message instead of calling ``None``; the
            # exception type matches what calling ``None`` would raise.
            if callback is None:
                raise TypeError(
                    "VAD.process_vad_result: no callback was provided and "
                    "none is set on the VAD result"
                )
            start = time_chunk["start_time"]
            end = time_chunk["end_time"]
            return callback(
                AudioBinary_Chunk(
                    start_time=start,
                    end_time=end,
                    chunk=self.audio_chunk.get_chunk(start, end),
                )
            )

        # Process the VAD result: the result object decides which time chunks
        # are passed to ``_emit``.
        self.vad_result.process_time_chunk(_emit)