"""
模型使用测试

此处主要用于各类调用模型的处理数据与输出格式

请在主目录下test_main.py中调用

将需要测试的模型定义在函数中进行测试, 函数名称需要与测试内容匹配。
"""
|
|
|
|
import time
from typing import Any, Dict, List

from funasr import AutoModel

from src.models import VADResponse

|
def vad_model_use_online(file_path: str) -> VADResponse:
    """Stream a WAV file through the online FSMN-VAD model chunk by chunk.

    Reads ``file_path`` with soundfile, slices the waveform into 100 ms
    chunks, and feeds each chunk to funasr's streaming ``fsmn-vad`` model,
    accumulating the per-chunk results into a single ``VADResponse``.

    Args:
        file_path: Path to an audio file readable by ``soundfile.read``.

    Returns:
        The accumulated ``VADResponse`` (note: NOT a list of dicts — the
        original annotation was wrong).
    """
    chunk_size = 100  # ms of audio per streaming chunk
    model = AutoModel(model="fsmn-vad", model_revision="v2.0.4", disable_update=True)

    vad_result = VADResponse()
    vad_result.time_chunk_index_callback = lambda index: print(f"回调: {index}")
    items = []
    import soundfile

    speech, sample_rate = soundfile.read(file_path)
    # Samples per chunk: chunk_size is in ms, sample_rate in samples/sec.
    chunk_stride = int(chunk_size * sample_rate / 1000)

    cache = {}
    # BUG FIX: the original wrote int(len((speech) - 1) / chunk_stride + 1),
    # i.e. len(speech - 1) — that only happens to work for numpy arrays
    # (elementwise subtraction keeps the length) and crashes for plain
    # sequences. The intended ceiling-style chunk count is below.
    total_chunk_num = int((len(speech) - 1) / chunk_stride) + 1
    for i in range(total_chunk_num):
        # Simulate real-time arrival of audio so the streaming path is
        # exercised the way a live caller would drive it.
        time.sleep(0.1)
        speech_chunk = speech[i * chunk_stride : (i + 1) * chunk_stride]
        is_final = i == total_chunk_num - 1
        res = model.generate(
            input=speech_chunk, cache=cache, is_final=is_final, chunk_size=chunk_size
        )
        # Only non-empty VAD segments are merged into the response.
        if len(res[0]["value"]):
            vad_result += VADResponse.from_raw(res)
            for item in res[0]["value"]:
                items.append(item)
    vad_result.process_time_chunk()

    # for item in items:
    #     print(item)
    return vad_result
|
|
|
|
|
|
def vad_model_use_online_logic(file_path: str) -> None:
    """Stream a WAV file through ``LogicTrager`` for online VAD testing.

    NOTE: LogicTrager is deprecated since the Rebuild version; this test is
    kept for reference only.

    Args:
        file_path: Path to an audio file readable by ``soundfile.read``.

    Returns:
        None (the original ``List[Dict[str, Any]]`` annotation was wrong —
        the function always returned ``None``).
    """
    from src.logic_trager import LogicTrager
    from src.config import parse_args
    from src.model_loader import ModelLoader
    from src.models import AudioBinary_Config
    import soundfile  # FIX: was imported twice in the original

    args = parse_args()

    # from src.functor.model_loader import load_models
    # models = load_models(args)
    models = ModelLoader(args)

    chunk_size = 200  # ms of audio per pushed chunk

    speech, sample_rate = soundfile.read(file_path)
    chunk_stride = int(chunk_size * sample_rate / 1000)
    audio_config = AudioBinary_Config(
        sample_rate=sample_rate, sample_width=2, channels=1, chunk_size=chunk_size
    )

    logic_trager = LogicTrager(models=models, audio_config=audio_config)
    # The final iteration may push an empty tail slice; LogicTrager is
    # expected to tolerate that (behavior preserved from the original).
    for i in range(len(speech) // chunk_stride + 1):
        speech_chunk = speech[i * chunk_stride : (i + 1) * chunk_stride]
        logic_trager.push_binary_data(speech_chunk)

    # for item in items:
    #     print(item)
    return None
|
|
|
|
|
|
def asr_model_use_offline(file_path: str) -> List[Dict[str, Any]]:
    """Run offline ASR (paraformer-zh) with VAD and speaker diarization.

    Loads the offline paraformer pipeline (VAD segmentation + cam++ speaker
    model), reads the whole audio file, and transcribes it in one call.

    Args:
        file_path: Path to an audio file readable by ``soundfile.read``.

    Returns:
        The raw funasr result list (one dict per utterance/segment).
    """
    # FIX: removed the redundant local `from funasr import AutoModel`
    # (already imported at module level), the duplicated `import soundfile`,
    # and the unused `AudioBinary_Config` import.
    model = AutoModel(
        model="paraformer-zh",
        model_revision="v2.0.4",
        vad_model="fsmn-vad",
        vad_model_revision="v2.0.4",
        # punc_model="ct-punc-c", punc_model_revision="v2.0.4",
        spk_model="cam++",
        spk_model_revision="v2.0.2",
        spk_mode="vad_segment",
        # NOTE(review): other functions in this file pass `disable_update=True`;
        # confirm `auto_update` is the kwarg funasr actually accepts here.
        auto_update=False,
    )

    import soundfile

    speech, sample_rate = soundfile.read(file_path)
    result = model.generate(speech)
    return result
|
|
|
|
|
|
# if __name__ == "__main__":
#     请在主目录下调用test_main.py文件进行测试
#     vad_result = vad_model_use_online("tests/vad_example.wav")
#     vad_result = vad_model_use_online_logic("tests/vad_example.wav")
#     print(vad_result)
|