[说话人认证]SPKFunctor完成本地说话人embs/wav加载。

This commit is contained in:
Ziyang.Zhang 2025-07-01 15:33:29 +08:00
parent 4e9dd83d55
commit 1a296d8309
22 changed files with 546 additions and 141 deletions

13
data/denoise.py Normal file
View File

@ -0,0 +1,13 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
# Input recording and destination file for the processed audio (relative paths).
wav_file = 'speaker_wav/HaiaoDuan.wav'
output_path = 'denoise_output/HaiaoDuan_denoise_output.wav'

# Build the ModelScope acoustic-noise-suppression pipeline backed by the
# 'iic/speech_frcrn_ans_cirm_16k' model.
ans = pipeline(Tasks.acoustic_noise_suppression, model='iic/speech_frcrn_ans_cirm_16k')

# Run the pipeline on the input file, passing output_path as the output target.
result = ans(wav_file, output_path=output_path)

Binary file not shown.

Binary file not shown.

35
data/record.py Normal file
View File

@ -0,0 +1,35 @@
"""
本地录音保存为wav格式存储在data/speaker_wav目录下
"""
import pyaudio
import wave
def record_audio(filename, duration=5, format=pyaudio.paInt16, channels=1, rate=16000,
                 frames_per_buffer=1024):
    """Record audio from the default input device and save it as a WAV file.

    The function prints a prompt, waits for the user to press Enter, records
    ``duration`` seconds of audio and writes it to ``filename``.

    Args:
        filename: Path of the WAV file to write (the project keeps speaker
            recordings under data/speaker_wav).
        duration: Recording length in seconds.
        format: PyAudio sample-format constant used for capture.
        channels: Number of channels to capture.
        rate: Sampling rate in Hz.
        frames_per_buffer: Number of frames requested per read from the stream.

    Raises:
        ValueError: If ``frames_per_buffer`` is smaller than 1.
    """
    if frames_per_buffer < 1:
        raise ValueError("frames_per_buffer must be >= 1")

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive,
        # not after terminate().
        sample_width = p.get_sample_size(format)

        print("按下回车键开始录音...")
        input()

        # Open the stream only after the user confirms. Opening it before the
        # blocking prompt lets audio pile up in the input buffer while waiting,
        # which can make the first read fail with an input overflow.
        stream = p.open(
            format=format,
            channels=channels,
            rate=rate,
            input=True,
            frames_per_buffer=frames_per_buffer,
        )
        try:
            frames = []
            # Read exactly rate * duration frames; the last read may be a
            # partial buffer so the tail of the recording is not dropped.
            remaining = int(rate * duration)
            while remaining > 0:
                count = min(frames_per_buffer, remaining)
                frames.append(stream.read(count))
                remaining -= count
            print("录音结束")
        finally:
            # Always release the stream, even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if writing fails part-way.
    with wave.open(filename, 'wb') as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(rate)
        wav_file.writeframes(b''.join(frames))
if __name__ == "__main__":
    # Script entry point: record a 5-second sample into the test WAV path.
    record_audio("data/speaker_wav/test.wav", duration=5)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

400
data/speakers.json Normal file
View File

@ -0,0 +1,400 @@
[
{
"speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b",
"speaker_name": "ZiyangZhang",
"wav_path": "/home/lyg/Code/funasr/data/speaker_wav/ZiyangZhang.wav",
"speaker_embs": [
-0.4249887466430664,
-0.12976674735546112,
1.6118208169937134,
1.3348901271820068,
0.1423041820526123,
0.16940945386886597,
-0.042910803109407425,
0.9634712934494019,
0.9677271246910095,
1.1112406253814697,
-2.0086846351623535,
1.729629635810852,
-0.3664000928401947,
2.4323978424072266,
-1.587996244430542,
-1.0803641080856323,
0.08011860400438309,
1.6515964269638062,
-1.1337167024612427,
-0.5088973045349121,
-1.0002555847167969,
0.11426643282175064,
-0.8616334199905396,
-0.006051262840628624,
0.44800689816474915,
0.6659525632858276,
-0.9864538908004761,
2.1259539127349854,
-0.49345871806144714,
-0.14384664595127106,
0.0742349922657013,
0.25577273964881897,
1.0516602993011475,
1.7297064065933228,
-0.44126248359680176,
1.3971654176712036,
0.04305446520447731,
-2.261837959289551,
-0.355578750371933,
-0.8388981819152832,
0.8178591728210449,
0.016942109912633896,
0.8212596774101257,
1.108891248703003,
-0.5182072520256042,
-0.07741295546293259,
0.9407528042793274,
0.026407398283481598,
-0.6210324168205261,
-2.0659642219543457,
0.13895569741725922,
-1.3570973873138428,
2.236407995223999,
-0.29706746339797974,
1.9819035530090332,
1.3580390214920044,
-0.5505754351615906,
0.7189999222755432,
-0.3190038502216339,
1.1075336933135986,
-1.4158482551574707,
0.20138776302337646,
0.8354343175888062,
0.1671304553747177,
-0.56927490234375,
1.057538390159607,
-0.2868591248989105,
0.005044424440711737,
0.49878695607185364,
-0.7493277192115784,
2.4639663696289062,
0.5516767501831055,
-0.2763596177101135,
-0.8769170641899109,
-1.296872615814209,
-0.5233777165412903,
-0.10551001876592636,
-0.5955559611320496,
-0.6046199202537537,
0.22645621001720428,
1.12480890750885,
-0.3678736388683319,
-1.1580262184143066,
-0.3625229299068451,
0.8251489996910095,
0.3464623987674713,
2.261840581893921,
-0.11341957747936249,
-0.6645990610122681,
0.8480257987976074,
-0.47770705819129944,
0.8085628747940063,
-0.26823946833610535,
-0.25040531158447266,
1.0610276460647583,
-0.14239133894443512,
-1.309299349784851,
-1.0987954139709473,
-0.1301683634519577,
-0.05199439451098442,
-0.07838833332061768,
-0.21310138702392578,
0.29347339272499084,
1.0793802738189697,
-1.813226342201233,
-1.1362330913543701,
-0.13013578951358795,
0.6647212505340576,
-0.34312230348587036,
0.5921282172203064,
0.26284533739089966,
0.9369505047798157,
0.1739131063222885,
0.7924790978431702,
0.3412249982357025,
0.16646981239318848,
-0.32468467950820923,
-0.5835385918617249,
0.05923287197947502,
1.191710352897644,
-0.3653518557548523,
-0.8665252923965454,
0.7419591546058655,
-1.7234965562820435,
0.3421083092689514,
-0.24517370760440826,
-0.8724228143692017,
-0.11004912108182907,
-0.10676378011703491,
-1.0688399076461792,
0.4397974908351898,
-0.9902229309082031,
-0.2676651179790497,
1.4346729516983032,
0.34571582078933716,
0.9091840386390686,
0.41458258032798767,
-0.7863419055938721,
0.6952191591262817,
0.8847752809524536,
0.15871241688728333,
-0.10740098357200623,
-0.5305340886116028,
1.0536329746246338,
-1.337695837020874,
0.23358777165412903,
-0.19285082817077637,
-0.5339606404304504,
-0.6768214106559753,
1.6815600395202637,
-0.36710524559020996,
-0.22888287901878357,
-0.2714850902557373,
-0.0895417258143425,
0.3480932116508484,
-0.19148986041545868,
0.44108960032463074,
0.03198949620127678,
-0.3665091097354889,
-0.6040502786636353,
0.37234461307525635,
-0.07462035119533539,
-0.18109525740146637,
-0.19882601499557495,
0.33298638463020325,
0.039957765489816666,
0.6185765266418457,
1.5921381711959839,
0.04164457693696022,
-0.7556226849555969,
-1.0537445545196533,
0.36932048201560974,
-0.2881639897823334,
-1.3762420415878296,
-0.6029151678085327,
-1.3592504262924194,
0.6726564168930054,
0.06349147856235504,
-0.4627697765827179,
1.1113581657409668,
-1.1767970323562622,
0.3900119662284851,
-0.3050364851951599,
-0.2807784676551819,
-0.7237444519996643,
-0.039161279797554016,
0.5845404267311096,
-0.4385261833667755,
-0.3988557755947113,
-1.235430359840393,
-0.648483395576477,
1.084520936012268
]
},
{
"speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b",
"speaker_name": "HaiaoDuan",
"wav_path": "/home/lyg/Code/funasr/data/speaker_wav/HaiaoDuan.wav",
"speaker_embs": [
-1.3490606546401978,
-0.9654964208602905,
0.6671794652938843,
2.3401081562042236,
-1.374346137046814,
0.24404077231884003,
0.08137784898281097,
0.10915698111057281,
0.8208633065223694,
-1.0312862396240234,
1.721955418586731,
-0.16976028680801392,
-1.0259445905685425,
-0.9134035706520081,
-1.3709611892700195,
-0.6821202635765076,
1.0825326442718506,
1.4931895732879639,
-0.06801076978445053,
-0.5044959187507629,
-1.3154232501983643,
-1.1049765348434448,
0.6122218370437622,
1.1061663627624512,
-0.2288999855518341,
-0.03568289428949356,
-0.9260172247886658,
1.1030527353286743,
-0.7439772486686707,
1.4323620796203613,
0.2221372127532959,
-0.8355774283409119,
0.6758987307548523,
0.8520456552505493,
-0.0186605341732502,
-0.981821596622467,
0.11743613332509995,
-0.3539535701274872,
-0.33924832940101624,
-0.510174036026001,
0.6893219351768494,
-0.10966216027736664,
-1.5873743295669556,
1.7041956186294556,
-0.9844599366188049,
-1.368901252746582,
0.44316115975379944,
-2.406590700149536,
0.9880101680755615,
0.8344699740409851,
0.22896111011505127,
-1.4464795589447021,
2.222980260848999,
-0.22508130967617035,
0.8659772276878357,
0.7801474928855896,
1.824644923210144,
-0.2455991804599762,
-0.06682202965021133,
0.07106778025627136,
-1.8072712421417236,
0.7733234763145447,
0.20490191876888275,
-1.119908094406128,
-1.2623472213745117,
0.34426289796829224,
0.7909225821495056,
0.47128093242645264,
-0.9976771473884583,
-0.6703121662139893,
0.7459381818771362,
1.0664807558059692,
0.659284770488739,
-0.49438077211380005,
0.1974140703678131,
-0.07557231187820435,
-1.324866533279419,
-1.2217090129852295,
-1.0160834789276123,
0.7517350912094116,
0.06301767379045486,
0.8621189594268799,
-1.033493161201477,
-0.18051855266094208,
-0.2633781135082245,
0.5859690308570862,
1.5803791284561157,
-0.7071301341056824,
-0.016185184940695763,
-0.5259001851081848,
-0.6252623796463013,
1.4383807182312012,
0.6068354845046997,
0.39534664154052734,
0.22612401843070984,
-1.541978120803833,
-2.575181484222412,
-0.9924071431159973,
1.9649298191070557,
-1.1940282583236694,
-0.6481325030326843,
-1.5226261615753174,
1.6535273790359497,
0.7740333676338196,
-1.8780876398086548,
0.627184271812439,
1.0915889739990234,
1.694388508796692,
-0.47886598110198975,
-0.04895557090640068,
0.3620351552963257,
0.640113115310669,
-0.4149058163166046,
-0.18083086609840393,
-0.30447620153427124,
0.022528085857629776,
-0.6550383567810059,
-0.3812088668346405,
-0.478842169046402,
0.6615785360336304,
0.49959492683410645,
-0.249789759516716,
1.7448066473007202,
-0.9037050008773804,
-0.7441433668136597,
0.5949154496192932,
-1.1230697631835938,
-0.2552490830421448,
0.4216223657131195,
-0.5870983004570007,
0.7283152937889099,
-0.13834434747695923,
-1.3267407417297363,
1.1050132513046265,
1.731435775756836,
0.3724023103713989,
0.830539882183075,
-1.032881736755371,
0.8204181790351868,
0.05735205113887787,
0.5442802906036377,
-0.7974395751953125,
0.18374553322792053,
-0.17642715573310852,
-0.051413919776678085,
-0.2413552850484848,
-0.43316808342933655,
-0.2594863772392273,
1.5363879203796387,
0.5056991577148438,
-1.3894445896148682,
-1.2057586908340454,
-0.48546579480171204,
-0.2659154236316681,
0.9767322540283203,
-1.97313392162323,
-0.3016327917575836,
-0.6123557686805725,
0.288481205701828,
0.2976057827472687,
0.08243764936923981,
0.6122551560401917,
-0.6019028425216675,
-0.10548368841409683,
-0.016991911455988884,
1.75961172580719,
0.6418831944465637,
0.3137458264827728,
0.25365981459617615,
-0.45389246940612793,
0.238858163356781,
0.2631453275680542,
1.1121031045913696,
-0.9991472363471985,
-0.8904637694358826,
-1.1346020698547363,
-1.1918814182281494,
-1.1205440759658813,
-1.486283779144287,
1.0530670881271362,
-0.583172082901001,
0.26391518115997314,
1.2654175758361816,
-0.8430055975914001,
0.21697403490543365,
-0.30710718035697937,
2.191946506500244,
-0.19980488717556,
-0.5966204404830933,
0.04923265427350998,
-0.8815436959266663,
0.9289136528968811
]
}
]

14
data/speakers.json.backup Normal file
View File

@ -0,0 +1,14 @@
[
{
"speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b",
"speaker_name": "ZiyangZhang",
"wav_path": "/home/lyg/Code/funasr/data/speaker_wav/ZiyangZhang.wav",
"speaker_embs": ""
},
{
"speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b",
"speaker_name": "HaiaoDuan",
"wav_path": "/home/lyg/Code/funasr/data/speaker_wav/HaiaoDuan.wav",
"speaker_embs": ""
}
]

36
main.py
View File

@ -1,30 +1,12 @@
from funasr import AutoModel from src.server import app
import uvicorn
from datetime import datetime
from src.utils.logger import get_module_logger, setup_root_logger
chunk_size = 200 # ms time = format(datetime.now(), "%Y-%m-%d %H:%M:%S")
model = AutoModel(model="fsmn-vad", model_revision="v2.0.4", disable_update=True) setup_root_logger(level="DEBUG", log_file=f"logs/fastapiserver_{time}.log")
logger = get_module_logger(__name__)
import soundfile
wav_file = "tests/vad_example.wav" if __name__ == "__main__":
speech, sample_rate = soundfile.read(wav_file) uvicorn.run(app, host="0.0.0.0", port=8000)
chunk_stride = int(chunk_size * sample_rate / 1000)
cache = {}
total_chunk_num = int(len((speech) - 1) / chunk_stride + 1)
for i in range(total_chunk_num):
speech_chunk = speech[i * chunk_stride : (i + 1) * chunk_stride]
is_final = i == total_chunk_num - 1
res = model.generate(
input=speech_chunk,
cache=cache,
is_final=is_final,
chunk_size=chunk_size,
disable_pbar=True,
)
if len(res[0]["value"]):
print(res)
print(f"len(speech): {len(speech)}")
print(f"len(speech_chunk): {len(speech_chunk)}")
print(f"total_chunk_num: {total_chunk_num}")
print(f"generateconfig: chunk_size: {chunk_size}, chunk_stride: {chunk_stride}")

View File

@ -11,6 +11,7 @@ import json
import torch import torch
import threading import threading
import numpy import numpy
import os
# 日志 # 日志
from src.utils.logger import get_module_logger from src.utils.logger import get_module_logger
@ -60,6 +61,7 @@ class SPKFunctor(BaseFunctor):
def add_speaker(self, speaker: SpeakerCreate) -> None: def add_speaker(self, speaker: SpeakerCreate) -> None:
self._spk_data.append(speaker) self._spk_data.append(speaker)
logger.debug("添加说话人: %s", speaker)
def verify(self, emb: numpy.ndarray) -> Dict: def verify(self, emb: numpy.ndarray) -> Dict:
# 将输入的numpy embedding转换为tensor # 将输入的numpy embedding转换为tensor
@ -116,6 +118,9 @@ class SPKFunctor(BaseFunctor):
self._input_queue: Queue = None # 输入队列 self._input_queue: Queue = None # 输入队列
self._audio_config: AudioBinary_Config = None # 音频配置 self._audio_config: AudioBinary_Config = None # 音频配置
logger.debug("加载本地说话人数据")
self.load_spk_data_local()
def load_spk_data_local( def load_spk_data_local(
self, self,
spk_data_path: str = 'data/speakers.json', spk_data_path: str = 'data/speakers.json',
@ -125,10 +130,38 @@ class SPKFunctor(BaseFunctor):
""" """
with open(spk_data_path, 'r') as f: with open(spk_data_path, 'r') as f:
spk_data = json.load(f) spk_data = json.load(f)
for spk in spk_data: for i, spk in enumerate(spk_data):
logger.debug("加载本地说话人数据: %s", spk)
if spk['speaker_embs'] == "" and spk['wav_path'] != "":
logger.debug("尝试转换本地wav为embs: %s", spk['wav_path'])
try:
# 读取数据为numpy数组
import soundfile as sf
import numpy as np
wav_data, sr = sf.read(spk['wav_path'], dtype='int16')
# 确保是单通道
if wav_data.ndim > 1:
wav_data = wav_data[:, 0]
# 转为numpy数组后送入pipeline
spk['speaker_embs'] = self._sv_pipeline([wav_data], output_emb=True)['embs'][0]
logger.debug("转换本地wav为embs: length=%s type=%s", len(spk['speaker_embs']), type(spk['speaker_embs']))
except Exception as e:
logger.error("转换本地wav为embs失败: %s", e)
else:
logger.debug("加载本地说话人数据: %s", spk)
# 将spk的speaker_embs转换为numpy
spk['speaker_embs'] = numpy.array(spk['speaker_embs'])
self._spk_verify.add_speaker(SpeakerCreate(**spk)) self._spk_verify.add_speaker(SpeakerCreate(**spk))
spk['speaker_embs'] = spk['speaker_embs'].tolist()
spk_data[i] = spk
# 保存更新后的数据
try:
with open(spk_data_path, 'w') as f:
json.dump(spk_data, f, indent=4)
except Exception as e:
logger.error("保存更新后的数据失败: %s", e)
def reset_cache(self) -> None: def reset_cache(self) -> None:
""" """
重置缓存, 用于任务完成后清理缓存数据, 准备下次任务 重置缓存, 用于任务完成后清理缓存数据, 准备下次任务

View File

@ -159,6 +159,7 @@ class VADFunctor(BaseFunctor):
self._audio_cache = numpy.concatenate((self._audio_cache, data)) self._audio_cache = numpy.concatenate((self._audio_cache, data))
elif isinstance(self._audio_cache, list): elif isinstance(self._audio_cache, list):
self._audio_cache.append(data) self._audio_cache.append(data)
if self._audiobinary_cache is None: if self._audiobinary_cache is None:
self._audiobinary_cache = data self._audiobinary_cache = data
else: else:
@ -175,17 +176,28 @@ class VADFunctor(BaseFunctor):
处理数据 处理数据
使用model进行生成, 并使用_do_callback进行回调 使用model进行生成, 并使用_do_callback进行回调
""" """
if data is None:
result = self._model["vad"].generate(
input=self._audio_cache,
cache=self._model_cache,
chunk_size=self._audio_config.chunk_size,
is_final=True,
)
self._do_callback(result[0]["value"])
return
self._predeal_data(data) self._predeal_data(data)
if len(self._audio_cache) >= self._audio_config.chunk_stride: if len(self._audio_cache) >= self._audio_config.chunk_stride:
result = self._model["vad"].generate( result = self._model["vad"].generate(
input=self._audio_cache, input=self._audio_cache,
cache=self._model_cache, cache=self._model_cache,
chunk_size=self._audio_config.chunk_size, chunk_size=self._audio_config.chunk_size,
max_end_silence_time = 300,
is_final=False, is_final=False,
) )
if len(result[0]["value"]) > 0: if len(result[0]["value"]) > 0:
self._do_callback(result[0]["value"]) self._do_callback(result[0]["value"])
# logger.debug(f"VADFunctor结果: {result[0]['value']}") logger.debug(f"VADFunctor结果: {result[0]['value']}")
self._audio_cache = None self._audio_cache = None
def _run(self): def _run(self):
@ -202,11 +214,11 @@ class VADFunctor(BaseFunctor):
while self._is_running: while self._is_running:
try: try:
data = self._input_queue.get(True, timeout=1) data = self._input_queue.get(True, timeout=1)
if data is None: # logger.debug("[VADFunctor]获取到的数据length: %s", len(data))
break
logger.debug("[VADFunctor]获取到的数据length: %s", len(data))
self._process(data) self._process(data)
self._input_queue.task_done() self._input_queue.task_done()
if data is None:
break
# 当队列为空时, 间隔1s检测是否进入停止事件。 # 当队列为空时, 间隔1s检测是否进入停止事件。
except Empty: except Empty:
if self._stop_event: if self._stop_event:
@ -252,67 +264,4 @@ class VADFunctor(BaseFunctor):
self._thread.join() self._thread.join()
with self._status_lock: with self._status_lock:
self._is_running = False self._is_running = False
return not self._thread.is_alive() return not self._thread.is_alive()
# class VAD:
# def __init__(
# self,
# VAD_model=None,
# audio_config: AudioBinary_Config = None,
# callback: Callable = None,
# ):
# # vad model
# self.VAD_model = VAD_model
# if self.VAD_model is None:
# self.VAD_model = AutoModel(
# model="fsmn-vad", model_revision="v2.0.4", disable_update=True
# )
# # audio config
# self.audio_config = audio_config
# # vad result
# self.vad_result = VADResponse(time_chunk_index_callback=callback)
# # audio binary poll
# self.audio_chunk = AudioChunk(audio_config=self.audio_config)
# self.cache = {}
# def push_binary_data(
# self,
# binary_data: bytes,
# ):
# # 压入二进制数据
# self.audio_chunk.add_chunk(binary_data)
# # 处理音频块
# res = self.VAD_model.generate(
# input=binary_data,
# cache=self.cache,
# chunk_size=self.audio_config.chunk_size,
# is_final=False,
# )
# # print("VAD generate", res)
# if len(res[0]["value"]):
# self.vad_result += VADResponse.from_raw(res)
# def set_callback(
# self,
# callback: Callable,
# ):
# self.vad_result.time_chunk_index_callback = callback
# def process_vad_result(self, callback: Callable = None):
# # 处理VAD结果
# callback = (
# callback
# if callback is not None
# else self.vad_result.time_chunk_index_callback
# )
# self.vad_result.process_time_chunk(
# lambda x: callback(
# AudioBinary_Chunk(
# start_time=x["start_time"],
# end_time=x["end_time"],
# chunk=self.audio_chunk.get_chunk(x["start_time"], x["end_time"]),
# )
# )
# )

View File

@ -1,5 +1,5 @@
""" """
src/schemas/speaker.py src/models/spk.py
------------------------ ------------------------
此模块定义与说话人speakers表对应的 Pydantic 模型用于 API 数据验证和序列化 此模块定义与说话人speakers表对应的 Pydantic 模型用于 API 数据验证和序列化
@ -14,10 +14,12 @@ from datetime import datetime
from typing import Optional, List from typing import Optional, List
from uuid import UUID from uuid import UUID
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from .base import BaseSchema from src.utils import get_module_logger
logger = get_module_logger(__name__)
# 基础模型,定义说话人的核心属性 # 基础模型,定义说话人的核心属性
class SpeakerBase(BaseSchema): class SpeakerBase(BaseModel):
speaker_id: UUID = Field( speaker_id: UUID = Field(
..., ...,
description="说话人唯一标识符" description="说话人唯一标识符"

View File

@ -224,7 +224,7 @@ class ASRPipeline(PipelineBase):
while self._is_running and not self._stop_event: while self._is_running and not self._stop_event:
try: try:
data = self._input_queue.get(timeout=self._queue_timeout) data = self._input_queue.get(timeout=self._queue_timeout)
logger.debug("[ASRpipeline]获取到的数据length: %s", len(data)) # logger.debug("[ASRpipeline]获取到的数据length: %s", len(data))
# 检查是否是结束信号 # 检查是否是结束信号
if data is None: if data is None:
logger.info("收到结束信号,管道准备停止") logger.info("收到结束信号,管道准备停止")

View File

@ -105,7 +105,7 @@ class ASRRunner(RunnerBase):
data = self._sender.recv() data = self._sender.recv()
if data is None: if data is None:
break break
logger.debug("[ASRRunner][SAR-%s]接收到的数据length: %s", self._name, len(data)) # logger.debug("[ASRRunner][SAR-%s]接收到的数据length: %s", self._name, len(data))
self._input_queue.put(data) self._input_queue.put(data)
self.stop() self.stop()

View File

@ -47,7 +47,7 @@ class MockWebSocketClient:
"""Puts data into the receive queue for the `recv` method to consume.""" """Puts data into the receive queue for the `recv` method to consume."""
if data is None: if data is None:
return return
logger.debug("Mock WS put_for_recv length: %s", len(data)) # logger.debug("Mock WS put_for_recv length: %s", len(data))
self.receive_queue.put(data) self.receive_queue.put(data)
@property @property

View File

@ -1,32 +0,0 @@
"""
-*- coding: utf-8 -*-
此模块是ASR的websocket端点, 使用FastAPI的websocket端点
"""
from fastapi import WebSocket, APIRouter
router = APIRouter()
from src.runner.ASRRunner import ASRRunner
ASRRunner_instance = ASRRunner()
from src.core import ModelLoader
model_loader = ModelLoader()
args = {
"asr_model": "paraformer-zh",
"asr_model_revision": "v2.0.4",
"vad_model": "fsmn-vad",
"vad_model_revision": "v2.0.4",
"spk_model": "cam++",
"spk_model_revision": "v2.0.2",
"audio_update": False,
}
models = model_loader.load_models(args)
@router.websocket("/asr_full")
async def asr_endpoint(websocket: WebSocket):
await websocket.accept()
while True:
data = await websocket.receive_text()
print(data)

View File

@ -10,6 +10,10 @@ from tests.runner.asr_runner_test import test_asr_runner
setup_root_logger(level="INFO", log_file="logs/test_main.log") setup_root_logger(level="INFO", log_file="logs/test_main.log")
logger = get_module_logger(__name__) logger = get_module_logger(__name__)
# 清空logs/test_main.log文件
with open("logs/test_main.log", "w") as f:
f.truncate()
# from tests.functor.vad_test import test_vad_functor # from tests.functor.vad_test import test_vad_functor
# logger.info("开始测试VAD函数器") # logger.info("开始测试VAD函数器")
# test_vad_functor() # test_vad_functor()

BIN
tests/XT_ZZY.wav Normal file

Binary file not shown.

BIN
tests/XT_ZZY_denoise.wav Normal file

Binary file not shown.

View File

@ -10,6 +10,10 @@ from src.core.model_loader import ModelLoader
from src.models import AudioBinary_Config from src.models import AudioBinary_Config
from src.utils.mock_websocket import MockWebSocketClient from src.utils.mock_websocket import MockWebSocketClient
from src.utils.logger import get_module_logger
logger = get_module_logger(__name__)
def test_asr_runner(): def test_asr_runner():
""" """
End-to-end test for ASRRunner. End-to-end test for ASRRunner.
@ -32,17 +36,18 @@ def test_asr_runner():
"audio_update": False, "audio_update": False,
} }
models = model_loader.load_models(args) models = model_loader.load_models(args)
audio_data, sample_rate = soundfile.read("tests/vad_example.wav") audio_file_path = "tests/XT_ZZY_denoise.wav"
audio_data, sample_rate = soundfile.read(audio_file_path)
logger.info(f"加载数据: {audio_file_path} , audio_data_length: {len(audio_data)}, audio_data_type: {type(audio_data)}, sample_rate: {sample_rate}")
# 2. Configure audio # 2. Configure audio
audio_config = AudioBinary_Config( audio_config = AudioBinary_Config(
chunk_size=200, # ms chunk_size=200, # ms
chunk_stride=1600, # 10ms stride for 16kHz chunk_stride=1000, # 10ms stride for 16kHz
sample_rate=sample_rate, sample_rate=sample_rate,
sample_width=2, # 16-bit sample_width=2, # 16-bit
channels=1, channels=2,
) )
audio_config.chunk_stride = int(audio_config.chunk_stride * sample_rate / 1000) audio_config.chunk_stride = int(audio_config.chunk_size * sample_rate / 1000)
# 3. Setup ASRRunner # 3. Setup ASRRunner
asr_runner = ASRRunner() asr_runner = ASRRunner()
@ -70,6 +75,6 @@ def test_asr_runner():
mock_ws.put_for_recv(chunk) mock_ws.put_for_recv(chunk)
# 6. Wait for results and assert # 6. Wait for results and assert
time.sleep(10) time.sleep(30)
# Signal end of audio stream by sending None # Signal end of audio stream by sending None
mock_ws.put_for_recv(None) mock_ws.put_for_recv(None)