diff --git a/data/denoise.py b/data/denoise.py new file mode 100644 index 0000000..de1adcb --- /dev/null +++ b/data/denoise.py @@ -0,0 +1,13 @@ +from modelscope.pipelines import pipeline +from modelscope.utils.constant import Tasks + + +ans = pipeline( + Tasks.acoustic_noise_suppression, + model='iic/speech_frcrn_ans_cirm_16k') + +wav_file = 'speaker_wav/HaiaoDuan.wav' +output_path = 'denoise_output/HaiaoDuan_denoise_output.wav' +result = ans( + wav_file, + output_path=output_path) diff --git a/data/denoise_output/HaiaoDuan_denoise_output.wav b/data/denoise_output/HaiaoDuan_denoise_output.wav new file mode 100644 index 0000000..8bac338 Binary files /dev/null and b/data/denoise_output/HaiaoDuan_denoise_output.wav differ diff --git a/data/denoise_output/ZiyangZhang_denoise_output.wav b/data/denoise_output/ZiyangZhang_denoise_output.wav new file mode 100644 index 0000000..5e0b274 Binary files /dev/null and b/data/denoise_output/ZiyangZhang_denoise_output.wav differ diff --git a/data/record.py b/data/record.py new file mode 100644 index 0000000..6a8c8cd --- /dev/null +++ b/data/record.py @@ -0,0 +1,35 @@ +""" +本地录音,保存为wav格式,存储在data/speaker_wav目录下 +""" +import pyaudio +import wave + +def record_audio(filename, duration=5, format=pyaudio.paInt16, channels=1, rate=16000): + """ + 本地录音,保存为wav格式,存储在data/speaker_wav目录下 + """ + p = pyaudio.PyAudio() + stream = p.open(format=format, channels=channels, rate=rate, input=True, frames_per_buffer=1024) + + print("按下回车键开始录音...") + input() + frames = [] + for i in range(0, int(rate / 1024 * duration)): + data = stream.read(1024) + frames.append(data) + print("录音结束") + stream.stop_stream() + stream.close() + p.terminate() + wav_file = wave.open(filename, 'wb') + wav_file.setnchannels(channels) + wav_file.setsampwidth(p.get_sample_size(format)) + wav_file.setframerate(rate) + wav_file.writeframes(b''.join(frames)) + wav_file.close() + +if __name__ == "__main__": + record_audio( + "data/speaker_wav/test.wav", + duration=5 + ) \ No newline at end of file diff --git a/data/speaker_wav/HaiaoDuan.wav b/data/speaker_wav/HaiaoDuan.wav new file mode 100644 index 0000000..8bac338 Binary files /dev/null and b/data/speaker_wav/HaiaoDuan.wav differ diff --git a/data/speaker_wav/HaiaoDuan_origin.wav b/data/speaker_wav/HaiaoDuan_origin.wav new file mode 100644 index 0000000..70239e9 Binary files /dev/null and b/data/speaker_wav/HaiaoDuan_origin.wav differ diff --git a/data/speaker_wav/ZiyangZhang.wav b/data/speaker_wav/ZiyangZhang.wav new file mode 100644 index 0000000..5e0b274 Binary files /dev/null and b/data/speaker_wav/ZiyangZhang.wav differ diff --git a/data/speaker_wav/ZiyangZhang_origin.wav b/data/speaker_wav/ZiyangZhang_origin.wav new file mode 100644 index 0000000..e090c9a Binary files /dev/null and b/data/speaker_wav/ZiyangZhang_origin.wav differ diff --git a/data/speakers.json b/data/speakers.json new file mode 100644 index 0000000..2985f53 --- /dev/null +++ b/data/speakers.json @@ -0,0 +1,400 @@ +[ + { + "speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b", + "speaker_name": "ZiyangZhang", + "wav_path": "/home/lyg/Code/funasr/data/speaker_wav/ZiyangZhang.wav", + "speaker_embs": [ + -0.4249887466430664, + -0.12976674735546112, + 1.6118208169937134, + 1.3348901271820068, + 0.1423041820526123, + 0.16940945386886597, + -0.042910803109407425, + 0.9634712934494019, + 0.9677271246910095, + 1.1112406253814697, + -2.0086846351623535, + 1.729629635810852, + -0.3664000928401947, + 2.4323978424072266, + -1.587996244430542, + -1.0803641080856323, + 0.08011860400438309, + 1.6515964269638062, + -1.1337167024612427, + -0.5088973045349121, + -1.0002555847167969, + 0.11426643282175064, + -0.8616334199905396, + -0.006051262840628624, + 0.44800689816474915, + 0.6659525632858276, + -0.9864538908004761, + 2.1259539127349854, + -0.49345871806144714, + -0.14384664595127106, + 0.0742349922657013, + 0.25577273964881897, + 1.0516602993011475, + 1.7297064065933228, + -0.44126248359680176, + 1.3971654176712036, + 0.04305446520447731, + -2.261837959289551, + -0.355578750371933, + -0.8388981819152832, + 0.8178591728210449, + 0.016942109912633896, + 0.8212596774101257, + 1.108891248703003, + -0.5182072520256042, + -0.07741295546293259, + 0.9407528042793274, + 0.026407398283481598, + -0.6210324168205261, + -2.0659642219543457, + 0.13895569741725922, + -1.3570973873138428, + 2.236407995223999, + -0.29706746339797974, + 1.9819035530090332, + 1.3580390214920044, + -0.5505754351615906, + 0.7189999222755432, + -0.3190038502216339, + 1.1075336933135986, + -1.4158482551574707, + 0.20138776302337646, + 0.8354343175888062, + 0.1671304553747177, + -0.56927490234375, + 1.057538390159607, + -0.2868591248989105, + 0.005044424440711737, + 0.49878695607185364, + -0.7493277192115784, + 2.4639663696289062, + 0.5516767501831055, + -0.2763596177101135, + -0.8769170641899109, + -1.296872615814209, + -0.5233777165412903, + -0.10551001876592636, + -0.5955559611320496, + -0.6046199202537537, + 0.22645621001720428, + 1.12480890750885, + -0.3678736388683319, + -1.1580262184143066, + -0.3625229299068451, + 0.8251489996910095, + 0.3464623987674713, + 2.261840581893921, + -0.11341957747936249, + -0.6645990610122681, + 0.8480257987976074, + -0.47770705819129944, + 0.8085628747940063, + -0.26823946833610535, + -0.25040531158447266, + 1.0610276460647583, + -0.14239133894443512, + -1.309299349784851, + -1.0987954139709473, + -0.1301683634519577, + -0.05199439451098442, + -0.07838833332061768, + -0.21310138702392578, + 0.29347339272499084, + 1.0793802738189697, + -1.813226342201233, + -1.1362330913543701, + -0.13013578951358795, + 0.6647212505340576, + -0.34312230348587036, + 0.5921282172203064, + 0.26284533739089966, + 0.9369505047798157, + 0.1739131063222885, + 0.7924790978431702, + 0.3412249982357025, + 0.16646981239318848, + -0.32468467950820923, + -0.5835385918617249, + 0.05923287197947502, + 1.191710352897644, + -0.3653518557548523, + -0.8665252923965454, + 0.7419591546058655, + -1.7234965562820435, + 0.3421083092689514, + -0.24517370760440826, + -0.8724228143692017, + -0.11004912108182907, + -0.10676378011703491, + -1.0688399076461792, + 0.4397974908351898, + -0.9902229309082031, + -0.2676651179790497, + 1.4346729516983032, + 0.34571582078933716, + 0.9091840386390686, + 0.41458258032798767, + -0.7863419055938721, + 0.6952191591262817, + 0.8847752809524536, + 0.15871241688728333, + -0.10740098357200623, + -0.5305340886116028, + 1.0536329746246338, + -1.337695837020874, + 0.23358777165412903, + -0.19285082817077637, + -0.5339606404304504, + -0.6768214106559753, + 1.6815600395202637, + -0.36710524559020996, + -0.22888287901878357, + -0.2714850902557373, + -0.0895417258143425, + 0.3480932116508484, + -0.19148986041545868, + 0.44108960032463074, + 0.03198949620127678, + -0.3665091097354889, + -0.6040502786636353, + 0.37234461307525635, + -0.07462035119533539, + -0.18109525740146637, + -0.19882601499557495, + 0.33298638463020325, + 0.039957765489816666, + 0.6185765266418457, + 1.5921381711959839, + 0.04164457693696022, + -0.7556226849555969, + -1.0537445545196533, + 0.36932048201560974, + -0.2881639897823334, + -1.3762420415878296, + -0.6029151678085327, + -1.3592504262924194, + 0.6726564168930054, + 0.06349147856235504, + -0.4627697765827179, + 1.1113581657409668, + -1.1767970323562622, + 0.3900119662284851, + -0.3050364851951599, + -0.2807784676551819, + -0.7237444519996643, + -0.039161279797554016, + 0.5845404267311096, + -0.4385261833667755, + -0.3988557755947113, + -1.235430359840393, + -0.648483395576477, + 1.084520936012268 + ] + }, + { + "speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b", + "speaker_name": "HaiaoDuan", + "wav_path": "/home/lyg/Code/funasr/data/speaker_wav/HaiaoDuan.wav", + "speaker_embs": [ + -1.3490606546401978, + -0.9654964208602905, + 0.6671794652938843, + 2.3401081562042236, + -1.374346137046814, + 0.24404077231884003, + 0.08137784898281097, + 0.10915698111057281, + 0.8208633065223694, + -1.0312862396240234, + 1.721955418586731, + -0.16976028680801392, + -1.0259445905685425, + -0.9134035706520081, + -1.3709611892700195, + -0.6821202635765076, + 1.0825326442718506, + 1.4931895732879639, + -0.06801076978445053, + -0.5044959187507629, + -1.3154232501983643, + -1.1049765348434448, + 0.6122218370437622, + 1.1061663627624512, + -0.2288999855518341, + -0.03568289428949356, + -0.9260172247886658, + 1.1030527353286743, + -0.7439772486686707, + 1.4323620796203613, + 0.2221372127532959, + -0.8355774283409119, + 0.6758987307548523, + 0.8520456552505493, + -0.0186605341732502, + -0.981821596622467, + 0.11743613332509995, + -0.3539535701274872, + -0.33924832940101624, + -0.510174036026001, + 0.6893219351768494, + -0.10966216027736664, + -1.5873743295669556, + 1.7041956186294556, + -0.9844599366188049, + -1.368901252746582, + 0.44316115975379944, + -2.406590700149536, + 0.9880101680755615, + 0.8344699740409851, + 0.22896111011505127, + -1.4464795589447021, + 2.222980260848999, + -0.22508130967617035, + 0.8659772276878357, + 0.7801474928855896, + 1.824644923210144, + -0.2455991804599762, + -0.06682202965021133, + 0.07106778025627136, + -1.8072712421417236, + 0.7733234763145447, + 0.20490191876888275, + -1.119908094406128, + -1.2623472213745117, + 0.34426289796829224, + 0.7909225821495056, + 0.47128093242645264, + -0.9976771473884583, + -0.6703121662139893, + 0.7459381818771362, + 1.0664807558059692, + 0.659284770488739, + -0.49438077211380005, + 0.1974140703678131, + -0.07557231187820435, + -1.324866533279419, + -1.2217090129852295, + -1.0160834789276123, + 0.7517350912094116, + 0.06301767379045486, + 0.8621189594268799, + -1.033493161201477, + -0.18051855266094208, + -0.2633781135082245, + 0.5859690308570862, + 1.5803791284561157, + -0.7071301341056824, + -0.016185184940695763, + -0.5259001851081848, + -0.6252623796463013, + 1.4383807182312012, + 0.6068354845046997, + 0.39534664154052734, + 0.22612401843070984, + -1.541978120803833, + -2.575181484222412, + -0.9924071431159973, + 1.9649298191070557, + -1.1940282583236694, + -0.6481325030326843, + -1.5226261615753174, + 1.6535273790359497, + 0.7740333676338196, + -1.8780876398086548, + 0.627184271812439, + 1.0915889739990234, + 1.694388508796692, + -0.47886598110198975, + -0.04895557090640068, + 0.3620351552963257, + 0.640113115310669, + -0.4149058163166046, + -0.18083086609840393, + -0.30447620153427124, + 0.022528085857629776, + -0.6550383567810059, + -0.3812088668346405, + -0.478842169046402, + 0.6615785360336304, + 0.49959492683410645, + -0.249789759516716, + 1.7448066473007202, + -0.9037050008773804, + -0.7441433668136597, + 0.5949154496192932, + -1.1230697631835938, + -0.2552490830421448, + 0.4216223657131195, + -0.5870983004570007, + 0.7283152937889099, + -0.13834434747695923, + -1.3267407417297363, + 1.1050132513046265, + 1.731435775756836, + 0.3724023103713989, + 0.830539882183075, + -1.032881736755371, + 0.8204181790351868, + 0.05735205113887787, + 0.5442802906036377, + -0.7974395751953125, + 0.18374553322792053, + -0.17642715573310852, + -0.051413919776678085, + -0.2413552850484848, + -0.43316808342933655, + -0.2594863772392273, + 1.5363879203796387, + 0.5056991577148438, + -1.3894445896148682, + -1.2057586908340454, + -0.48546579480171204, + -0.2659154236316681, + 0.9767322540283203, + -1.97313392162323, + -0.3016327917575836, + -0.6123557686805725, + 0.288481205701828, + 0.2976057827472687, + 0.08243764936923981, + 0.6122551560401917, + -0.6019028425216675, + -0.10548368841409683, + -0.016991911455988884, + 1.75961172580719, + 0.6418831944465637, + 0.3137458264827728, + 0.25365981459617615, + -0.45389246940612793, + 0.238858163356781, + 0.2631453275680542, + 1.1121031045913696, + -0.9991472363471985, + -0.8904637694358826, + -1.1346020698547363, + -1.1918814182281494, + -1.1205440759658813, + -1.486283779144287, + 1.0530670881271362, + -0.583172082901001, + 0.26391518115997314, + 1.2654175758361816, + -0.8430055975914001, + 0.21697403490543365, + -0.30710718035697937, + 2.191946506500244, + -0.19980488717556, + -0.5966204404830933, + 0.04923265427350998, + -0.8815436959266663, + 0.9289136528968811 + ] + } +] \ No newline at end of file diff --git a/data/speakers.json.backup b/data/speakers.json.backup new file mode 100644 index 0000000..f5b8642 --- /dev/null +++ b/data/speakers.json.backup @@ -0,0 +1,14 @@ +[ + { + "speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b", + "speaker_name": "ZiyangZhang", + "wav_path": "/home/lyg/Code/funasr/data/speaker_wav/ZiyangZhang.wav", + "speaker_embs": "" + }, + { + "speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b", + "speaker_name": "HaiaoDuan", + "wav_path": "/home/lyg/Code/funasr/data/speaker_wav/HaiaoDuan.wav", + "speaker_embs": "" + } +] \ No newline at end of file diff --git a/main.py b/main.py index 49697c4..8135688 100644 --- a/main.py +++ b/main.py @@ -1,30 +1,12 @@ -from funasr import AutoModel +from src.server import app +import uvicorn +from datetime import datetime +from src.utils.logger import get_module_logger, setup_root_logger -chunk_size = 200 # ms -model = AutoModel(model="fsmn-vad", model_revision="v2.0.4", disable_update=True) +time = format(datetime.now(), "%Y-%m-%d %H:%M:%S") +setup_root_logger(level="DEBUG", log_file=f"logs/fastapiserver_{time}.log") +logger = get_module_logger(__name__) -import soundfile -wav_file = "tests/vad_example.wav" -speech, sample_rate = soundfile.read(wav_file) -chunk_stride = int(chunk_size * sample_rate / 1000) - -cache = {} -total_chunk_num = int(len((speech) - 1) / chunk_stride + 1) -for i in range(total_chunk_num): - speech_chunk = speech[i * chunk_stride : (i + 1) * chunk_stride] - is_final = i == total_chunk_num - 1 - res = model.generate( - input=speech_chunk, - cache=cache, - is_final=is_final, - chunk_size=chunk_size, - disable_pbar=True, - ) - if len(res[0]["value"]): - print(res) - -print(f"len(speech): {len(speech)}") -print(f"len(speech_chunk): {len(speech_chunk)}") -print(f"total_chunk_num: {total_chunk_num}") -print(f"generateconfig: chunk_size: {chunk_size}, chunk_stride: {chunk_stride}") +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/src/functor/spk_functor.py b/src/functor/spk_functor.py index 1f0a800..bf6a0e6 100644 --- a/src/functor/spk_functor.py +++ b/src/functor/spk_functor.py @@ -11,6 +11,7 @@ import json import torch import threading import numpy +import os # 日志 from src.utils.logger import get_module_logger @@ -60,6 +61,7 @@ class SPKFunctor(BaseFunctor): def add_speaker(self, speaker: SpeakerCreate) -> None: self._spk_data.append(speaker) + logger.debug("添加说话人: %s", speaker) def verify(self, emb: numpy.ndarray) -> Dict: # 将输入的numpy embedding转换为tensor @@ -116,6 +118,9 @@ class SPKFunctor(BaseFunctor): self._input_queue: Queue = None # 输入队列 self._audio_config: AudioBinary_Config = None # 音频配置 + logger.debug("加载本地说话人数据") + self.load_spk_data_local() + def load_spk_data_local( self, spk_data_path: str = 'data/speakers.json', @@ -125,10 +130,38 @@ class SPKFunctor(BaseFunctor): """ with open(spk_data_path, 'r') as f: spk_data = json.load(f) - for spk in spk_data: + for i, spk in enumerate(spk_data): + logger.debug("加载本地说话人数据: %s", spk) + if spk['speaker_embs'] == "" and spk['wav_path'] != "": + logger.debug("尝试转换本地wav为embs: %s", spk['wav_path']) + try: + # 读取数据为numpy数组 + import soundfile as sf + import numpy as np + wav_data, sr = sf.read(spk['wav_path'], dtype='int16') + # 确保是单通道 + if wav_data.ndim > 1: + wav_data = wav_data[:, 0] + # 转为numpy数组后送入pipeline + spk['speaker_embs'] = self._sv_pipeline([wav_data], output_emb=True)['embs'][0] + logger.debug("转换本地wav为embs: length=%s type=%s", len(spk['speaker_embs']), type(spk['speaker_embs'])) + except Exception as e: + logger.error("转换本地wav为embs失败: %s", e) + else: + logger.debug("加载本地说话人数据: %s", spk) + # 将spk的speaker_embs转换为numpy + spk['speaker_embs'] = numpy.array(spk['speaker_embs']) self._spk_verify.add_speaker(SpeakerCreate(**spk)) - + spk['speaker_embs'] = spk['speaker_embs'].tolist() + spk_data[i] = spk + # 保存更新后的数据 + try: + with open(spk_data_path, 'w') as f: + json.dump(spk_data, f, indent=4) + except Exception as e: + logger.error("保存更新后的数据失败: %s", e) + def reset_cache(self) -> None: """ 重置缓存, 用于任务完成后清理缓存数据, 准备下次任务 diff --git a/src/functor/vad_functor.py b/src/functor/vad_functor.py index 79b1cf5..e682439 100644 --- a/src/functor/vad_functor.py +++ b/src/functor/vad_functor.py @@ -159,6 +159,7 @@ class VADFunctor(BaseFunctor): self._audio_cache = numpy.concatenate((self._audio_cache, data)) elif isinstance(self._audio_cache, list): self._audio_cache.append(data) + if self._audiobinary_cache is None: self._audiobinary_cache = data else: @@ -175,17 +176,28 @@ class VADFunctor(BaseFunctor): 处理数据 使用model进行生成, 并使用_do_callback进行回调 """ + if data is None: + result = self._model["vad"].generate( + input=self._audio_cache, + cache=self._model_cache, + chunk_size=self._audio_config.chunk_size, + is_final=True, + ) + self._do_callback(result[0]["value"]) + return + self._predeal_data(data) if len(self._audio_cache) >= self._audio_config.chunk_stride: result = self._model["vad"].generate( input=self._audio_cache, cache=self._model_cache, chunk_size=self._audio_config.chunk_size, + max_end_silence_time = 300, is_final=False, ) if len(result[0]["value"]) > 0: self._do_callback(result[0]["value"]) - # logger.debug(f"VADFunctor结果: {result[0]['value']}") + logger.debug(f"VADFunctor结果: {result[0]['value']}") self._audio_cache = None def _run(self): @@ -202,11 +214,11 @@ class VADFunctor(BaseFunctor): while self._is_running: try: data = self._input_queue.get(True, timeout=1) - if data is None: - break - logger.debug("[VADFunctor]获取到的数据length: %s", len(data)) + # logger.debug("[VADFunctor]获取到的数据length: %s", len(data)) self._process(data) self._input_queue.task_done() + if data is None: + break # 当队列为空时, 间隔1s检测是否进入停止事件。 except Empty: if self._stop_event: @@ -252,67 +264,4 @@ class VADFunctor(BaseFunctor): self._thread.join() with self._status_lock: self._is_running = False - return not self._thread.is_alive() - - -# class VAD: - -# def __init__( -# self, -# VAD_model=None, -# audio_config: AudioBinary_Config = None, -# callback: Callable = None, -# ): -# # vad model -# self.VAD_model = VAD_model -# if self.VAD_model is None: -# self.VAD_model = AutoModel( -# model="fsmn-vad", model_revision="v2.0.4", disable_update=True -# ) -# # audio config -# self.audio_config = audio_config -# # vad result -# self.vad_result = VADResponse(time_chunk_index_callback=callback) -# # audio binary poll -# self.audio_chunk = AudioChunk(audio_config=self.audio_config) -# self.cache = {} - -# def push_binary_data( -# self, -# binary_data: bytes, -# ): -# # 压入二进制数据 -# self.audio_chunk.add_chunk(binary_data) -# # 处理音频块 -# res = self.VAD_model.generate( -# input=binary_data, -# cache=self.cache, -# chunk_size=self.audio_config.chunk_size, -# is_final=False, -# ) -# # print("VAD generate", res) -# if len(res[0]["value"]): -# self.vad_result += VADResponse.from_raw(res) - -# def set_callback( -# self, -# callback: Callable, -# ): -# self.vad_result.time_chunk_index_callback = callback - -# def process_vad_result(self, callback: Callable = None): -# # 处理VAD结果 -# callback = ( -# callback -# if callback is not None -# else self.vad_result.time_chunk_index_callback -# ) -# self.vad_result.process_time_chunk( -# lambda x: callback( -# AudioBinary_Chunk( -# start_time=x["start_time"], -# end_time=x["end_time"], -# chunk=self.audio_chunk.get_chunk(x["start_time"], x["end_time"]), -# ) -# ) -# ) + return not self._thread.is_alive() \ No newline at end of file diff --git a/src/models/spk.py b/src/models/spk.py index 9e583d7..1497454 100644 --- a/src/models/spk.py +++ b/src/models/spk.py @@ -1,5 +1,5 @@ """ -src/schemas/speaker.py +src/models/spk.py ------------------------ 此模块定义与说话人(speakers)表对应的 Pydantic 模型,用于 API 数据验证和序列化。 @@ -14,10 +14,12 @@ from datetime import datetime from typing import Optional, List from uuid import UUID from pydantic import BaseModel, Field -from .base import BaseSchema +from src.utils import get_module_logger + +logger = get_module_logger(__name__) # 基础模型,定义说话人的核心属性 -class SpeakerBase(BaseSchema): +class SpeakerBase(BaseModel): speaker_id: UUID = Field( ..., description="说话人唯一标识符" diff --git a/src/pipeline/ASRpipeline.py b/src/pipeline/ASRpipeline.py index 2192385..f1f1c06 100644 --- a/src/pipeline/ASRpipeline.py +++ b/src/pipeline/ASRpipeline.py @@ -224,7 +224,7 @@ class ASRPipeline(PipelineBase): while self._is_running and not self._stop_event: try: data = self._input_queue.get(timeout=self._queue_timeout) - logger.debug("[ASRpipeline]获取到的数据length: %s", len(data)) + # logger.debug("[ASRpipeline]获取到的数据length: %s", len(data)) # 检查是否是结束信号 if data is None: logger.info("收到结束信号,管道准备停止") diff --git a/src/runner/ASRRunner.py b/src/runner/ASRRunner.py index c9d503a..e7898b9 100644 --- a/src/runner/ASRRunner.py +++ b/src/runner/ASRRunner.py @@ -105,7 +105,7 @@ class ASRRunner(RunnerBase): data = self._sender.recv() if data is None: break - logger.debug("[ASRRunner][SAR-%s]接收到的数据length: %s", self._name, len(data)) + # logger.debug("[ASRRunner][SAR-%s]接收到的数据length: %s", self._name, len(data)) self._input_queue.put(data) self.stop() diff --git a/src/utils/mock_websocket.py b/src/utils/mock_websocket.py index 6ae84a1..a2bfa9c 100644 --- a/src/utils/mock_websocket.py +++ b/src/utils/mock_websocket.py @@ -47,7 +47,7 @@ class MockWebSocketClient: """Puts data into the receive queue for the `recv` method to consume.""" if data is None: return - logger.debug("Mock WS put_for_recv length: %s", len(data)) + # logger.debug("Mock WS put_for_recv length: %s", len(data)) self.receive_queue.put(data) @property diff --git a/src/websockets/endpoint/asr_endpoint.py b/src/websockets/endpoint/asr_endpoint.py deleted file mode 100644 index 25adfbb..0000000 --- a/src/websockets/endpoint/asr_endpoint.py +++ /dev/null @@ -1,32 +0,0 @@ -""" - -*- coding: utf-8 -*- - 此模块是ASR的websocket端点, 使用FastAPI的websocket端点 -""" - -from fastapi import WebSocket, APIRouter - -router = APIRouter() - -from src.runner.ASRRunner import ASRRunner - -ASRRunner_instance = ASRRunner() - -from src.core import ModelLoader -model_loader = ModelLoader() -args = { - "asr_model": "paraformer-zh", - "asr_model_revision": "v2.0.4", - "vad_model": "fsmn-vad", - "vad_model_revision": "v2.0.4", - "spk_model": "cam++", - "spk_model_revision": "v2.0.2", - "audio_update": False, -} -models = model_loader.load_models(args) - -@router.websocket("/asr_full") -async def asr_endpoint(websocket: WebSocket): - await websocket.accept() - while True: - data = await websocket.receive_text() - print(data) diff --git a/test_main.py b/test_main.py index 4af9475..42bd7a9 100644 --- a/test_main.py +++ b/test_main.py @@ -10,6 +10,10 @@ from tests.runner.asr_runner_test import test_asr_runner setup_root_logger(level="INFO", log_file="logs/test_main.log") logger = get_module_logger(__name__) +# 清空logs/test_main.log文件 +with open("logs/test_main.log", "w") as f: + f.truncate() + # from tests.functor.vad_test import test_vad_functor # logger.info("开始测试VAD函数器") # test_vad_functor() diff --git a/tests/XT_ZZY.wav b/tests/XT_ZZY.wav new file mode 100644 index 0000000..f83d78a Binary files /dev/null and b/tests/XT_ZZY.wav differ diff --git a/tests/XT_ZZY_denoise.wav b/tests/XT_ZZY_denoise.wav new file mode 100644 index 0000000..30ae0ba Binary files /dev/null and b/tests/XT_ZZY_denoise.wav differ diff --git a/tests/runner/asr_runner_test.py b/tests/runner/asr_runner_test.py index 51008cd..a4dbec8 100644 --- a/tests/runner/asr_runner_test.py +++ b/tests/runner/asr_runner_test.py @@ -10,6 +10,10 @@ from src.core.model_loader import ModelLoader from src.models import AudioBinary_Config from src.utils.mock_websocket import MockWebSocketClient +from src.utils.logger import get_module_logger + +logger = get_module_logger(__name__) + def test_asr_runner(): """ End-to-end test for ASRRunner. @@ -32,17 +36,18 @@ def test_asr_runner(): "audio_update": False, } models = model_loader.load_models(args) - audio_data, sample_rate = soundfile.read("tests/vad_example.wav") - + audio_file_path = "tests/XT_ZZY_denoise.wav" + audio_data, sample_rate = soundfile.read(audio_file_path) + logger.info(f"加载数据: {audio_file_path} , audio_data_length: {len(audio_data)}, audio_data_type: {type(audio_data)}, sample_rate: {sample_rate}") # 2. Configure audio audio_config = AudioBinary_Config( chunk_size=200, # ms - chunk_stride=1600, # 10ms stride for 16kHz + chunk_stride=1000, # 10ms stride for 16kHz sample_rate=sample_rate, sample_width=2, # 16-bit - channels=1, + channels=2, ) - audio_config.chunk_stride = int(audio_config.chunk_stride * sample_rate / 1000) + audio_config.chunk_stride = int(audio_config.chunk_size * sample_rate / 1000) # 3. Setup ASRRunner asr_runner = ASRRunner() @@ -70,6 +75,6 @@ def test_asr_runner(): mock_ws.put_for_recv(chunk) # 6. Wait for results and assert - time.sleep(10) + time.sleep(30) # Signal end of audio stream by sending None mock_ws.put_for_recv(None)