[说话人认证]SPKFunctor完成本地说话人embs/wav加载。
This commit is contained in:
parent
4e9dd83d55
commit
1a296d8309
13
data/denoise.py
Normal file
13
data/denoise.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
from modelscope.pipelines import pipeline
|
||||||
|
from modelscope.utils.constant import Tasks
|
||||||
|
|
||||||
|
|
||||||
|
import os

# Build the acoustic-noise-suppression pipeline from the pretrained model.
ans = pipeline(
    Tasks.acoustic_noise_suppression,
    model='iic/speech_frcrn_ans_cirm_16k')

# Input recording and destination of the denoised result (relative to the
# current working directory).
wav_file = 'speaker_wav/HaiaoDuan.wav'
output_path = 'denoise_output/HaiaoDuan_denoise_output.wav'

# The output directory is not created anywhere else in this script; make sure
# it exists so that writing the result does not fail on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Run the pipeline on the input file; the denoised audio is written to
# output_path.
result = ans(
    wav_file,
    output_path=output_path)
|
BIN
data/denoise_output/HaiaoDuan_denoise_output.wav
Normal file
BIN
data/denoise_output/HaiaoDuan_denoise_output.wav
Normal file
Binary file not shown.
BIN
data/denoise_output/ZiyangZhang_denoise_output.wav
Normal file
BIN
data/denoise_output/ZiyangZhang_denoise_output.wav
Normal file
Binary file not shown.
35
data/record.py
Normal file
35
data/record.py
Normal file
@ -0,0 +1,35 @@
|
|||||||
|
"""
|
||||||
|
本地录音,保存为wav格式,存储在data/speaker_wav目录下
|
||||||
|
"""
|
||||||
|
import pyaudio
|
||||||
|
import wave
|
||||||
|
|
||||||
|
def record_audio(filename, duration=5, format=pyaudio.paInt16, channels=1, rate=16000):
    """
    Record audio from the default input device and save it as a WAV file.

    Prompts the user to press Enter, then captures audio in 1024-frame
    buffers and writes the result to ``filename``. Only whole buffers are
    read, so the recording can be slightly shorter than ``duration``.

    Args:
        filename: Path of the WAV file to write.
        duration: Requested recording length in seconds.
        format: PyAudio sample format constant.
        channels: Number of channels to capture.
        rate: Sampling rate in Hz.
    """
    frames_per_buffer = 1024
    p = pyaudio.PyAudio()
    try:
        # Look up the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(format)

        print("按下回车键开始录音...")
        input()

        # Open the stream only after the user confirmed: a stream opened before
        # the prompt keeps capturing while we wait, and the device buffer can
        # overflow before the first read.
        stream = p.open(
            format=format,
            channels=channels,
            rate=rate,
            input=True,
            frames_per_buffer=frames_per_buffer,
        )
        try:
            buffer_count = int(rate / frames_per_buffer * duration)
            frames = [stream.read(frames_per_buffer) for _ in range(buffer_count)]
        finally:
            # Always release the audio device, even if a read fails.
            stream.stop_stream()
            stream.close()
        print("录音结束")
    finally:
        p.terminate()

    # The context manager closes the file even if writing fails.
    with wave.open(filename, 'wb') as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(sample_width)
        wav_file.setframerate(rate)
        wav_file.writeframes(b''.join(frames))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: record a five-second test clip.
    record_audio("data/speaker_wav/test.wav", duration=5)
|
BIN
data/speaker_wav/HaiaoDuan.wav
Normal file
BIN
data/speaker_wav/HaiaoDuan.wav
Normal file
Binary file not shown.
BIN
data/speaker_wav/HaiaoDuan_origin.wav
Normal file
BIN
data/speaker_wav/HaiaoDuan_origin.wav
Normal file
Binary file not shown.
BIN
data/speaker_wav/ZiyangZhang.wav
Normal file
BIN
data/speaker_wav/ZiyangZhang.wav
Normal file
Binary file not shown.
BIN
data/speaker_wav/ZiyangZhang_origin.wav
Normal file
BIN
data/speaker_wav/ZiyangZhang_origin.wav
Normal file
Binary file not shown.
400
data/speakers.json
Normal file
400
data/speakers.json
Normal file
@ -0,0 +1,400 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b",
|
||||||
|
"speaker_name": "ZiyangZhang",
|
||||||
|
"wav_path": "/home/lyg/Code/funasr/data/speaker_wav/ZiyangZhang.wav",
|
||||||
|
"speaker_embs": [
|
||||||
|
-0.4249887466430664,
|
||||||
|
-0.12976674735546112,
|
||||||
|
1.6118208169937134,
|
||||||
|
1.3348901271820068,
|
||||||
|
0.1423041820526123,
|
||||||
|
0.16940945386886597,
|
||||||
|
-0.042910803109407425,
|
||||||
|
0.9634712934494019,
|
||||||
|
0.9677271246910095,
|
||||||
|
1.1112406253814697,
|
||||||
|
-2.0086846351623535,
|
||||||
|
1.729629635810852,
|
||||||
|
-0.3664000928401947,
|
||||||
|
2.4323978424072266,
|
||||||
|
-1.587996244430542,
|
||||||
|
-1.0803641080856323,
|
||||||
|
0.08011860400438309,
|
||||||
|
1.6515964269638062,
|
||||||
|
-1.1337167024612427,
|
||||||
|
-0.5088973045349121,
|
||||||
|
-1.0002555847167969,
|
||||||
|
0.11426643282175064,
|
||||||
|
-0.8616334199905396,
|
||||||
|
-0.006051262840628624,
|
||||||
|
0.44800689816474915,
|
||||||
|
0.6659525632858276,
|
||||||
|
-0.9864538908004761,
|
||||||
|
2.1259539127349854,
|
||||||
|
-0.49345871806144714,
|
||||||
|
-0.14384664595127106,
|
||||||
|
0.0742349922657013,
|
||||||
|
0.25577273964881897,
|
||||||
|
1.0516602993011475,
|
||||||
|
1.7297064065933228,
|
||||||
|
-0.44126248359680176,
|
||||||
|
1.3971654176712036,
|
||||||
|
0.04305446520447731,
|
||||||
|
-2.261837959289551,
|
||||||
|
-0.355578750371933,
|
||||||
|
-0.8388981819152832,
|
||||||
|
0.8178591728210449,
|
||||||
|
0.016942109912633896,
|
||||||
|
0.8212596774101257,
|
||||||
|
1.108891248703003,
|
||||||
|
-0.5182072520256042,
|
||||||
|
-0.07741295546293259,
|
||||||
|
0.9407528042793274,
|
||||||
|
0.026407398283481598,
|
||||||
|
-0.6210324168205261,
|
||||||
|
-2.0659642219543457,
|
||||||
|
0.13895569741725922,
|
||||||
|
-1.3570973873138428,
|
||||||
|
2.236407995223999,
|
||||||
|
-0.29706746339797974,
|
||||||
|
1.9819035530090332,
|
||||||
|
1.3580390214920044,
|
||||||
|
-0.5505754351615906,
|
||||||
|
0.7189999222755432,
|
||||||
|
-0.3190038502216339,
|
||||||
|
1.1075336933135986,
|
||||||
|
-1.4158482551574707,
|
||||||
|
0.20138776302337646,
|
||||||
|
0.8354343175888062,
|
||||||
|
0.1671304553747177,
|
||||||
|
-0.56927490234375,
|
||||||
|
1.057538390159607,
|
||||||
|
-0.2868591248989105,
|
||||||
|
0.005044424440711737,
|
||||||
|
0.49878695607185364,
|
||||||
|
-0.7493277192115784,
|
||||||
|
2.4639663696289062,
|
||||||
|
0.5516767501831055,
|
||||||
|
-0.2763596177101135,
|
||||||
|
-0.8769170641899109,
|
||||||
|
-1.296872615814209,
|
||||||
|
-0.5233777165412903,
|
||||||
|
-0.10551001876592636,
|
||||||
|
-0.5955559611320496,
|
||||||
|
-0.6046199202537537,
|
||||||
|
0.22645621001720428,
|
||||||
|
1.12480890750885,
|
||||||
|
-0.3678736388683319,
|
||||||
|
-1.1580262184143066,
|
||||||
|
-0.3625229299068451,
|
||||||
|
0.8251489996910095,
|
||||||
|
0.3464623987674713,
|
||||||
|
2.261840581893921,
|
||||||
|
-0.11341957747936249,
|
||||||
|
-0.6645990610122681,
|
||||||
|
0.8480257987976074,
|
||||||
|
-0.47770705819129944,
|
||||||
|
0.8085628747940063,
|
||||||
|
-0.26823946833610535,
|
||||||
|
-0.25040531158447266,
|
||||||
|
1.0610276460647583,
|
||||||
|
-0.14239133894443512,
|
||||||
|
-1.309299349784851,
|
||||||
|
-1.0987954139709473,
|
||||||
|
-0.1301683634519577,
|
||||||
|
-0.05199439451098442,
|
||||||
|
-0.07838833332061768,
|
||||||
|
-0.21310138702392578,
|
||||||
|
0.29347339272499084,
|
||||||
|
1.0793802738189697,
|
||||||
|
-1.813226342201233,
|
||||||
|
-1.1362330913543701,
|
||||||
|
-0.13013578951358795,
|
||||||
|
0.6647212505340576,
|
||||||
|
-0.34312230348587036,
|
||||||
|
0.5921282172203064,
|
||||||
|
0.26284533739089966,
|
||||||
|
0.9369505047798157,
|
||||||
|
0.1739131063222885,
|
||||||
|
0.7924790978431702,
|
||||||
|
0.3412249982357025,
|
||||||
|
0.16646981239318848,
|
||||||
|
-0.32468467950820923,
|
||||||
|
-0.5835385918617249,
|
||||||
|
0.05923287197947502,
|
||||||
|
1.191710352897644,
|
||||||
|
-0.3653518557548523,
|
||||||
|
-0.8665252923965454,
|
||||||
|
0.7419591546058655,
|
||||||
|
-1.7234965562820435,
|
||||||
|
0.3421083092689514,
|
||||||
|
-0.24517370760440826,
|
||||||
|
-0.8724228143692017,
|
||||||
|
-0.11004912108182907,
|
||||||
|
-0.10676378011703491,
|
||||||
|
-1.0688399076461792,
|
||||||
|
0.4397974908351898,
|
||||||
|
-0.9902229309082031,
|
||||||
|
-0.2676651179790497,
|
||||||
|
1.4346729516983032,
|
||||||
|
0.34571582078933716,
|
||||||
|
0.9091840386390686,
|
||||||
|
0.41458258032798767,
|
||||||
|
-0.7863419055938721,
|
||||||
|
0.6952191591262817,
|
||||||
|
0.8847752809524536,
|
||||||
|
0.15871241688728333,
|
||||||
|
-0.10740098357200623,
|
||||||
|
-0.5305340886116028,
|
||||||
|
1.0536329746246338,
|
||||||
|
-1.337695837020874,
|
||||||
|
0.23358777165412903,
|
||||||
|
-0.19285082817077637,
|
||||||
|
-0.5339606404304504,
|
||||||
|
-0.6768214106559753,
|
||||||
|
1.6815600395202637,
|
||||||
|
-0.36710524559020996,
|
||||||
|
-0.22888287901878357,
|
||||||
|
-0.2714850902557373,
|
||||||
|
-0.0895417258143425,
|
||||||
|
0.3480932116508484,
|
||||||
|
-0.19148986041545868,
|
||||||
|
0.44108960032463074,
|
||||||
|
0.03198949620127678,
|
||||||
|
-0.3665091097354889,
|
||||||
|
-0.6040502786636353,
|
||||||
|
0.37234461307525635,
|
||||||
|
-0.07462035119533539,
|
||||||
|
-0.18109525740146637,
|
||||||
|
-0.19882601499557495,
|
||||||
|
0.33298638463020325,
|
||||||
|
0.039957765489816666,
|
||||||
|
0.6185765266418457,
|
||||||
|
1.5921381711959839,
|
||||||
|
0.04164457693696022,
|
||||||
|
-0.7556226849555969,
|
||||||
|
-1.0537445545196533,
|
||||||
|
0.36932048201560974,
|
||||||
|
-0.2881639897823334,
|
||||||
|
-1.3762420415878296,
|
||||||
|
-0.6029151678085327,
|
||||||
|
-1.3592504262924194,
|
||||||
|
0.6726564168930054,
|
||||||
|
0.06349147856235504,
|
||||||
|
-0.4627697765827179,
|
||||||
|
1.1113581657409668,
|
||||||
|
-1.1767970323562622,
|
||||||
|
0.3900119662284851,
|
||||||
|
-0.3050364851951599,
|
||||||
|
-0.2807784676551819,
|
||||||
|
-0.7237444519996643,
|
||||||
|
-0.039161279797554016,
|
||||||
|
0.5845404267311096,
|
||||||
|
-0.4385261833667755,
|
||||||
|
-0.3988557755947113,
|
||||||
|
-1.235430359840393,
|
||||||
|
-0.648483395576477,
|
||||||
|
1.084520936012268
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b",
|
||||||
|
"speaker_name": "HaiaoDuan",
|
||||||
|
"wav_path": "/home/lyg/Code/funasr/data/speaker_wav/HaiaoDuan.wav",
|
||||||
|
"speaker_embs": [
|
||||||
|
-1.3490606546401978,
|
||||||
|
-0.9654964208602905,
|
||||||
|
0.6671794652938843,
|
||||||
|
2.3401081562042236,
|
||||||
|
-1.374346137046814,
|
||||||
|
0.24404077231884003,
|
||||||
|
0.08137784898281097,
|
||||||
|
0.10915698111057281,
|
||||||
|
0.8208633065223694,
|
||||||
|
-1.0312862396240234,
|
||||||
|
1.721955418586731,
|
||||||
|
-0.16976028680801392,
|
||||||
|
-1.0259445905685425,
|
||||||
|
-0.9134035706520081,
|
||||||
|
-1.3709611892700195,
|
||||||
|
-0.6821202635765076,
|
||||||
|
1.0825326442718506,
|
||||||
|
1.4931895732879639,
|
||||||
|
-0.06801076978445053,
|
||||||
|
-0.5044959187507629,
|
||||||
|
-1.3154232501983643,
|
||||||
|
-1.1049765348434448,
|
||||||
|
0.6122218370437622,
|
||||||
|
1.1061663627624512,
|
||||||
|
-0.2288999855518341,
|
||||||
|
-0.03568289428949356,
|
||||||
|
-0.9260172247886658,
|
||||||
|
1.1030527353286743,
|
||||||
|
-0.7439772486686707,
|
||||||
|
1.4323620796203613,
|
||||||
|
0.2221372127532959,
|
||||||
|
-0.8355774283409119,
|
||||||
|
0.6758987307548523,
|
||||||
|
0.8520456552505493,
|
||||||
|
-0.0186605341732502,
|
||||||
|
-0.981821596622467,
|
||||||
|
0.11743613332509995,
|
||||||
|
-0.3539535701274872,
|
||||||
|
-0.33924832940101624,
|
||||||
|
-0.510174036026001,
|
||||||
|
0.6893219351768494,
|
||||||
|
-0.10966216027736664,
|
||||||
|
-1.5873743295669556,
|
||||||
|
1.7041956186294556,
|
||||||
|
-0.9844599366188049,
|
||||||
|
-1.368901252746582,
|
||||||
|
0.44316115975379944,
|
||||||
|
-2.406590700149536,
|
||||||
|
0.9880101680755615,
|
||||||
|
0.8344699740409851,
|
||||||
|
0.22896111011505127,
|
||||||
|
-1.4464795589447021,
|
||||||
|
2.222980260848999,
|
||||||
|
-0.22508130967617035,
|
||||||
|
0.8659772276878357,
|
||||||
|
0.7801474928855896,
|
||||||
|
1.824644923210144,
|
||||||
|
-0.2455991804599762,
|
||||||
|
-0.06682202965021133,
|
||||||
|
0.07106778025627136,
|
||||||
|
-1.8072712421417236,
|
||||||
|
0.7733234763145447,
|
||||||
|
0.20490191876888275,
|
||||||
|
-1.119908094406128,
|
||||||
|
-1.2623472213745117,
|
||||||
|
0.34426289796829224,
|
||||||
|
0.7909225821495056,
|
||||||
|
0.47128093242645264,
|
||||||
|
-0.9976771473884583,
|
||||||
|
-0.6703121662139893,
|
||||||
|
0.7459381818771362,
|
||||||
|
1.0664807558059692,
|
||||||
|
0.659284770488739,
|
||||||
|
-0.49438077211380005,
|
||||||
|
0.1974140703678131,
|
||||||
|
-0.07557231187820435,
|
||||||
|
-1.324866533279419,
|
||||||
|
-1.2217090129852295,
|
||||||
|
-1.0160834789276123,
|
||||||
|
0.7517350912094116,
|
||||||
|
0.06301767379045486,
|
||||||
|
0.8621189594268799,
|
||||||
|
-1.033493161201477,
|
||||||
|
-0.18051855266094208,
|
||||||
|
-0.2633781135082245,
|
||||||
|
0.5859690308570862,
|
||||||
|
1.5803791284561157,
|
||||||
|
-0.7071301341056824,
|
||||||
|
-0.016185184940695763,
|
||||||
|
-0.5259001851081848,
|
||||||
|
-0.6252623796463013,
|
||||||
|
1.4383807182312012,
|
||||||
|
0.6068354845046997,
|
||||||
|
0.39534664154052734,
|
||||||
|
0.22612401843070984,
|
||||||
|
-1.541978120803833,
|
||||||
|
-2.575181484222412,
|
||||||
|
-0.9924071431159973,
|
||||||
|
1.9649298191070557,
|
||||||
|
-1.1940282583236694,
|
||||||
|
-0.6481325030326843,
|
||||||
|
-1.5226261615753174,
|
||||||
|
1.6535273790359497,
|
||||||
|
0.7740333676338196,
|
||||||
|
-1.8780876398086548,
|
||||||
|
0.627184271812439,
|
||||||
|
1.0915889739990234,
|
||||||
|
1.694388508796692,
|
||||||
|
-0.47886598110198975,
|
||||||
|
-0.04895557090640068,
|
||||||
|
0.3620351552963257,
|
||||||
|
0.640113115310669,
|
||||||
|
-0.4149058163166046,
|
||||||
|
-0.18083086609840393,
|
||||||
|
-0.30447620153427124,
|
||||||
|
0.022528085857629776,
|
||||||
|
-0.6550383567810059,
|
||||||
|
-0.3812088668346405,
|
||||||
|
-0.478842169046402,
|
||||||
|
0.6615785360336304,
|
||||||
|
0.49959492683410645,
|
||||||
|
-0.249789759516716,
|
||||||
|
1.7448066473007202,
|
||||||
|
-0.9037050008773804,
|
||||||
|
-0.7441433668136597,
|
||||||
|
0.5949154496192932,
|
||||||
|
-1.1230697631835938,
|
||||||
|
-0.2552490830421448,
|
||||||
|
0.4216223657131195,
|
||||||
|
-0.5870983004570007,
|
||||||
|
0.7283152937889099,
|
||||||
|
-0.13834434747695923,
|
||||||
|
-1.3267407417297363,
|
||||||
|
1.1050132513046265,
|
||||||
|
1.731435775756836,
|
||||||
|
0.3724023103713989,
|
||||||
|
0.830539882183075,
|
||||||
|
-1.032881736755371,
|
||||||
|
0.8204181790351868,
|
||||||
|
0.05735205113887787,
|
||||||
|
0.5442802906036377,
|
||||||
|
-0.7974395751953125,
|
||||||
|
0.18374553322792053,
|
||||||
|
-0.17642715573310852,
|
||||||
|
-0.051413919776678085,
|
||||||
|
-0.2413552850484848,
|
||||||
|
-0.43316808342933655,
|
||||||
|
-0.2594863772392273,
|
||||||
|
1.5363879203796387,
|
||||||
|
0.5056991577148438,
|
||||||
|
-1.3894445896148682,
|
||||||
|
-1.2057586908340454,
|
||||||
|
-0.48546579480171204,
|
||||||
|
-0.2659154236316681,
|
||||||
|
0.9767322540283203,
|
||||||
|
-1.97313392162323,
|
||||||
|
-0.3016327917575836,
|
||||||
|
-0.6123557686805725,
|
||||||
|
0.288481205701828,
|
||||||
|
0.2976057827472687,
|
||||||
|
0.08243764936923981,
|
||||||
|
0.6122551560401917,
|
||||||
|
-0.6019028425216675,
|
||||||
|
-0.10548368841409683,
|
||||||
|
-0.016991911455988884,
|
||||||
|
1.75961172580719,
|
||||||
|
0.6418831944465637,
|
||||||
|
0.3137458264827728,
|
||||||
|
0.25365981459617615,
|
||||||
|
-0.45389246940612793,
|
||||||
|
0.238858163356781,
|
||||||
|
0.2631453275680542,
|
||||||
|
1.1121031045913696,
|
||||||
|
-0.9991472363471985,
|
||||||
|
-0.8904637694358826,
|
||||||
|
-1.1346020698547363,
|
||||||
|
-1.1918814182281494,
|
||||||
|
-1.1205440759658813,
|
||||||
|
-1.486283779144287,
|
||||||
|
1.0530670881271362,
|
||||||
|
-0.583172082901001,
|
||||||
|
0.26391518115997314,
|
||||||
|
1.2654175758361816,
|
||||||
|
-0.8430055975914001,
|
||||||
|
0.21697403490543365,
|
||||||
|
-0.30710718035697937,
|
||||||
|
2.191946506500244,
|
||||||
|
-0.19980488717556,
|
||||||
|
-0.5966204404830933,
|
||||||
|
0.04923265427350998,
|
||||||
|
-0.8815436959266663,
|
||||||
|
0.9289136528968811
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
14
data/speakers.json.backup
Normal file
14
data/speakers.json.backup
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b",
|
||||||
|
"speaker_name": "ZiyangZhang",
|
||||||
|
"wav_path": "/home/lyg/Code/funasr/data/speaker_wav/ZiyangZhang.wav",
|
||||||
|
"speaker_embs": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"speaker_id": "b7e2c8e2-1f3a-4c2a-9e7a-2c1d4e8f9a3b",
|
||||||
|
"speaker_name": "HaiaoDuan",
|
||||||
|
"wav_path": "/home/lyg/Code/funasr/data/speaker_wav/HaiaoDuan.wav",
|
||||||
|
"speaker_embs": ""
|
||||||
|
}
|
||||||
|
]
|
36
main.py
36
main.py
@ -1,30 +1,12 @@
|
|||||||
from funasr import AutoModel
|
from src.server import app
|
||||||
|
import uvicorn
|
||||||
|
from datetime import datetime
|
||||||
|
from src.utils.logger import get_module_logger, setup_root_logger
|
||||||
|
|
||||||
chunk_size = 200 # ms
|
time = format(datetime.now(), "%Y-%m-%d %H:%M:%S")
|
||||||
model = AutoModel(model="fsmn-vad", model_revision="v2.0.4", disable_update=True)
|
setup_root_logger(level="DEBUG", log_file=f"logs/fastapiserver_{time}.log")
|
||||||
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
import soundfile
|
|
||||||
|
|
||||||
wav_file = "tests/vad_example.wav"
|
if __name__ == "__main__":
|
||||||
speech, sample_rate = soundfile.read(wav_file)
|
uvicorn.run(app, host="0.0.0.0", port=8000)
|
||||||
chunk_stride = int(chunk_size * sample_rate / 1000)
|
|
||||||
|
|
||||||
cache = {}
|
|
||||||
total_chunk_num = int(len((speech) - 1) / chunk_stride + 1)
|
|
||||||
for i in range(total_chunk_num):
|
|
||||||
speech_chunk = speech[i * chunk_stride : (i + 1) * chunk_stride]
|
|
||||||
is_final = i == total_chunk_num - 1
|
|
||||||
res = model.generate(
|
|
||||||
input=speech_chunk,
|
|
||||||
cache=cache,
|
|
||||||
is_final=is_final,
|
|
||||||
chunk_size=chunk_size,
|
|
||||||
disable_pbar=True,
|
|
||||||
)
|
|
||||||
if len(res[0]["value"]):
|
|
||||||
print(res)
|
|
||||||
|
|
||||||
print(f"len(speech): {len(speech)}")
|
|
||||||
print(f"len(speech_chunk): {len(speech_chunk)}")
|
|
||||||
print(f"total_chunk_num: {total_chunk_num}")
|
|
||||||
print(f"generateconfig: chunk_size: {chunk_size}, chunk_stride: {chunk_stride}")
|
|
@ -11,6 +11,7 @@ import json
|
|||||||
import torch
|
import torch
|
||||||
import threading
|
import threading
|
||||||
import numpy
|
import numpy
|
||||||
|
import os
|
||||||
|
|
||||||
# 日志
|
# 日志
|
||||||
from src.utils.logger import get_module_logger
|
from src.utils.logger import get_module_logger
|
||||||
@ -60,6 +61,7 @@ class SPKFunctor(BaseFunctor):
|
|||||||
|
|
||||||
def add_speaker(self, speaker: SpeakerCreate) -> None:
|
def add_speaker(self, speaker: SpeakerCreate) -> None:
|
||||||
self._spk_data.append(speaker)
|
self._spk_data.append(speaker)
|
||||||
|
logger.debug("添加说话人: %s", speaker)
|
||||||
|
|
||||||
def verify(self, emb: numpy.ndarray) -> Dict:
|
def verify(self, emb: numpy.ndarray) -> Dict:
|
||||||
# 将输入的numpy embedding转换为tensor
|
# 将输入的numpy embedding转换为tensor
|
||||||
@ -116,6 +118,9 @@ class SPKFunctor(BaseFunctor):
|
|||||||
self._input_queue: Queue = None # 输入队列
|
self._input_queue: Queue = None # 输入队列
|
||||||
self._audio_config: AudioBinary_Config = None # 音频配置
|
self._audio_config: AudioBinary_Config = None # 音频配置
|
||||||
|
|
||||||
|
logger.debug("加载本地说话人数据")
|
||||||
|
self.load_spk_data_local()
|
||||||
|
|
||||||
def load_spk_data_local(
|
def load_spk_data_local(
|
||||||
self,
|
self,
|
||||||
spk_data_path: str = 'data/speakers.json',
|
spk_data_path: str = 'data/speakers.json',
|
||||||
@ -125,9 +130,37 @@ class SPKFunctor(BaseFunctor):
|
|||||||
"""
|
"""
|
||||||
with open(spk_data_path, 'r') as f:
|
with open(spk_data_path, 'r') as f:
|
||||||
spk_data = json.load(f)
|
spk_data = json.load(f)
|
||||||
for spk in spk_data:
|
for i, spk in enumerate(spk_data):
|
||||||
|
logger.debug("加载本地说话人数据: %s", spk)
|
||||||
|
if spk['speaker_embs'] == "" and spk['wav_path'] != "":
|
||||||
|
logger.debug("尝试转换本地wav为embs: %s", spk['wav_path'])
|
||||||
|
try:
|
||||||
|
# 读取数据为numpy数组
|
||||||
|
import soundfile as sf
|
||||||
|
import numpy as np
|
||||||
|
wav_data, sr = sf.read(spk['wav_path'], dtype='int16')
|
||||||
|
# 确保是单通道
|
||||||
|
if wav_data.ndim > 1:
|
||||||
|
wav_data = wav_data[:, 0]
|
||||||
|
# 转为numpy数组后送入pipeline
|
||||||
|
spk['speaker_embs'] = self._sv_pipeline([wav_data], output_emb=True)['embs'][0]
|
||||||
|
logger.debug("转换本地wav为embs: length=%s type=%s", len(spk['speaker_embs']), type(spk['speaker_embs']))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("转换本地wav为embs失败: %s", e)
|
||||||
|
else:
|
||||||
|
logger.debug("加载本地说话人数据: %s", spk)
|
||||||
|
# 将spk的speaker_embs转换为numpy
|
||||||
|
spk['speaker_embs'] = numpy.array(spk['speaker_embs'])
|
||||||
self._spk_verify.add_speaker(SpeakerCreate(**spk))
|
self._spk_verify.add_speaker(SpeakerCreate(**spk))
|
||||||
|
spk['speaker_embs'] = spk['speaker_embs'].tolist()
|
||||||
|
spk_data[i] = spk
|
||||||
|
# 保存更新后的数据
|
||||||
|
|
||||||
|
try:
|
||||||
|
with open(spk_data_path, 'w') as f:
|
||||||
|
json.dump(spk_data, f, indent=4)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("保存更新后的数据失败: %s", e)
|
||||||
|
|
||||||
def reset_cache(self) -> None:
|
def reset_cache(self) -> None:
|
||||||
"""
|
"""
|
||||||
|
@ -159,6 +159,7 @@ class VADFunctor(BaseFunctor):
|
|||||||
self._audio_cache = numpy.concatenate((self._audio_cache, data))
|
self._audio_cache = numpy.concatenate((self._audio_cache, data))
|
||||||
elif isinstance(self._audio_cache, list):
|
elif isinstance(self._audio_cache, list):
|
||||||
self._audio_cache.append(data)
|
self._audio_cache.append(data)
|
||||||
|
|
||||||
if self._audiobinary_cache is None:
|
if self._audiobinary_cache is None:
|
||||||
self._audiobinary_cache = data
|
self._audiobinary_cache = data
|
||||||
else:
|
else:
|
||||||
@ -175,17 +176,28 @@ class VADFunctor(BaseFunctor):
|
|||||||
处理数据
|
处理数据
|
||||||
使用model进行生成, 并使用_do_callback进行回调
|
使用model进行生成, 并使用_do_callback进行回调
|
||||||
"""
|
"""
|
||||||
|
if data is None:
|
||||||
|
result = self._model["vad"].generate(
|
||||||
|
input=self._audio_cache,
|
||||||
|
cache=self._model_cache,
|
||||||
|
chunk_size=self._audio_config.chunk_size,
|
||||||
|
is_final=True,
|
||||||
|
)
|
||||||
|
self._do_callback(result[0]["value"])
|
||||||
|
return
|
||||||
|
|
||||||
self._predeal_data(data)
|
self._predeal_data(data)
|
||||||
if len(self._audio_cache) >= self._audio_config.chunk_stride:
|
if len(self._audio_cache) >= self._audio_config.chunk_stride:
|
||||||
result = self._model["vad"].generate(
|
result = self._model["vad"].generate(
|
||||||
input=self._audio_cache,
|
input=self._audio_cache,
|
||||||
cache=self._model_cache,
|
cache=self._model_cache,
|
||||||
chunk_size=self._audio_config.chunk_size,
|
chunk_size=self._audio_config.chunk_size,
|
||||||
|
max_end_silence_time = 300,
|
||||||
is_final=False,
|
is_final=False,
|
||||||
)
|
)
|
||||||
if len(result[0]["value"]) > 0:
|
if len(result[0]["value"]) > 0:
|
||||||
self._do_callback(result[0]["value"])
|
self._do_callback(result[0]["value"])
|
||||||
# logger.debug(f"VADFunctor结果: {result[0]['value']}")
|
logger.debug(f"VADFunctor结果: {result[0]['value']}")
|
||||||
self._audio_cache = None
|
self._audio_cache = None
|
||||||
|
|
||||||
def _run(self):
|
def _run(self):
|
||||||
@ -202,11 +214,11 @@ class VADFunctor(BaseFunctor):
|
|||||||
while self._is_running:
|
while self._is_running:
|
||||||
try:
|
try:
|
||||||
data = self._input_queue.get(True, timeout=1)
|
data = self._input_queue.get(True, timeout=1)
|
||||||
if data is None:
|
# logger.debug("[VADFunctor]获取到的数据length: %s", len(data))
|
||||||
break
|
|
||||||
logger.debug("[VADFunctor]获取到的数据length: %s", len(data))
|
|
||||||
self._process(data)
|
self._process(data)
|
||||||
self._input_queue.task_done()
|
self._input_queue.task_done()
|
||||||
|
if data is None:
|
||||||
|
break
|
||||||
# 当队列为空时, 间隔1s检测是否进入停止事件。
|
# 当队列为空时, 间隔1s检测是否进入停止事件。
|
||||||
except Empty:
|
except Empty:
|
||||||
if self._stop_event:
|
if self._stop_event:
|
||||||
@ -253,66 +265,3 @@ class VADFunctor(BaseFunctor):
|
|||||||
with self._status_lock:
|
with self._status_lock:
|
||||||
self._is_running = False
|
self._is_running = False
|
||||||
return not self._thread.is_alive()
|
return not self._thread.is_alive()
|
||||||
|
|
||||||
|
|
||||||
# class VAD:
|
|
||||||
|
|
||||||
# def __init__(
|
|
||||||
# self,
|
|
||||||
# VAD_model=None,
|
|
||||||
# audio_config: AudioBinary_Config = None,
|
|
||||||
# callback: Callable = None,
|
|
||||||
# ):
|
|
||||||
# # vad model
|
|
||||||
# self.VAD_model = VAD_model
|
|
||||||
# if self.VAD_model is None:
|
|
||||||
# self.VAD_model = AutoModel(
|
|
||||||
# model="fsmn-vad", model_revision="v2.0.4", disable_update=True
|
|
||||||
# )
|
|
||||||
# # audio config
|
|
||||||
# self.audio_config = audio_config
|
|
||||||
# # vad result
|
|
||||||
# self.vad_result = VADResponse(time_chunk_index_callback=callback)
|
|
||||||
# # audio binary poll
|
|
||||||
# self.audio_chunk = AudioChunk(audio_config=self.audio_config)
|
|
||||||
# self.cache = {}
|
|
||||||
|
|
||||||
# def push_binary_data(
|
|
||||||
# self,
|
|
||||||
# binary_data: bytes,
|
|
||||||
# ):
|
|
||||||
# # 压入二进制数据
|
|
||||||
# self.audio_chunk.add_chunk(binary_data)
|
|
||||||
# # 处理音频块
|
|
||||||
# res = self.VAD_model.generate(
|
|
||||||
# input=binary_data,
|
|
||||||
# cache=self.cache,
|
|
||||||
# chunk_size=self.audio_config.chunk_size,
|
|
||||||
# is_final=False,
|
|
||||||
# )
|
|
||||||
# # print("VAD generate", res)
|
|
||||||
# if len(res[0]["value"]):
|
|
||||||
# self.vad_result += VADResponse.from_raw(res)
|
|
||||||
|
|
||||||
# def set_callback(
|
|
||||||
# self,
|
|
||||||
# callback: Callable,
|
|
||||||
# ):
|
|
||||||
# self.vad_result.time_chunk_index_callback = callback
|
|
||||||
|
|
||||||
# def process_vad_result(self, callback: Callable = None):
|
|
||||||
# # 处理VAD结果
|
|
||||||
# callback = (
|
|
||||||
# callback
|
|
||||||
# if callback is not None
|
|
||||||
# else self.vad_result.time_chunk_index_callback
|
|
||||||
# )
|
|
||||||
# self.vad_result.process_time_chunk(
|
|
||||||
# lambda x: callback(
|
|
||||||
# AudioBinary_Chunk(
|
|
||||||
# start_time=x["start_time"],
|
|
||||||
# end_time=x["end_time"],
|
|
||||||
# chunk=self.audio_chunk.get_chunk(x["start_time"], x["end_time"]),
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
# )
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
src/schemas/speaker.py
|
src/models/spk.py
|
||||||
------------------------
|
------------------------
|
||||||
此模块定义与说话人(speakers)表对应的 Pydantic 模型,用于 API 数据验证和序列化。
|
此模块定义与说话人(speakers)表对应的 Pydantic 模型,用于 API 数据验证和序列化。
|
||||||
|
|
||||||
@ -14,10 +14,12 @@ from datetime import datetime
|
|||||||
from typing import Optional, List
|
from typing import Optional, List
|
||||||
from uuid import UUID
|
from uuid import UUID
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from .base import BaseSchema
|
from src.utils import get_module_logger
|
||||||
|
|
||||||
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
# 基础模型,定义说话人的核心属性
|
# 基础模型,定义说话人的核心属性
|
||||||
class SpeakerBase(BaseSchema):
|
class SpeakerBase(BaseModel):
|
||||||
speaker_id: UUID = Field(
|
speaker_id: UUID = Field(
|
||||||
...,
|
...,
|
||||||
description="说话人唯一标识符"
|
description="说话人唯一标识符"
|
||||||
|
@ -224,7 +224,7 @@ class ASRPipeline(PipelineBase):
|
|||||||
while self._is_running and not self._stop_event:
|
while self._is_running and not self._stop_event:
|
||||||
try:
|
try:
|
||||||
data = self._input_queue.get(timeout=self._queue_timeout)
|
data = self._input_queue.get(timeout=self._queue_timeout)
|
||||||
logger.debug("[ASRpipeline]获取到的数据length: %s", len(data))
|
# logger.debug("[ASRpipeline]获取到的数据length: %s", len(data))
|
||||||
# 检查是否是结束信号
|
# 检查是否是结束信号
|
||||||
if data is None:
|
if data is None:
|
||||||
logger.info("收到结束信号,管道准备停止")
|
logger.info("收到结束信号,管道准备停止")
|
||||||
|
@ -105,7 +105,7 @@ class ASRRunner(RunnerBase):
|
|||||||
data = self._sender.recv()
|
data = self._sender.recv()
|
||||||
if data is None:
|
if data is None:
|
||||||
break
|
break
|
||||||
logger.debug("[ASRRunner][SAR-%s]接收到的数据length: %s", self._name, len(data))
|
# logger.debug("[ASRRunner][SAR-%s]接收到的数据length: %s", self._name, len(data))
|
||||||
self._input_queue.put(data)
|
self._input_queue.put(data)
|
||||||
self.stop()
|
self.stop()
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ class MockWebSocketClient:
|
|||||||
"""Puts data into the receive queue for the `recv` method to consume."""
|
"""Puts data into the receive queue for the `recv` method to consume."""
|
||||||
if data is None:
|
if data is None:
|
||||||
return
|
return
|
||||||
logger.debug("Mock WS put_for_recv length: %s", len(data))
|
# logger.debug("Mock WS put_for_recv length: %s", len(data))
|
||||||
self.receive_queue.put(data)
|
self.receive_queue.put(data)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -1,32 +0,0 @@
|
|||||||
"""
|
|
||||||
-*- coding: utf-8 -*-
|
|
||||||
此模块是ASR的websocket端点, 使用FastAPI的websocket端点
|
|
||||||
"""
|
|
||||||
|
|
||||||
from fastapi import WebSocket, APIRouter
|
|
||||||
|
|
||||||
router = APIRouter()
|
|
||||||
|
|
||||||
from src.runner.ASRRunner import ASRRunner
|
|
||||||
|
|
||||||
ASRRunner_instance = ASRRunner()
|
|
||||||
|
|
||||||
from src.core import ModelLoader
|
|
||||||
model_loader = ModelLoader()
|
|
||||||
args = {
|
|
||||||
"asr_model": "paraformer-zh",
|
|
||||||
"asr_model_revision": "v2.0.4",
|
|
||||||
"vad_model": "fsmn-vad",
|
|
||||||
"vad_model_revision": "v2.0.4",
|
|
||||||
"spk_model": "cam++",
|
|
||||||
"spk_model_revision": "v2.0.2",
|
|
||||||
"audio_update": False,
|
|
||||||
}
|
|
||||||
models = model_loader.load_models(args)
|
|
||||||
|
|
||||||
@router.websocket("/asr_full")
|
|
||||||
async def asr_endpoint(websocket: WebSocket):
|
|
||||||
await websocket.accept()
|
|
||||||
while True:
|
|
||||||
data = await websocket.receive_text()
|
|
||||||
print(data)
|
|
@ -10,6 +10,10 @@ from tests.runner.asr_runner_test import test_asr_runner
|
|||||||
setup_root_logger(level="INFO", log_file="logs/test_main.log")
|
setup_root_logger(level="INFO", log_file="logs/test_main.log")
|
||||||
logger = get_module_logger(__name__)
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
|
# 清空logs/test_main.log文件
|
||||||
|
with open("logs/test_main.log", "w") as f:
|
||||||
|
f.truncate()
|
||||||
|
|
||||||
# from tests.functor.vad_test import test_vad_functor
|
# from tests.functor.vad_test import test_vad_functor
|
||||||
# logger.info("开始测试VAD函数器")
|
# logger.info("开始测试VAD函数器")
|
||||||
# test_vad_functor()
|
# test_vad_functor()
|
||||||
|
BIN
tests/XT_ZZY.wav
Normal file
BIN
tests/XT_ZZY.wav
Normal file
Binary file not shown.
BIN
tests/XT_ZZY_denoise.wav
Normal file
BIN
tests/XT_ZZY_denoise.wav
Normal file
Binary file not shown.
@ -10,6 +10,10 @@ from src.core.model_loader import ModelLoader
|
|||||||
from src.models import AudioBinary_Config
|
from src.models import AudioBinary_Config
|
||||||
from src.utils.mock_websocket import MockWebSocketClient
|
from src.utils.mock_websocket import MockWebSocketClient
|
||||||
|
|
||||||
|
from src.utils.logger import get_module_logger
|
||||||
|
|
||||||
|
logger = get_module_logger(__name__)
|
||||||
|
|
||||||
def test_asr_runner():
|
def test_asr_runner():
|
||||||
"""
|
"""
|
||||||
End-to-end test for ASRRunner.
|
End-to-end test for ASRRunner.
|
||||||
@ -32,17 +36,18 @@ def test_asr_runner():
|
|||||||
"audio_update": False,
|
"audio_update": False,
|
||||||
}
|
}
|
||||||
models = model_loader.load_models(args)
|
models = model_loader.load_models(args)
|
||||||
audio_data, sample_rate = soundfile.read("tests/vad_example.wav")
|
audio_file_path = "tests/XT_ZZY_denoise.wav"
|
||||||
|
audio_data, sample_rate = soundfile.read(audio_file_path)
|
||||||
|
logger.info(f"加载数据: {audio_file_path} , audio_data_length: {len(audio_data)}, audio_data_type: {type(audio_data)}, sample_rate: {sample_rate}")
|
||||||
# 2. Configure audio
|
# 2. Configure audio
|
||||||
audio_config = AudioBinary_Config(
|
audio_config = AudioBinary_Config(
|
||||||
chunk_size=200, # ms
|
chunk_size=200, # ms
|
||||||
chunk_stride=1600, # 10ms stride for 16kHz
|
chunk_stride=1000, # 10ms stride for 16kHz
|
||||||
sample_rate=sample_rate,
|
sample_rate=sample_rate,
|
||||||
sample_width=2, # 16-bit
|
sample_width=2, # 16-bit
|
||||||
channels=1,
|
channels=2,
|
||||||
)
|
)
|
||||||
audio_config.chunk_stride = int(audio_config.chunk_stride * sample_rate / 1000)
|
audio_config.chunk_stride = int(audio_config.chunk_size * sample_rate / 1000)
|
||||||
|
|
||||||
# 3. Setup ASRRunner
|
# 3. Setup ASRRunner
|
||||||
asr_runner = ASRRunner()
|
asr_runner = ASRRunner()
|
||||||
@ -70,6 +75,6 @@ def test_asr_runner():
|
|||||||
mock_ws.put_for_recv(chunk)
|
mock_ws.put_for_recv(chunk)
|
||||||
|
|
||||||
# 6. Wait for results and assert
|
# 6. Wait for results and assert
|
||||||
time.sleep(10)
|
time.sleep(30)
|
||||||
# Signal end of audio stream by sending None
|
# Signal end of audio stream by sending None
|
||||||
mock_ws.put_for_recv(None)
|
mock_ws.put_for_recv(None)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user