# STT_Server/tests/runner/asr_runner_test.py
#
# 81 lines
# 2.6 KiB
# Python

"""
ASRRunner test
"""
import queue
import time
import soundfile
import numpy as np
from src.runner.ASRRunner import ASRRunner
from src.core.model_loader import ModelLoader
from src.models import AudioBinary_Config
from src.utils.mock_websocket import MockWebSocketClient
from src.utils.logger import get_module_logger
logger = get_module_logger(__name__)
def test_asr_runner():
    """
    End-to-end smoke test for ASRRunner.

    Steps:
    1. Load the models named in ``args`` through ModelLoader.
    2. Read the test WAV file and build the audio configuration from it.
    3. Create an ASRRunner and set its default configuration.
    4. Create a mock WebSocket client and start a new SenderAndReceiver
       (SAR) instance in the runner.
    5. Stream the audio through the mock WebSocket in fixed-size clips.
    6. Wait a fixed time, then send None to signal the end of the stream.

    NOTE(review): the only assertion made here is that ``new_SAR`` returns
    an id. The transcription produced by the runner is not compared against
    any expected text; add that check once the way to read results back
    from the mock client is confirmed.
    """
    # 1. Load models
    model_loader = ModelLoader()
    args = {
        "asr_model": "paraformer-zh",
        "asr_model_revision": "v2.0.4",
        "vad_model": "fsmn-vad",
        "vad_model_revision": "v2.0.4",
        "spk_model": "cam++",
        "spk_model_revision": "v2.0.2",
        "audio_update": False,
    }
    models = model_loader.load_models(args)

    # The path is relative, so the test depends on the directory it is run from.
    audio_file_path = "tests/XT_ZZY_denoise.wav"
    audio_data, sample_rate = soundfile.read(audio_file_path)
    logger.info(f"加载数据: {audio_file_path} , audio_data_length: {len(audio_data)}, audio_data_type: {type(audio_data)}, sample_rate: {sample_rate}")

    # 2. Configure audio
    chunk_size_ms = 200
    # NOTE(review): soundfile.read returns float64 samples unless a dtype is
    # given, while sample_width=2 declares 16-bit audio, and channels=2 is
    # hard-coded rather than taken from the file -- confirm what the runner
    # expects.
    audio_config = AudioBinary_Config(
        chunk_size=chunk_size_ms,  # ms
        # Number of samples spanned by one chunk at the file's sample rate.
        chunk_stride=int(chunk_size_ms * sample_rate / 1000),
        sample_rate=sample_rate,
        sample_width=2,  # 16-bit
        channels=2,
    )

    # 3. Setup ASRRunner
    asr_runner = ASRRunner()
    asr_runner.set_default_config(
        audio_config=audio_config,
        models=models,
    )

    # 4. Create Mock WebSocket and start SAR
    mock_ws = MockWebSocketClient()
    sar_id = asr_runner.new_SAR(
        ws=mock_ws,
        name="test_sar",
    )
    assert sar_id is not None, "Failed to create a new SAR instance"

    # 5. Simulate streaming audio
    logger.info(f"Sending audio data of length {len(audio_data)} samples.")
    audio_clip_len = 200  # entries of audio_data sent per simulated message
    for start in range(0, len(audio_data), audio_clip_len):
        # Simulate receiving binary data over WebSocket. range() keeps
        # start < len(audio_data), so every slice is non-empty.
        mock_ws.put_for_recv(audio_data[start : start + audio_clip_len])

    # 6. Wait for results (fixed delay; nothing is polled or asserted here)
    time.sleep(30)
    # Signal end of audio stream by sending None
    mock_ws.put_for_recv(None)