Merge branch 'feature_logger' into dev

[Feature] 添加了logger用于管理日志,同时测试了ASR、PUNC、SPK模型效果;
[BUG] 发现BUG:使用funasr的一些模块会导致logger被更改,这一点需要进一步讨论解决方案
This commit is contained in:
Ziyang.Zhang 2025-04-16 14:30:40 +08:00
commit 1392168126
5 changed files with 136 additions and 18 deletions

View File

@ -5,16 +5,12 @@
"""
import numpy as np
import logging
from src.utils.logger import get_module_logger
from typing import List, Optional, Union
from src.models import AudioBinary_Config
# Configure logging.
# NOTE(review): this span comes from a diff view that shows removed and added
# lines together. The basicConfig(...) call and the getLogger('AudioChunk')
# assignment look like the removed side of the change - confirm against the
# actual file before relying on them.
# basicConfig sets up the root logger at INFO with a timestamp/name/level/message format.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('AudioChunk')
# Rebinds `logger` to the logger named after this module (__name__), with its level set to INFO.
logger = get_module_logger(__name__, level="INFO")
class AudioChunk:
"""音频数据块管理类用于存储和处理16KHz音频数据"""

View File

@ -4,15 +4,10 @@
逻辑触发器类 - 用于处理音频数据并触发相应的处理逻辑
"""
import logging
from src.utils.logger import get_module_logger
from typing import Any, Dict, Type, Callable
# Configure logging.
# NOTE(review): this span comes from a diff view that shows removed and added
# lines together. The basicConfig(...) call and the getLogger('LogicTrager')
# assignment look like the removed side of the change - confirm against the
# actual file before relying on them.
# basicConfig sets up the root logger at INFO with a timestamp/name/level/message format.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('LogicTrager')
# Rebinds `logger` to the logger named after this module (__name__), with its level set to INFO.
logger = get_module_logger(__name__, level="INFO")
class AutoAfterMeta(type):
"""
@ -143,7 +138,7 @@ class LogicTrager(metaclass=AutoAfterMeta):
"""
音频处理
"""
print("LogicTrager push_audio_chunk [{}ms:{}ms] (len={})".format(chunk.start_time, chunk.end_time, len(chunk.chunk)))
logger.info("LogicTrager push_audio_chunk [{}ms:{}ms] (len={})".format(chunk.start_time, chunk.end_time, len(chunk.chunk)))
self._audio_chunk.append(chunk)
def __after__push_audio_chunk(self) -> None:
@ -162,6 +157,7 @@ class LogicTrager(metaclass=AutoAfterMeta):
"""
压入结果队列后处理
"""
logger.info("FINISH Result=")
pass
def __call__(self):

91
src/utils/logger.py Normal file
View File

@ -0,0 +1,91 @@
import logging
import sys
from pathlib import Path
from typing import Optional
def setup_logger(
    name: Optional[str] = None,
    level: str = "INFO",
    log_file: Optional[str] = None,
    log_format: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    date_format: str = "%Y-%m-%d %H:%M:%S",
) -> logging.Logger:
    """Configure and return a logger with console and optional file output.

    Calling this function repeatedly for the same logger does not stack
    duplicate handlers: an existing stdout handler, or an existing file
    handler for the same path, is reused and only its formatter is refreshed.
    Without this, every repeated call would make each record be emitted one
    more time.

    Args:
        name: Logger name. ``None`` selects the root logger.
        level: Level name such as ``"INFO"`` or ``"debug"`` (case-insensitive).
        log_file: Path of a log file. ``None`` means console output only.
            Missing parent directories are created.
        log_format: Format string applied to every handler managed here.
        date_format: Date format string applied to every handler managed here.

    Returns:
        The configured ``logging.Logger`` instance.

    Raises:
        AttributeError: If ``level`` is not the name of a ``logging`` attribute.
    """
    # Function-scope import so this block does not rely on a file-level import.
    import os

    logger = logging.getLogger(name)
    logger.setLevel(getattr(logging, level.upper()))

    formatter = logging.Formatter(log_format, date_format)

    # Reuse a handler that already writes to the current stdout, if any.
    console_handler = next(
        (
            handler
            for handler in logger.handlers
            if isinstance(handler, logging.StreamHandler)
            and getattr(handler, "stream", None) is sys.stdout
        ),
        None,
    )
    if console_handler is None:
        console_handler = logging.StreamHandler(sys.stdout)
        logger.addHandler(console_handler)
    console_handler.setFormatter(formatter)

    if log_file:
        # Make sure the directory that will hold the log file exists.
        Path(log_file).parent.mkdir(parents=True, exist_ok=True)

        # FileHandler stores its target as an absolute path in baseFilename,
        # so compare against the absolute form of the requested path.
        target_path = os.path.abspath(log_file)
        file_handler = next(
            (
                handler
                for handler in logger.handlers
                if isinstance(handler, logging.FileHandler)
                and handler.baseFilename == target_path
            ),
            None,
        )
        if file_handler is None:
            file_handler = logging.FileHandler(log_file, encoding="utf-8")
            logger.addHandler(file_handler)
        file_handler.setFormatter(formatter)

    # propagate is intentionally left untouched so records can still travel
    # up to ancestor loggers.
    return logger
def setup_root_logger(
    level: str = "INFO",
    log_file: Optional[str] = None
) -> None:
    """Configure the root logger.

    Delegates to ``setup_logger`` with ``name=None``, which selects the root
    logger; the logger returned by that call is discarded.

    Args:
        level: Level name for the root logger.
        log_file: Optional path of a log file to write to.
    """
    setup_logger(name=None, level=level, log_file=log_file)
def get_module_logger(
    module_name: str,
    level: Optional[str] = None,  # optional: omit to inherit
    log_file: Optional[str] = None  # rarely needed per module
) -> logging.Logger:
    """Return the logger for a module, optionally adjusting it.

    Args:
        module_name: Logger name, normally the caller's ``__name__``.
        level: Optional level name (case-insensitive). When omitted, the
            logger's own level is left as it was, so the effective level is
            inherited from its ancestors.
        log_file: Optional log file path. When given, handlers are attached
            to this logger through ``setup_logger``.

    Returns:
        The ``logging.Logger`` registered under ``module_name``.

    Raises:
        AttributeError: If ``level`` is not the name of a ``logging`` attribute.
    """
    logger = logging.getLogger(module_name)

    # Only touch the level when the caller asked for one explicitly.
    if level:
        logger.setLevel(getattr(logging, level.upper()))

    # Only attach handlers when a log file was requested explicitly.
    if log_file:
        # setup_logger always sets a level; remember the current one and
        # restore it afterwards so that passing only log_file does not
        # silently force the logger to INFO.
        level_before = logger.level
        setup_logger(module_name, level or "INFO", log_file)
        logger.setLevel(level_before)

    return logger

View File

@ -1,4 +1,22 @@
from tests.modelsuse import vad_model_use_online_logic
from src.utils.logger import get_module_logger, setup_root_logger
from tests.modelsuse import vad_model_use_online_logic, asr_model_use_offline
import json
# Send root-logger output to the console and to logs/test_main.log.
setup_root_logger(level="INFO", log_file="logs/test_main.log")
logger = get_module_logger(__name__)

# --- VAD check ---
logger.info("开始测试")
vad_result = vad_model_use_online_logic("tests/vad_example.wav")
print(vad_result)
logger.info("测试结束")
if vad_result is not None:
    logger.info(f"VAD结果: {vad_result}")
else:
    logger.warning("VAD结果为空")

# --- ASR + speaker check ---
asr_result = asr_model_use_offline("tests/vad_example.wav")
# NOTE(review): the root logger is configured a second time with the same
# arguments. Presumably this re-applies the configuration after funasr has
# modified logging - confirm before removing.
setup_root_logger(level="INFO", log_file="logs/test_main.log")
sentences = asr_result[0]['sentence_info']
for item in sentences:
    # Example entry:
    # [{'start': 70, 'end': 2340, 'sentence': '试 错 的 过 程 很 简 单', 'timestamp': [[380, 620], [640, 740], [740, 940], [940, 1020], [1020, 1260], [1500, 1740], [1740, 1840], [1840, 2135]], 'spk': 0}
    logger.info(f"spk[{item['spk']}] [{item['start']}ms:{item['end']}ms] {item['sentence'].replace(' ', '')}")

View File

@ -58,8 +58,25 @@ def vad_model_use_online_logic(file_path: str) -> List[Dict[str, Any]]:
# for item in items:
# print(item)
return None
def asr_model_use_offline(file_path: str) -> List[Dict[str, Any]]:
    """Run offline speech recognition with speaker labels on one audio file.

    Builds a funasr ``AutoModel`` combining the ``paraformer-zh`` ASR model,
    the ``fsmn-vad`` VAD model and the ``cam++`` speaker model, reads the
    audio with ``soundfile`` and returns whatever ``model.generate`` produces.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        The result of ``model.generate``. The caller in this changeset reads
        ``result[0]['sentence_info']`` - presumably a list with one dict per
        input; verify against the funasr documentation.
    """
    # Third-party modules are imported inside the function, as in the original.
    from funasr import AutoModel

    model = AutoModel(
        model="paraformer-zh", model_revision="v2.0.4",
        vad_model="fsmn-vad", vad_model_revision="v2.0.4",
        # punc_model="ct-punc-c", punc_model_revision="v2.0.4",
        spk_model="cam++", spk_model_revision="v2.0.2",
        spk_mode="vad_segment",
        auto_update=False,
    )

    import soundfile

    # NOTE(review): the file's sample rate is read but never checked or passed
    # on - confirm the models accept the rate of the input file.
    speech, _sample_rate = soundfile.read(file_path)
    return model.generate(speech)
if __name__ == "__main__":
# vad_result = vad_model_use_online("tests/vad_example.wav")