feat: 添加结构化日志系统,更新LLM配置与全部文档

新增:
- backend/logger.py — 集中日志模块 (JSON格式 + trace_id + 独立llm.log)
- @log_node / @_log_route 装饰器覆盖17个节点和8个路由

改进:
- backend/llm.py — _LLMLoggingWrapper 自动记录LLM输入输出
- backend/llm.py — API Key优先读ANTHROPIC_API_KEY,模型名改为MiniMax-M2.7
- backend/llm.py — get_llm() 新增caller参数标识调用来源
- backend/validation.py — 新增验证结果/连接失败日志
- backend/session.py — 新增会话创建/删除日志
- app.py — 新增用户交互日志 (输入/执行/异常/会话操作)
- app.py — 提前导入torchvision抑制transformers懒加载报错
- .env.example — 新增LOG_DIR/LOG_LEVEL/ANTHROPIC_API_KEY等配置项
- .gitignore — 新增logs/和db/忽略规则

文档:
- ROADMAP.md — 新增阶段四: 可观测性
- README.md — 补充日志架构/LLM配置/项目结构
- CLAUDE.md — 同步最新配置/日志/MAX_RETRY(3)
- CODE_GUIDE.md — 新增第15章日志系统,更新架构图/LLM/配置
This commit is contained in:
2026-05-19 23:40:01 +08:00
parent 6467fd4ae5
commit 067880bf2e
13 changed files with 753 additions and 82 deletions
+138 -11
View File
@@ -1,12 +1,17 @@
"""大语言模型工厂:支持 OpenAI 兼容的云端 API、Anthropic 兼容 API 和本地 Ollama。"""
import os
import time
from typing import Any
from dotenv import load_dotenv
from backend.logger import get_logger
load_dotenv()
_llm_log = get_logger("llm")
class _BaseLLM:
"""LLM 统一接口基类 — 所有后端都提供 invoke() 和 stream()。"""
@@ -18,7 +23,124 @@ class _BaseLLM:
raise NotImplementedError
def get_llm():
class _LLMLoggingWrapper(_BaseLLM):
    """Wrap any LLM backend and record each call's input and output via ``_llm_log``.

    Every ``invoke()`` / ``stream()`` call emits a DEBUG "request" record and
    then either an INFO "response" record or an ERROR "error" record. All
    records carry the model, backend and caller labels given at construction.

    Logged text is truncated (preview: 500 characters, body: 10000 characters)
    so a single huge prompt cannot bloat the log file.
    """

    # Maximum number of characters copied into the *_preview and full-text fields.
    _PREVIEW_CHARS = 500
    _BODY_CHARS = 10000

    def __init__(self, inner: _BaseLLM, model: str, backend: str, caller: str = ""):
        """Store the wrapped backend and the labels attached to every record.

        Args:
            inner: The real LLM backend that performs the calls.
            model: Model name written to the ``model`` log field.
            backend: Backend label written to the ``backend`` log field.
            caller: Free-form tag identifying who requested the LLM.
        """
        self._inner = inner
        self._model = model
        self._backend = backend
        self._caller = caller

    def __getattr__(self, name: str) -> Any:
        """Delegate attributes this wrapper does not define to the wrapped backend.

        Python only calls ``__getattr__`` after normal lookup has failed, so
        ``invoke``/``stream`` and anything inherited from ``_BaseLLM`` are
        unaffected. This keeps backend-specific extras reachable through the
        wrapper instead of raising ``AttributeError``.
        """
        # Never delegate dunder lookups (copy/pickle probes), and never recurse
        # when ``_inner`` itself is not set yet.
        inner = self.__dict__.get("_inner")
        if inner is None or name.startswith("__"):
            raise AttributeError(name)
        return getattr(inner, name)

    @staticmethod
    def _as_text(value: Any) -> str:
        """Coerce a response payload to ``str`` so it can be measured and sliced."""
        if value is None:
            return ""
        return value if isinstance(value, str) else str(value)

    @staticmethod
    def _elapsed_ms(started: float) -> int:
        """Return whole milliseconds elapsed since ``started`` (a perf_counter value)."""
        return round((time.perf_counter() - started) * 1000)

    def _fields(self, direction: str, **more: Any) -> dict:
        """Build the ``extra`` dict shared by all records of this wrapper."""
        return {
            "direction": direction,
            "model": self._model,
            "backend": self._backend,
            "caller": self._caller,
            **more,
        }

    def _log_request(self, message: str, prompt: str) -> None:
        """Emit the DEBUG record describing an outgoing prompt."""
        _llm_log.debug(
            message,
            extra=self._fields(
                "request",
                prompt_length=len(prompt),
                prompt_preview=prompt[: self._PREVIEW_CHARS],
                prompt=prompt[: self._BODY_CHARS],
            ),
        )

    def _log_response(self, message: str, text: str, started: float) -> None:
        """Emit the INFO record describing a completed call."""
        _llm_log.info(
            message,
            extra=self._fields(
                "response",
                duration_ms=self._elapsed_ms(started),
                response_length=len(text),
                response_preview=text[: self._PREVIEW_CHARS],
                response=text[: self._BODY_CHARS],
            ),
        )

    def _log_error(self, message: str, prompt: str, started: float, exc: Exception) -> None:
        """Emit the ERROR record describing a failed call."""
        _llm_log.error(
            message,
            extra=self._fields(
                "error",
                duration_ms=self._elapsed_ms(started),
                error=str(exc),
                prompt=prompt[: self._BODY_CHARS],
            ),
        )

    def invoke(self, prompt: str) -> Any:
        """Call the wrapped backend's ``invoke`` and log request plus outcome.

        Returns:
            Whatever the wrapped backend returns, unchanged.

        Raises:
            Exception: Any exception raised by the wrapped backend is logged
                and re-raised unchanged.
        """
        started = time.perf_counter()
        self._log_request("LLM invoke 请求", prompt)
        try:
            result = self._inner.invoke(prompt)
        except Exception as exc:
            self._log_error("LLM invoke 异常", prompt, started, exc)
            raise
        # Logged outside the ``try`` so that a problem while summarising the
        # response is never reported as an LLM failure.
        text = self._as_text(getattr(result, "content", result))
        self._log_response("LLM invoke 完成", text, started)
        return result

    def stream(self, prompt: str):
        """Yield chunks from the wrapped backend's ``stream`` and log the outcome.

        This is a generator, so nothing (including the request record) happens
        until the consumer starts iterating. Chunks are yielded unchanged; a
        text copy is accumulated for the completion record. If the consumer
        stops iterating early, no completion or error record is written.

        Raises:
            Exception: Any exception raised by the wrapped backend is logged
                and re-raised unchanged.
        """
        started = time.perf_counter()
        self._log_request("LLM stream 请求", prompt)
        pieces: list[str] = []
        try:
            for chunk in self._inner.stream(prompt):
                # Coerce for the log copy only; non-str chunks must not break join().
                pieces.append(self._as_text(chunk))
                yield chunk
        except Exception as exc:
            self._log_error("LLM stream 异常", prompt, started, exc)
            raise
        self._log_response("LLM stream 完成", "".join(pieces), started)
def _build_raw_llm(caller: str = "") -> tuple[_BaseLLM, str, str]:
"""构造原始 LLM 实例,返回 (实例, model名, backend名)。"""
backend = os.getenv("LLM_BACKEND", "cloud")
if backend == "local":
from langchain_ollama import ChatOllama
@@ -34,20 +156,18 @@ def get_llm():
for chunk in raw.stream(prompt):
yield chunk.content
return OllamaWrapper()
return OllamaWrapper(), model, f"local/{model}"
provider = os.getenv("LLM_PROVIDER", "openai")
if provider == "anthropic":
from anthropic import Anthropic
api_key = os.getenv("OPENAI_API_KEY", "")
base_url = os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
model = os.getenv("LLM_MODEL", "minimax-2.7")
api_key = os.getenv("ANTHROPIC_API_KEY") or os.getenv("OPENAI_API_KEY", "")
base_url = os.getenv("ANTHROPIC_BASE_URL") or os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
model = os.getenv("LLM_MODEL", "MiniMax-M2.7")
temperature = 0.1
max_tokens = 4096
os.environ["NO_PROXY"] = "*"
client = Anthropic(api_key=api_key, base_url=base_url, timeout=120)
class MiniMaxLLM(_BaseLLM):
@@ -80,12 +200,13 @@ def get_llm():
)
return resp.input_tokens
return MiniMaxLLM()
return MiniMaxLLM(), model, f"cloud/anthropic/{model}"
else:
from langchain_openai import ChatOpenAI
model = os.getenv("LLM_MODEL", "gpt-4o")
raw = ChatOpenAI(
model=os.getenv("LLM_MODEL", "gpt-4o"),
model=model,
api_key=os.getenv("OPENAI_API_KEY"),
base_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
temperature=0.1,
@@ -99,8 +220,14 @@ def get_llm():
for chunk in raw.stream(prompt):
yield chunk.content
return OpenAIWrapper()
return OpenAIWrapper(), model, f"cloud/openai/{model}"
def get_llm(caller: str = "") -> _BaseLLM:
    """Return an LLM instance whose calls are logged.

    Args:
        caller: Tag identifying the call site (e.g. ``generate``,
            ``classify_intent``); it is attached to every log record.

    Returns:
        The raw backend built by ``_build_raw_llm`` wrapped in
        ``_LLMLoggingWrapper``.
    """
    raw_llm, model_name, backend_label = _build_raw_llm(caller)
    return _LLMLoggingWrapper(
        raw_llm,
        model=model_name,
        backend=backend_label,
        caller=caller,
    )
def get_llm_for_correction():
    """Return the logged LLM used for correction, tagged with caller ``correction``."""
    # The untagged ``return get_llm()`` that preceded this line made the tagged
    # call unreachable, so correction calls were logged without a caller.
    return get_llm(caller="correction")
+167
View File
@@ -0,0 +1,167 @@
"""集中日志模块。
提供:
- 结构化 JSON 日志(每行一条记录)
- 请求级 trace_id(通过 contextvars 自动传播)
- 独立的 LLM 调用日志文件
- 日志轮转(按大小 10MB,保留 5 个备份)
用法:
from backend.logger import get_logger, set_trace_id
# 业务日志
log = get_logger("agent")
log.info("节点开始执行", extra={"node": "classify_intent", "session_id": "xxx"})
# LLM 日志
llm_log = get_logger("llm")
llm_log.info("LLM 请求", extra={"prompt": "...", "model": "gpt-4o"})
"""
import json
import logging
import os
import sys
import uuid
from contextvars import ContextVar
from datetime import datetime, timezone, timedelta
from logging.handlers import RotatingFileHandler
from pathlib import Path
from typing import Optional
# Directory that receives the log files; taken from the LOG_DIR environment
# variable, falling back to ./logs.
LOG_DIR = Path(os.getenv("LOG_DIR", "./logs"))
# Name of the minimum log level; taken from LOG_LEVEL, falling back to DEBUG.
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG")
# File name (inside LOG_DIR) used by the "llm" logger.
LLM_LOG_FILE = "llm.log"
# File name (inside LOG_DIR) used by every other logger.
APP_LOG_FILE = "app.log"
# Fixed UTC+8 offset used for the timestamps written by JsonFormatter.
CHINA_TZ = timezone(timedelta(hours=8))
# Context-local holder of the current trace id; the empty string means "not set".
_trace_id_var: ContextVar[str] = ContextVar("trace_id", default="")
def generate_trace_id() -> str:
    """Return a fresh trace id: the first 16 hex characters of a random UUID4."""
    full_hex = uuid.uuid4().hex
    return full_hex[:16]
def get_trace_id() -> str:
    """Return the trace id of the current context, creating one if none is set.

    When the context variable still holds its empty default, a new id is
    generated, stored in the context variable, and returned.
    """
    current = _trace_id_var.get()
    if current:
        return current
    fresh = generate_trace_id()
    _trace_id_var.set(fresh)
    return fresh
def set_trace_id(trace_id: str):
    """Store ``trace_id`` as the trace id of the current context."""
    _trace_id_var.set(trace_id)
class JsonFormatter(logging.Formatter):
    """Format each log record as one line of JSON for later analysis.

    Keys passed through ``logger.debug(msg, extra={...})`` become attributes
    of the ``LogRecord``; every attribute that is not a standard ``LogRecord``
    attribute is gathered under the ``extra`` key of the output object.
    """

    # Attribute names that belong to LogRecord itself (plus the internal
    # ``extra_fields`` carrier) and therefore must not be copied into ``extra``.
    _STANDARD_ATTRS: frozenset[str] = frozenset({
        "args", "asctime", "created", "exc_info", "exc_text", "filename",
        "funcName", "levelname", "levelno", "lineno", "module", "msecs",
        "message", "msg", "name", "pathname", "process", "processName",
        "relativeCreated", "stack_info", "thread", "threadName",
        "extra_fields", "taskName",
    })

    def _collect_extra(self, record: logging.LogRecord) -> dict:
        """Gather the non-standard attributes of ``record`` into one dict.

        The mapping stored in ``record.extra_fields`` (if any) is used as the
        base; public non-standard attributes set directly on the record are
        then added on top and win on key collisions.
        """
        # ``or {}`` tolerates an explicit ``extra_fields=None``.
        extra = dict(getattr(record, "extra_fields", None) or {})
        extra.update(
            (key, val)
            for key, val in record.__dict__.items()
            if key not in self._STANDARD_ATTRS and not key.startswith("_")
        )
        return extra

    def format(self, record: logging.LogRecord) -> str:
        """Return ``record`` serialised as a single-line JSON string."""
        log_entry = {
            # Use the moment the event was logged, not the moment it is formatted.
            "timestamp": datetime.fromtimestamp(record.created, CHINA_TZ).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "trace_id": get_trace_id(),
            "message": record.getMessage(),
            "module": record.module,
            "function": record.funcName,
            "line": record.lineno,
        }
        extra = self._collect_extra(record)
        if extra:
            log_entry["extra"] = extra
        if record.exc_info and record.exc_info[0]:
            import traceback
            log_entry["exception"] = traceback.format_exception(*record.exc_info)
        # ``default=str`` is deliberate: a value json cannot encode (Path,
        # exception, datetime, ...) is written as its string form instead of
        # raising TypeError, which would make logging drop the whole record.
        return json.dumps(log_entry, ensure_ascii=False, default=str)
def _create_handler(filename: str, level: int) -> RotatingFileHandler:
    """Build a rotating, JSON-formatting file handler for ``LOG_DIR / filename``.

    The file rotates at 10 MiB and five backups are kept.

    Args:
        filename: Log file name relative to ``LOG_DIR``.
        level: Minimum level the handler lets through.
    """
    log_path = LOG_DIR / filename
    rotating = RotatingFileHandler(
        filename=str(log_path),
        maxBytes=10 * 1024 * 1024,
        backupCount=5,
        encoding="utf-8",
    )
    rotating.setLevel(level)
    rotating.setFormatter(JsonFormatter())
    return rotating
def _get_level() -> int:
    """Translate ``LOG_LEVEL`` into a numeric ``logging`` level.

    The name is matched case-insensitively and surrounding whitespace is
    ignored. Anything that does not resolve to an integer attribute of the
    ``logging`` module falls back to ``logging.DEBUG``.
    """
    resolved = getattr(logging, LOG_LEVEL.strip().upper(), None)
    # Some upper-case attributes of ``logging`` are not levels (for example
    # BASIC_FORMAT is a str); passing those to setLevel() would raise.
    if isinstance(resolved, int):
        return resolved
    return logging.DEBUG
# One handler per log file, shared by every logger that writes to that file.
# Separate RotatingFileHandler instances on the same path each hold their own
# stream, so a rollover performed by one leaves the others writing to the
# renamed file (or fails outright on platforms that lock open files).
_FILE_HANDLERS: dict[str, RotatingFileHandler] = {}


def get_logger(name: str) -> logging.Logger:
    """Return the ``jrxml.<name>`` logger, configuring it on first use.

    ``name == "llm"`` writes to ``LLM_LOG_FILE``; every other name writes to
    ``APP_LOG_FILE``. A logger that already has handlers is returned as is, so
    repeated calls never attach duplicates. Propagation to the root logger is
    disabled.

    Args:
        name: Short logger name; it is prefixed with ``jrxml.``.
    """
    logger = logging.getLogger(f"jrxml.{name}")
    if logger.handlers:
        return logger

    LOG_DIR.mkdir(parents=True, exist_ok=True)
    level = _get_level()
    logger.setLevel(level)
    logger.propagate = False

    filename = LLM_LOG_FILE if name == "llm" else APP_LOG_FILE
    handler = _FILE_HANDLERS.get(filename)
    if handler is None:
        handler = _create_handler(filename, level)
        _FILE_HANDLERS[filename] = handler
    logger.addHandler(handler)
    return logger
class _ExtraAdapter(logging.LoggerAdapter):
    """Logger adapter that merges its own ``extra`` with the per-call ``extra``.

    The merged mapping is passed on under the single key ``extra_fields``;
    per-call keys override the adapter's keys.
    """

    def process(self, msg, kwargs):
        """Merge adapter-level and call-level extras into ``kwargs["extra"]``.

        Returns:
            The unchanged message and the updated ``kwargs``. When there is
            nothing to merge, ``kwargs`` carries no ``extra`` key at all.
        """
        # Either side may legitimately be None (``LoggerAdapter(logger, None)``
        # or ``log.info(msg, extra=None)``); treat that as "no fields".
        call_extra = kwargs.pop("extra", None) or {}
        merged = {**(self.extra or {}), **call_extra}
        if merged:
            kwargs["extra"] = {"extra_fields": merged}
        return msg, kwargs
def get_trace_logger(name: str) -> _ExtraAdapter:
    """Return a logger adapter that attaches a ``trace_id`` field to its records.

    The trace id is read once, when the adapter is created, and reused for
    every record logged through that adapter.

    Usage:
        log = get_trace_logger("agent")
        log.info("node finished", extra={"node": "generate"})
    """
    base_logger = get_logger(name)
    bound_fields = {"trace_id": get_trace_id()}
    return _ExtraAdapter(base_logger, bound_fields)
+6
View File
@@ -12,8 +12,12 @@ from typing import Optional
from dotenv import load_dotenv
from backend.logger import get_logger
load_dotenv()
_session_log = get_logger("session")
SESSIONS_DIR = Path(os.getenv("SESSIONS_DIR", "./sessions"))
@@ -43,6 +47,7 @@ def create_session(name: str = "", agent_state: Optional[dict] = None) -> dict:
}
with open(_session_path(sid), "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=2)
_session_log.info("创建会话", extra={"session_id": sid, "session_name": data["session_name"]})
return data
@@ -104,6 +109,7 @@ def delete_session(session_id: str) -> bool:
fp = _session_path(session_id)
if fp.exists():
fp.unlink()
_session_log.info("删除会话", extra={"session_id": session_id})
return True
return False
+21 -6
View File
@@ -5,22 +5,37 @@ import os
import httpx
from dotenv import load_dotenv
from backend.logger import get_logger
load_dotenv()
_val_log = get_logger("validation")
VALIDATION_URL = os.getenv("VALIDATION_SERVICE_URL", "http://localhost:8001/validate")
def validate_jrxml(jrxml_text: str) -> dict:
    """Send JRXML to the validation service and return ``{valid: bool, error: str}``.

    The text is POSTed as JSON to ``VALIDATION_URL`` with a 30 second timeout.
    The outcome is logged through ``_val_log`` in every case. This function
    does not raise: connection problems and any other failure are turned into
    a ``{"valid": False, "error": ...}`` result.

    Args:
        jrxml_text: The JRXML document to validate.

    Returns:
        The decoded JSON body of the service response on success, otherwise a
        dict with ``valid`` set to ``False`` and a descriptive ``error``.
    """
    jrxml_length = len(jrxml_text)
    try:
        with httpx.Client(timeout=30.0) as client:
            resp = client.post(VALIDATION_URL, json={"jrxml": jrxml_text})
            resp.raise_for_status()
            # Keep the decoded body in a variable so it can be logged before
            # returning; an early ``return resp.json()`` would skip the log.
            result = resp.json()
            _val_log.info(
                "验证完成",
                extra={
                    "valid": result.get("valid"),
                    "error": result.get("error", ""),
                    "jrxml_length": jrxml_length,
                },
            )
            return result
    except httpx.ConnectError:
        error_msg = f"无法连接到验证服务 ({VALIDATION_URL})。是否正在运行?"
        _val_log.error("验证服务连接失败", extra={"error": error_msg, "url": VALIDATION_URL})
        return {"valid": False, "error": error_msg}
    except Exception as e:
        # Top-level boundary: any other failure (HTTP status error, bad JSON,
        # unexpected response shape) is logged and reported to the caller.
        error_msg = f"验证请求失败: {str(e)}"
        _val_log.error("验证请求异常", extra={"error": str(e), "url": VALIDATION_URL})
        return {"valid": False, "error": error_msg}