Files
agent_jrxml/backend/llm.py
T
panda 70614dff5e feat: comprehensive v2 upgrade — streaming, error KB, file upload, layout analysis
Major changes:
- Streaming: LLM统一 _BaseLLM 接口 (invoke + stream), generate/modify/correct
  节点使用 get_stream_writer() 实现逐字输出, UI 节点平铺展开自动折叠
- Prompt外部化: 7个prompt拆分到 prompts/*.md, loader.py 支持热重载
- 错误自增长: backend/error_kb.py — 指纹去重 + ChromaDB持久化,
  correct_jrxml→validate 通过时自动入库, retrieve同时搜索错误KB
- 文件上传: backend/file_parser.py — PDF/DOCX/图片/文本解析,
  侧边栏多文件上传, 文本自动注入下一条消息
- A4模板识别: backend/layout_analyzer.py — 三种模式(完整A4/行片段修改/行片段新建),
  PaddleOCR元素提取 + 行分组 + JRXML section匹配
- 会话历史下载: jrxml_versions版本追踪 + 侧边栏历史版本下载按钮
- 预览修复: route_after_save跳过预览/导出意图的验证循环
- Ctrl+C修复: JS注入拦截Streamlit裸c键清缓存

Docs: CLAUDE.md (完整项目文档), ROADMAP.md (改进路线图)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-19 15:02:53 +08:00

106 lines
3.4 KiB
Python

"""大语言模型工厂:支持 OpenAI 兼容的云端 API、Anthropic 兼容 API 和本地 Ollama。"""
import os
from typing import Any
from dotenv import load_dotenv
load_dotenv()
class _BaseLLM:
"""LLM 统一接口基类 — 所有后端都提供 invoke() 和 stream()。"""
def invoke(self, prompt: str) -> Any:
raise NotImplementedError
def stream(self, prompt: str):
raise NotImplementedError
def get_llm():
    """Build and return an LLM wrapper selected by environment variables.

    Selection (values are compared case-insensitively, surrounding
    whitespace ignored):

    * ``LLM_BACKEND`` (default ``"cloud"``) — ``"local"`` selects Ollama via
      ``langchain_ollama.ChatOllama`` with model ``LOCAL_LLM_MODEL``
      (default ``"qwen2.5-coder:7b"``).
    * Otherwise ``LLM_PROVIDER`` (default ``"openai"``) — ``"anthropic"``
      selects the Anthropic SDK client pointed at ``OPENAI_BASE_URL``;
      any other value selects ``langchain_openai.ChatOpenAI``.

    The backend SDKs are imported lazily so that only the selected one needs
    to be installed.

    Returns:
        A ``_BaseLLM`` subclass instance exposing ``invoke(prompt)`` (returns
        an object with a ``.content`` attribute) and ``stream(prompt)``
        (a generator of content chunks). The Anthropic-backed wrapper
        additionally offers ``get_num_tokens(text)``.
    """
    backend = os.getenv("LLM_BACKEND", "cloud").strip().lower()
    if backend == "local":
        from langchain_ollama import ChatOllama

        raw = ChatOllama(
            model=os.getenv("LOCAL_LLM_MODEL", "qwen2.5-coder:7b"),
            temperature=0.1,
        )

        class OllamaWrapper(_BaseLLM):
            """Adapter exposing a ChatOllama model through the _BaseLLM interface."""

            def invoke(self, prompt):
                return raw.invoke(prompt)

            def stream(self, prompt):
                # Yield only the content of each streamed chunk.
                for chunk in raw.stream(prompt):
                    yield chunk.content

        return OllamaWrapper()

    provider = os.getenv("LLM_PROVIDER", "openai").strip().lower()
    if provider == "anthropic":
        from anthropic import Anthropic

        # The Anthropic-compatible endpoint reuses the OPENAI_* variables for
        # its credentials and base URL.
        api_key = os.getenv("OPENAI_API_KEY", "")
        base_url = os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
        model = os.getenv("LLM_MODEL", "minimax-2.7")
        temperature = 0.1
        max_tokens = 4096

        # NOTE(review): this overwrites any existing NO_PROXY value for the
        # whole process, not just for this client — confirm that is intended.
        os.environ["NO_PROXY"] = "*"
        client = Anthropic(api_key=api_key, base_url=base_url, timeout=120)

        def _user_messages(text: str) -> list:
            """Wrap ``text`` as a single-turn user message payload."""
            return [{"role": "user", "content": [{"type": "text", "text": text}]}]

        class Response:
            """Minimal result object carrying the generated text as ``.content``."""

            def __init__(self, content: str) -> None:
                self.content = content

        class MiniMaxLLM(_BaseLLM):
            """_BaseLLM implementation backed by the Anthropic SDK client."""

            def invoke(self, prompt: str) -> Any:
                resp = client.messages.create(
                    model=model,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    messages=_user_messages(prompt),
                )
                # Concatenate every text block so that a reply split across
                # several blocks is not truncated to its first block;
                # non-text blocks are skipped. Empty string if there is none.
                text = "".join(
                    block.text for block in resp.content if block.type == "text"
                )
                return Response(text)

            def stream(self, prompt: str):
                # The context manager closes the stream when iteration ends
                # or the generator is discarded.
                with client.messages.stream(
                    model=model,
                    max_tokens=max_tokens,
                    temperature=temperature,
                    messages=_user_messages(prompt),
                ) as s:
                    for text in s.text_stream:
                        yield text

            def get_num_tokens(self, text: str) -> int:
                """Return the input token count reported by the API for ``text``."""
                resp = client.messages.count_tokens(
                    model=model,
                    messages=_user_messages(text),
                )
                return resp.input_tokens

        return MiniMaxLLM()

    from langchain_openai import ChatOpenAI

    raw = ChatOpenAI(
        model=os.getenv("LLM_MODEL", "gpt-4o"),
        api_key=os.getenv("OPENAI_API_KEY"),
        base_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
        temperature=0.1,
    )

    class OpenAIWrapper(_BaseLLM):
        """Adapter exposing a ChatOpenAI model through the _BaseLLM interface."""

        def invoke(self, prompt):
            return raw.invoke(prompt)

        def stream(self, prompt):
            # Yield only the content of each streamed chunk.
            for chunk in raw.stream(prompt):
                yield chunk.content

    return OpenAIWrapper()
def get_llm_for_correction():
    """Return the LLM to use for correction.

    Delegates directly to ``get_llm()``, so the same environment-driven
    backend selection applies; a fresh wrapper is built on every call.
    """
    correction_llm = get_llm()
    return correction_llm