70614dff5e
Major changes: - Streaming: LLM统一 _BaseLLM 接口 (invoke + stream), generate/modify/correct 节点使用 get_stream_writer() 实现逐字输出, UI 节点平铺展开自动折叠 - Prompt外部化: 7个prompt拆分到 prompts/*.md, loader.py 支持热重载 - 错误自增长: backend/error_kb.py — 指纹去重 + ChromaDB持久化, correct_jrxml→validate 通过时自动入库, retrieve同时搜索错误KB - 文件上传: backend/file_parser.py — PDF/DOCX/图片/文本解析, 侧边栏多文件上传, 文本自动注入下一条消息 - A4模板识别: backend/layout_analyzer.py — 三种模式(完整A4/行片段修改/行片段新建), PaddleOCR元素提取 + 行分组 + JRXML section匹配 - 会话历史下载: jrxml_versions版本追踪 + 侧边栏历史版本下载按钮 - 预览修复: route_after_save跳过预览/导出意图的验证循环 - Ctrl+C修复: JS注入拦截Streamlit裸c键清缓存 Docs: CLAUDE.md (完整项目文档), ROADMAP.md (改进路线图) Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
49 lines
1.8 KiB
Python
49 lines
1.8 KiB
Python
"""Embedding model factory: supports local Sentence-Transformers and cloud APIs.

Usage:
    get_embeddings() → LangChain-compatible embeddings object
    get_st_model()   → raw SentenceTransformer instance
"""
|
|
|
|
import os
|
|
from dotenv import load_dotenv
|
|
|
|
load_dotenv()
|
|
|
|
|
|
def get_embeddings():
    """Return a LangChain-compatible embeddings object (e.g. for langchain_chroma).

    The backend is chosen by the ``EMBED_BACKEND`` environment variable
    (default ``"local"``). The value is stripped and lower-cased before it is
    compared, so ``"Cloud"`` or ``" cloud "`` select the cloud backend instead
    of silently falling through to the local one. Any value other than
    ``"cloud"`` selects the local backend.

    Environment variables read:
        EMBED_BACKEND: ``"cloud"`` or anything else (local).
        EMBED_CLOUD_MODEL: cloud model name, default ``"text-embedding-3-small"``.
        OPENAI_API_KEY: passed through as ``api_key`` (may be unset).
        OPENAI_BASE_URL: default ``"https://api.openai.com/v1"``.
        RAG_EMBED_MODEL / LOCAL_EMBED_MODEL: local model name; the first one
            that is set wins, default ``"Qwen/Qwen3-Embedding-0.6B"``.

    Returns:
        An ``OpenAIEmbeddings`` instance for the cloud backend, otherwise a
        ``HuggingFaceEmbeddings`` instance.

    Raises:
        ImportError: if the package providing the selected backend is missing.
    """
    backend = os.getenv("EMBED_BACKEND", "local").strip().lower()

    if backend == "cloud":
        # Imported only on this branch, so the local path never needs
        # langchain_openai to be installed.
        from langchain_openai import OpenAIEmbeddings

        return OpenAIEmbeddings(
            model=os.getenv("EMBED_CLOUD_MODEL", "text-embedding-3-small"),
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"),
        )

    try:
        from langchain_huggingface import HuggingFaceEmbeddings
    except ImportError:
        # Fall back to the class exported by langchain_community.
        from langchain_community.embeddings import HuggingFaceEmbeddings

    # RAG_EMBED_MODEL takes precedence over LOCAL_EMBED_MODEL.
    model = os.getenv(
        "RAG_EMBED_MODEL",
        os.getenv("LOCAL_EMBED_MODEL", "Qwen/Qwen3-Embedding-0.6B"),
    )
    return HuggingFaceEmbeddings(model_name=model)
|
|
|
|
|
|
def get_st_model():
    """Build and return a raw ``SentenceTransformer`` instance.

    The original note states this mirrors how the rag_jrxml submodule loads
    its model (not verifiable from this file).

    Environment variables read:
        RAG_EMBED_MODEL / LOCAL_EMBED_MODEL: model name; the first one that is
            set wins, default ``"Qwen/Qwen3-Embedding-0.6B"``.
        RAG_USE_GPU: ``"true"`` or ``"1"`` (case-insensitive) allows CUDA;
            default ``"true"``.
        RAG_USE_FP16: ``"true"`` or ``"1"`` (case-insensitive) converts the
            model with ``.half()``, applied only on CUDA; default ``"true"``.

    Returns:
        The loaded model, placed on ``"cuda"`` when allowed and available,
        otherwise on ``"cpu"``.
    """
    import torch
    from sentence_transformers import SentenceTransformer

    def _flag(var_name):
        # Only "true" and "1" (any letter case) count as enabled.
        return os.getenv(var_name, "true").lower() in ("true", "1")

    fallback_name = os.getenv("LOCAL_EMBED_MODEL", "Qwen/Qwen3-Embedding-0.6B")
    name = os.getenv("RAG_EMBED_MODEL", fallback_name)
    gpu_allowed = _flag("RAG_USE_GPU")
    fp16_wanted = _flag("RAG_USE_FP16")

    if gpu_allowed and torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    st_model = SentenceTransformer(name, device=device)

    # Half precision is only applied when the model actually runs on CUDA.
    if device != "cuda" or not fp16_wanted:
        return st_model
    return st_model.half()
|