Files
agent_jrxml/backend/embeddings.py
panda 70614dff5e feat: comprehensive v2 upgrade — streaming, error KB, file upload, layout analysis
Major changes:
- Streaming: LLM统一 _BaseLLM 接口 (invoke + stream), generate/modify/correct
  节点使用 get_stream_writer() 实现逐字输出, UI 节点平铺展开自动折叠
- Prompt外部化: 7个prompt拆分到 prompts/*.md, loader.py 支持热重载
- 错误自增长: backend/error_kb.py — 指纹去重 + ChromaDB持久化,
  correct_jrxml→validate 通过时自动入库, retrieve同时搜索错误KB
- 文件上传: backend/file_parser.py — PDF/DOCX/图片/文本解析,
  侧边栏多文件上传, 文本自动注入下一条消息
- A4模板识别: backend/layout_analyzer.py — 三种模式(完整A4/行片段修改/行片段新建),
  PaddleOCR元素提取 + 行分组 + JRXML section匹配
- 会话历史下载: jrxml_versions版本追踪 + 侧边栏历史版本下载按钮
- 预览修复: route_after_save跳过预览/导出意图的验证循环
- Ctrl+C修复: JS注入拦截Streamlit裸c键清缓存

Docs: CLAUDE.md (完整项目文档), ROADMAP.md (改进路线图)

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-19 15:02:53 +08:00

49 lines
1.8 KiB
Python

"""嵌入模型工厂:支持本地 Sentence-Transformers 和云端 API。
调用方式:
get_embeddings() → LangChain 兼容的 embeddings 对象
get_st_model() → 原始 SentenceTransformer 实例
"""
import os
from dotenv import load_dotenv
# Called at import time, before any factory function runs, so that the
# os.getenv() lookups in this module can see values supplied through a
# .env file (python-dotenv's load_dotenv loads them into the environment).
load_dotenv()
def get_embeddings():
    """Return a LangChain-compatible embeddings object (e.g. for langchain_chroma).

    The backend is chosen by the ``EMBED_BACKEND`` environment variable, which
    is compared case-insensitively with surrounding whitespace ignored:

    * ``cloud`` -> ``langchain_openai.OpenAIEmbeddings`` configured from
      ``EMBED_CLOUD_MODEL`` (default ``text-embedding-3-small``),
      ``OPENAI_API_KEY`` and ``OPENAI_BASE_URL``
      (default ``https://api.openai.com/v1``).
    * any other value, or unset -> ``HuggingFaceEmbeddings`` loaded with the
      model named by ``RAG_EMBED_MODEL``, then ``LOCAL_EMBED_MODEL``, then
      ``Qwen/Qwen3-Embedding-0.6B``.

    Variables that are set but empty (for example ``RAG_EMBED_MODEL=`` in a
    .env file) are treated as unset, so the defaults above still apply
    instead of an empty model name or URL being passed through.

    Returns:
        An ``OpenAIEmbeddings`` instance for the cloud backend, otherwise a
        ``HuggingFaceEmbeddings`` instance.

    Raises:
        ImportError: If the package needed by the selected backend is not
            installed.
    """
    # os.getenv(name, default) only uses the default when the variable is
    # missing; `or` additionally covers the "present but empty" case.
    backend = (os.getenv("EMBED_BACKEND") or "local").strip().lower()

    if backend == "cloud":
        # Imported here rather than at module top, so this dependency is only
        # required when the cloud backend is actually selected.
        from langchain_openai import OpenAIEmbeddings

        return OpenAIEmbeddings(
            model=os.getenv("EMBED_CLOUD_MODEL") or "text-embedding-3-small",
            # Pass None rather than an empty string when no key is configured.
            api_key=os.getenv("OPENAI_API_KEY") or None,
            base_url=os.getenv("OPENAI_BASE_URL") or "https://api.openai.com/v1",
        )

    try:
        from langchain_huggingface import HuggingFaceEmbeddings
    except ImportError:
        # Fall back to the langchain_community location of the same class
        # when langchain_huggingface is not installed.
        from langchain_community.embeddings import HuggingFaceEmbeddings

    # First non-empty value wins: RAG_EMBED_MODEL, LOCAL_EMBED_MODEL, default.
    model_name = (
        os.getenv("RAG_EMBED_MODEL")
        or os.getenv("LOCAL_EMBED_MODEL")
        or "Qwen/Qwen3-Embedding-0.6B"
    )
    return HuggingFaceEmbeddings(model_name=model_name)
def get_st_model():
    """Return a raw ``SentenceTransformer`` instance.

    The original note states this matches how the rag_jrxml submodule loads
    its model (that submodule is not visible here -- TODO confirm).

    Environment variables read:

    * ``RAG_EMBED_MODEL``, then ``LOCAL_EMBED_MODEL``: model name; the first
      non-empty value wins, defaulting to ``Qwen/Qwen3-Embedding-0.6B``. A
      set-but-empty variable is treated as unset.
    * ``RAG_USE_GPU`` (default ``true``): use CUDA when it is available.
    * ``RAG_USE_FP16`` (default ``true``): convert the model to half
      precision; only applied when the model is placed on CUDA.

    Flags are true for ``true``, ``1``, ``yes`` or ``on`` (case-insensitive,
    surrounding whitespace ignored); any other value, including an empty
    string, is false.

    Returns:
        The loaded ``SentenceTransformer`` on ``cuda`` or ``cpu``.

    Raises:
        ImportError: If ``torch`` or ``sentence_transformers`` is not
            installed.
    """
    # Imported inside the function so importing this module does not require
    # these packages unless this loader is actually called.
    import torch
    from sentence_transformers import SentenceTransformer

    truthy = ("true", "1", "yes", "on")

    # os.getenv(name, default) only uses the default when the variable is
    # missing; `or` additionally skips variables that are present but empty.
    model_name = (
        os.getenv("RAG_EMBED_MODEL")
        or os.getenv("LOCAL_EMBED_MODEL")
        or "Qwen/Qwen3-Embedding-0.6B"
    )
    use_gpu = os.getenv("RAG_USE_GPU", "true").strip().lower() in truthy
    use_fp16 = os.getenv("RAG_USE_FP16", "true").strip().lower() in truthy

    # GPU is used only when both requested and actually available.
    device = "cuda" if (use_gpu and torch.cuda.is_available()) else "cpu"
    model = SentenceTransformer(model_name, device=device)

    # Half precision is applied only on CUDA; the CPU path keeps the
    # model's loaded precision.
    if device == "cuda" and use_fp16:
        model = model.half()
    return model