b280c2b453
Add rag submodule for semantic JRXML chunk retrieval, refactor retrieve node to use RAGSearcher, and fix missing api_key in Anthropic SDK client initialization. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
49 lines
1.8 KiB
Python
49 lines
1.8 KiB
Python
"""嵌入模型工厂:支持本地 Sentence-Transformers 和云端 API。
|
|
|
|
调用方式:
|
|
get_embeddings() → LangChain 兼容的 embeddings 对象
|
|
get_st_embeddings() → 原始 SentenceTransformer 实例
|
|
"""
|
|
|
|
import os
|
|
from dotenv import load_dotenv
|
|
|
|
# Runs at import time, before any of the os.getenv() lookups in the functions
# below, so values from a .env file (if python-dotenv finds one) are visible
# to them.
load_dotenv()
|
|
|
|
|
|
def get_embeddings():
    """Return a LangChain-compatible embeddings object (for langchain_chroma etc.).

    The backend is selected by the ``EMBED_BACKEND`` environment variable:

    * ``cloud`` (case-insensitive, surrounding whitespace ignored) returns an
      ``OpenAIEmbeddings`` configured from ``EMBED_CLOUD_MODEL``,
      ``OPENAI_API_KEY`` and ``OPENAI_BASE_URL``.
    * any other value (default ``local``) returns a ``HuggingFaceEmbeddings``
      for the model named by ``RAG_EMBED_MODEL``, then ``LOCAL_EMBED_MODEL``,
      then ``Qwen/Qwen3-Embedding-0.6B``.

    Blank values (for example ``OPENAI_BASE_URL=`` left empty in a ``.env``
    file) are treated as unset so the defaults above still apply instead of
    an empty string being handed to the client.

    Returns:
        An ``OpenAIEmbeddings`` or ``HuggingFaceEmbeddings`` instance.

    Raises:
        ImportError: if the package providing the selected backend is not
            installed.
    """
    # `or` (rather than a getenv default) also covers variables that are set
    # but empty, which is what a blank line in .env produces.
    backend = (os.getenv("EMBED_BACKEND") or "local").strip().lower()

    if backend == "cloud":
        # Imported lazily so the local backend works without langchain_openai.
        from langchain_openai import OpenAIEmbeddings

        return OpenAIEmbeddings(
            model=os.getenv("EMBED_CLOUD_MODEL") or "text-embedding-3-small",
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_BASE_URL") or "https://api.openai.com/v1",
        )

    # Prefer the dedicated langchain_huggingface package; fall back to the
    # langchain_community location when it is not installed.
    try:
        from langchain_huggingface import HuggingFaceEmbeddings
    except ImportError:
        from langchain_community.embeddings import HuggingFaceEmbeddings

    model = (
        os.getenv("RAG_EMBED_MODEL")
        or os.getenv("LOCAL_EMBED_MODEL")
        or "Qwen/Qwen3-Embedding-0.6B"
    )
    return HuggingFaceEmbeddings(model_name=model)
|
|
|
|
|
|
def get_st_model():
    """Return a raw SentenceTransformer instance (same usage as the rag_jrxml submodule).

    Configuration is read from environment variables:

    * ``RAG_EMBED_MODEL``, then ``LOCAL_EMBED_MODEL``, then the default
      ``Qwen/Qwen3-Embedding-0.6B`` give the model name. Blank values are
      treated as unset so the fallback chain still applies.
    * ``RAG_USE_GPU`` (default ``true``) requests CUDA; it is only used when
      ``torch.cuda.is_available()`` reports a device.
    * ``RAG_USE_FP16`` (default ``true``) converts the model to half
      precision, and is applied only when running on CUDA.

    Flags are truthy for ``true``, ``1``, ``yes`` or ``on`` (case-insensitive,
    surrounding whitespace ignored); anything else is false.

    Returns:
        A ``SentenceTransformer`` placed on ``"cuda"`` or ``"cpu"``.

    Note:
        A new model is constructed on every call; callers that need it
        repeatedly should keep the returned instance.
    """
    # Heavy dependencies are imported lazily so importing this module stays cheap.
    import torch
    from sentence_transformers import SentenceTransformer

    # `or` chain: an empty RAG_EMBED_MODEL must not block the fallbacks.
    model_name = (
        os.getenv("RAG_EMBED_MODEL")
        or os.getenv("LOCAL_EMBED_MODEL")
        or "Qwen/Qwen3-Embedding-0.6B"
    )

    truthy = ("true", "1", "yes", "on")
    use_gpu = os.getenv("RAG_USE_GPU", "true").strip().lower() in truthy
    use_fp16 = os.getenv("RAG_USE_FP16", "true").strip().lower() in truthy

    device = "cuda" if (use_gpu and torch.cuda.is_available()) else "cpu"
    model = SentenceTransformer(model_name, device=device)

    # Half precision is only applied on GPU; the CPU path keeps full precision.
    if device == "cuda" and use_fp16:
        model = model.half()
    return model
|