"""嵌入模型工厂:支持本地 Sentence-Transformers 和云端 API。 调用方式: get_embeddings() → LangChain 兼容的 embeddings 对象 get_st_embeddings() → 原始 SentenceTransformer 实例 """ import os from dotenv import load_dotenv load_dotenv() def get_embeddings(): """返回 LangChain 兼容的 embeddings 对象(用于 langchain_chroma 等)。""" backend = os.getenv("EMBED_BACKEND", "local") if backend == "cloud": from langchain_openai import OpenAIEmbeddings return OpenAIEmbeddings( model=os.getenv("EMBED_CLOUD_MODEL", "text-embedding-3-small"), api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"), ) else: try: from langchain_huggingface import HuggingFaceEmbeddings except ImportError: from langchain_community.embeddings import HuggingFaceEmbeddings model = os.getenv("RAG_EMBED_MODEL", os.getenv("LOCAL_EMBED_MODEL", "Qwen/Qwen3-Embedding-0.6B")) return HuggingFaceEmbeddings(model_name=model) def get_st_model(): """返回原始 SentenceTransformer 实例(与 rag_jrxml 子模块使用方式一致)。""" import torch from sentence_transformers import SentenceTransformer model_name = os.getenv("RAG_EMBED_MODEL", os.getenv("LOCAL_EMBED_MODEL", "Qwen/Qwen3-Embedding-0.6B")) use_gpu = os.getenv("RAG_USE_GPU", "true").lower() in ("true", "1") use_fp16 = os.getenv("RAG_USE_FP16", "true").lower() in ("true", "1") device = "cuda" if (use_gpu and torch.cuda.is_available()) else "cpu" model = SentenceTransformer(model_name, device=device) if device == "cuda" and use_fp16: model = model.half() return model