"""大语言模型工厂:支持 OpenAI 兼容的云端 API、Anthropic 兼容 API 和本地 Ollama。""" import os from typing import Any from dotenv import load_dotenv load_dotenv() class _BaseLLM: """LLM 统一接口基类 — 所有后端都提供 invoke() 和 stream()。""" def invoke(self, prompt: str) -> Any: raise NotImplementedError def stream(self, prompt: str): raise NotImplementedError def get_llm(): backend = os.getenv("LLM_BACKEND", "cloud") if backend == "local": from langchain_ollama import ChatOllama model = os.getenv("LOCAL_LLM_MODEL", "qwen2.5-coder:7b") raw = ChatOllama(model=model, temperature=0.1) class OllamaWrapper(_BaseLLM): def invoke(self, prompt): return raw.invoke(prompt) def stream(self, prompt): for chunk in raw.stream(prompt): yield chunk.content return OllamaWrapper() provider = os.getenv("LLM_PROVIDER", "openai") if provider == "anthropic": from anthropic import Anthropic api_key = os.getenv("OPENAI_API_KEY", "") base_url = os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic") model = os.getenv("LLM_MODEL", "minimax-2.7") temperature = 0.1 max_tokens = 4096 os.environ["NO_PROXY"] = "*" client = Anthropic(api_key=api_key, base_url=base_url, timeout=120) class MiniMaxLLM(_BaseLLM): def invoke(self, prompt: str) -> Any: resp = client.messages.create( model=model, max_tokens=max_tokens, temperature=temperature, messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}], ) for block in resp.content: if block.type == "text": return type("Response", (), {"content": block.text})() return type("Response", (), {"content": ""})() def stream(self, prompt: str): with client.messages.stream( model=model, max_tokens=max_tokens, temperature=temperature, messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}], ) as s: for text in s.text_stream: yield text def get_num_tokens(self, text: str) -> int: resp = client.messages.count_tokens( model=model, messages=[{"role": "user", "content": [{"type": "text", "text": text}]}], ) return resp.input_tokens return MiniMaxLLM() else: from langchain_openai import ChatOpenAI raw = ChatOpenAI( model=os.getenv("LLM_MODEL", "gpt-4o"), api_key=os.getenv("OPENAI_API_KEY"), base_url=os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1"), temperature=0.1, ) class OpenAIWrapper(_BaseLLM): def invoke(self, prompt): return raw.invoke(prompt) def stream(self, prompt): for chunk in raw.stream(prompt): yield chunk.content return OpenAIWrapper() def get_llm_for_correction(): return get_llm()