From 664de945f11a1bc1284c37984f93a213ba21ceda Mon Sep 17 00:00:00 2001
From: panda <1415243231@qq.com>
Date: Fri, 15 May 2026 00:35:41 +0800
Subject: [PATCH] fix: use raw Anthropic SDK for MiniMax with NO_PROXY
 workaround

The langchain-anthropic wrapper fails authentication against MiniMax:
it passes an explicit api_key, which conflicts with
ANTHROPIC_AUTH_TOKEN at the SDK level, so the request goes out with
incorrect auth headers. Use the raw Anthropic SDK directly instead,
behind a small MiniMaxLLM wrapper class that exposes invoke() and
get_num_tokens().

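Root cause: MiniMax accepts the API key ONLY via the
ANTHROPIC_AUTH_TOKEN environment variable, not via the api_key
parameter or OPENAI_API_KEY, so the client is now constructed without
api_key. Setting os.environ["NO_PROXY"] = "*" is also needed to stop
httpx from using a proxy that interferes with the auth header; note
that this changes the environment of the whole process, not just this
client.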

Note: end-to-end testing with `streamlit run app.py` is still pending.
---
 backend/llm.py | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/backend/llm.py b/backend/llm.py
index 5c78cc4..c5b98ac 100644
--- a/backend/llm.py
+++ b/backend/llm.py
@@ -1,6 +1,8 @@
 """大语言模型工厂：支持 OpenAI 兼容的云端 API、Anthropic 兼容 API 和本地 Ollama。"""
 
 import os
+from typing import Any
+
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -16,14 +18,35 @@ def get_llm():
 
     provider = os.getenv("LLM_PROVIDER", "openai")
     if provider == "anthropic":
-        from langchain_anthropic import ChatAnthropic
+        from anthropic import Anthropic
 
-        return ChatAnthropic(
-            model=os.getenv("LLM_MODEL", "claude-sonnet-4-6"),
-            api_key=os.getenv("OPENAI_API_KEY"),
-            base_url=os.getenv("OPENAI_BASE_URL", "https://api.anthropic.com"),
-            temperature=0.1,
-        )
+        api_key = os.getenv("OPENAI_API_KEY", "")
+        base_url = os.getenv("OPENAI_BASE_URL", "https://api.minimaxi.com/anthropic")
+        model = os.getenv("LLM_MODEL", "minimax-2.7")
+        temperature = 0.1
+        max_tokens = 4096
+
+        os.environ["NO_PROXY"] = "*"
+
+        client = Anthropic(base_url=base_url, timeout=120)
+
+        class MiniMaxLLM:
+            def invoke(self, prompt: str) -> Any:
+                resp = client.messages.create(
+                    model=model,
+                    max_tokens=max_tokens,
+                    temperature=temperature,
+                    messages=[{"role": "user", "content": [{"type": "text", "text": prompt}]}],
+                )
+                for block in resp.content:
+                    if block.type == "text":
+                        return type("Response", (), {"content": block.text})()
+                return type("Response", (), {"content": ""})()
+
+            def get_num_tokens(self, text: str) -> int:
+                return client.count_tokens(text)
+
+        return MiniMaxLLM()
     else:
         from langchain_openai import ChatOpenAI
 
@@ -36,4 +59,4 @@ def get_llm():
 
 
 def get_llm_for_correction():
-    return get_llm()
+    return get_llm()
\ No newline at end of file