20260602备份
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
====================================================================
|
||||
智能客服节点日志查看器 v1.0.0
|
||||
====================================================================
|
||||
|
||||
目录说明:
|
||||
────────────────────────────────────────────────────────────────
|
||||
1. dist/ - 可执行文件目录
|
||||
- 智能客服节点日志查看器.exe (直接双击运行)
|
||||
|
||||
2. 日志查看器.py - Python源码文件
|
||||
|
||||
3. config.json - 配置文件(程序自动生成)
|
||||
|
||||
4. 使用说明.md - 详细使用说明
|
||||
|
||||
5. requirements.txt - Python依赖列表
|
||||
|
||||
6. build.py - 打包脚本
|
||||
|
||||
|
||||
快速开始指南:
|
||||
────────────────────────────────────────────────────────────────
|
||||
方式一:使用可执行文件(推荐)
|
||||
1. 进入 dist/ 目录
|
||||
2. 双击 "智能客服节点日志查看器.exe"
|
||||
3. 首次使用需要配置API凭证(见下文)
|
||||
|
||||
方式二:使用Python源码
|
||||
1. 安装依赖:pip install -r requirements.txt
|
||||
2. 运行:python 日志查看器.py
|
||||
|
||||
|
||||
首次使用配置:
|
||||
────────────────────────────────────────────────────────────────
|
||||
步骤1:获取API凭证
|
||||
1. 登录 https://agent.udesk.cn
|
||||
2. 按 F12 打开开发者工具
|
||||
3. 切换到"网络"标签页
|
||||
4. 刷新页面或点击任意功能
|
||||
5. 找到带有 "authorization: Bearer" 的请求
|
||||
6. 右键复制为 cURL (bash)
|
||||
|
||||
步骤2:配置程序
|
||||
1. 打开程序,点击"配置"按钮
|
||||
2. 粘贴 curl 命令到文本框
|
||||
3. 点击"解析Curl"
|
||||
4. 点击"应用"保存配置
|
||||
|
||||
|
||||
主要功能:
|
||||
────────────────────────────────────────────────────────────────
|
||||
1. 日志获取:按日期范围获取对话历史
|
||||
2. 人工审核:对关键节点输出进行审核
|
||||
3. 大模型评审:AI自动对比测试用例
|
||||
4. Excel导出:生成完整的审核报告
|
||||
|
||||
|
||||
常见问题:
|
||||
────────────────────────────────────────────────────────────────
|
||||
Q: 提示401 Invalid token?
|
||||
A: Token已过期,重新获取curl并解析
|
||||
|
||||
Q: 程序无法启动?
|
||||
A: 检查是否被杀毒软件拦截,尝试以管理员身份运行
|
||||
|
||||
Q: 如何转换使用说明为Word文档?
|
||||
A: 可以使用在线工具(如 https://www.markdowntoword.com/)
|
||||
或用 Typora/VS Code 等编辑器打开后另存为
|
||||
|
||||
|
||||
文件大小优化说明:
|
||||
────────────────────────────────────────────────────────────────
|
||||
为了最小化可执行文件大小,已排除以下非必要模块:
|
||||
- numpy, pandas, matplotlib (数据分析库)
|
||||
- PIL, cv2 (图像处理)
|
||||
- unittest, pytest (测试框架)
|
||||
- 其他未使用的标准库
|
||||
|
||||
|
||||
技术支持:
|
||||
────────────────────────────────────────────────────────────────
|
||||
如有问题,请查看:
|
||||
1. 程序同目录的日志文件(如有)
|
||||
2. 使用说明.md 详细文档
|
||||
3. 检查 config.json 配置是否正确
|
||||
|
||||
|
||||
版本历史:
|
||||
────────────────────────────────────────────────────────────────
|
||||
v1.0.0 (2026-05-21)
|
||||
- 初始版本发布
|
||||
- 支持日志查看与审核
|
||||
- 支持大模型自动评审
|
||||
- 支持Excel导出
|
||||
|
||||
====================================================================
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"5a6c380c-fb06-440f-8042-9c3a03c5c164_4e52f67e-ed4e-44db-b8f6-fd63ed9a4e32": {
|
||||
"大模型二次生成": "正确"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
def check_pyinstaller():
    """Make sure PyInstaller is importable, installing it with pip if it is not.

    Returns:
        True once PyInstaller is available. A failing pip run propagates as
        ``subprocess.CalledProcessError``.
    """
    try:
        import PyInstaller  # noqa: F401 -- imported only to probe availability
    except ImportError:
        print("正在安装 PyInstaller...")
        install_cmd = [sys.executable, "-m", "pip", "install", "pyinstaller"]
        subprocess.check_call(install_cmd)
    else:
        print("PyInstaller 已安装")
    return True
|
||||
|
||||
def build_exe():
    """Bundle ``日志查看器.py`` into a single windowed executable with PyInstaller.

    Raises:
        subprocess.CalledProcessError: if PyInstaller exits with a non-zero status.
    """
    print("开始打包...")

    # Only packages the application does not need are excluded.  The standard
    # library packages email, xml, asyncio and concurrent are deliberately NOT
    # excluded any more: http.client (used by requests) imports email, openpyxl
    # parses workbook XML through xml.etree, and the anthropic SDK imports
    # asyncio, which itself imports concurrent.futures.  multiprocessing is
    # kept as well because concurrent.futures can load it on demand.
    excluded_modules = [
        "unittest",
        "pytest",
        "numpy",
        "pandas",
        "matplotlib",
        "PIL",
        "cv2",
        "scipy",
        "sklearn",
        "http.server",
    ]

    cmd = [
        sys.executable, "-m", "PyInstaller",
        "--onefile",
        "--windowed",
        "--name=智能客服节点日志查看器",
        "--noupx",
    ]
    # PyInstaller documents --strip as not recommended on Windows.
    if sys.platform != "win32":
        cmd.append("--strip")
    cmd.extend(f"--exclude-module={module}" for module in excluded_modules)
    cmd.append("--hidden-import=openpyxl.cell._writer")
    cmd.append("日志查看器.py")

    print("执行命令:", " ".join(cmd))
    subprocess.check_call(cmd)

    print("\n打包完成!")
    print("可执行文件位于: dist/智能客服节点日志查看器.exe")
|
||||
|
||||
if __name__ == "__main__":
    # Work from the script's own directory so the relative source path and the
    # dist/ output resolve the same way regardless of where it is launched.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    if check_pyinstaller():
        build_exe()
|
||||
@@ -0,0 +1,34 @@
|
||||
@echo off
rem Build the log viewer into a single windowed executable with PyInstaller.
rem Run from the script's own directory so the relative source path resolves.
cd /d "%~dp0"

echo ================================================
echo 智能客服节点日志查看器 - 打包脚本
echo ================================================
echo.

echo 正在检查Python环境...
python -c "import tkinter; print('tkinter available')"
if %errorlevel% neq 0 (
    echo ERROR: tkinter 不可用,请检查Python环境
    pause
    exit /b 1
)

echo.
echo 开始打包...
python -m PyInstaller ^
    --onefile ^
    --windowed ^
    --name="智能客服节点日志查看器" ^
    --hidden-import=tkinter ^
    --hidden-import=tkinter.ttk ^
    --hidden-import=tkinter.messagebox ^
    --hidden-import=tkinter.filedialog ^
    --hidden-import=_tkinter ^
    --hidden-import=openpyxl.cell._writer ^
    --collect-all=tkinter ^
    --collect-all=tkinter.ttk ^
    日志查看器.py
rem Stop here on failure instead of reporting success unconditionally.
if %errorlevel% neq 0 (
    echo ERROR: 打包失败,请检查上方输出
    pause
    exit /b 1
)

echo.
echo 打包完成!
echo 输出文件: dist\智能客服节点日志查看器.exe
pause
|
||||
@@ -0,0 +1,46 @@
|
||||
{
|
||||
"app_id": "eae84278-36c0-437d-addf-5547b0ae3bc2",
|
||||
"api_token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiNWQzNjMzZjMtNTIzZS00NTRjLTgyYzgtMTczMjc2YjVjMjg0IiwiZXhwIjoxNzc5NDUzMzMyLCJpc3MiOiJTRUxGX0hPU1RFRCIsInN1YiI6IkNvbnNvbGUgQVBJIFBhc3Nwb3J0In0.TqJeWS53JP4ThYBTGLF-aYgz9PUUsoRPswEBgX9p0Tc",
|
||||
"cookies": {
|
||||
"ut_user_id": "null",
|
||||
"ut_global_id": "%22ca426419-74d7-4dc8-bc5c-c866b096b297%22",
|
||||
"_gcl_au": "1.1.505382516.1778740165",
|
||||
"_ga": "GA1.1.968517445.1778741596",
|
||||
"sensorsdata2015jssdkcross": "%7B%22distinct_id%22%3A%2219e2542d1f1efd-0b7accb742cea4-4c657b58-2073600-19e2542d1f21d61%22%2C%22%24device_id%22%3A%2219e2542d1f1efd-0b7accb742cea4-4c657b58-2073600-19e2542d1f21d61%22%2C%22props%22%3A%7B%7D%7D",
|
||||
"Qs_lvt_102458": "1778741596%2C1779092047",
|
||||
"Hm_lvt_85cdbdd6ba7f014cd503e9f1cd5e5ba0": "1778741596,1779092047",
|
||||
"Qs_pv_102458": "4477521906714103000%2C2213764348932670000%2C1655296003018746400%2C2955048170989231600%2C930806901093578800",
|
||||
"_ga_WPQK651LHJ": "GS2.1.s1779095976$o3$g0$t1779095976$j60$l0$h0"
|
||||
},
|
||||
"key_node_titles": [
|
||||
"大模型",
|
||||
"单次反思",
|
||||
"大模型二次生成"
|
||||
],
|
||||
"window": {
|
||||
"width": 1400,
|
||||
"height": 800,
|
||||
"min_width": 1200,
|
||||
"min_height": 600
|
||||
},
|
||||
"default_date_range": {
|
||||
"days_before": 1
|
||||
},
|
||||
"columns": {
|
||||
"index": 50,
|
||||
"chat_id": 120,
|
||||
"title": 100,
|
||||
"time": 120,
|
||||
"query": 150,
|
||||
"node_output": 150,
|
||||
"node_audit": 100,
|
||||
"answer": 150
|
||||
},
|
||||
"excel_export": {
|
||||
"default_dir": "desktop"
|
||||
},
|
||||
"last_date_range": {
|
||||
"start_date": "2026-05-21",
|
||||
"end_date": "2026-05-22"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,3 @@
|
||||
requests
|
||||
openpyxl
|
||||
anthropic
|
||||
@@ -0,0 +1,42 @@
|
||||
import os
import time

import anthropic

# SECURITY: the key is read from the environment so that no credential is
# stored in source control.  Set MINIMAX_API_KEY before running this script.
api_key = os.environ.get("MINIMAX_API_KEY", "")

try:
    if not api_key:
        raise RuntimeError("未设置环境变量 MINIMAX_API_KEY")

    print("正在连接 MiniMax API...")
    client = anthropic.Anthropic(
        api_key=api_key,
        base_url="https://api.minimaxi.com/anthropic",
        timeout=30.0
    )

    print("正在发送测试请求...")
    start = time.time()

    message = client.messages.create(
        model="MiniMax-M2.7",
        max_tokens=100,
        system="You are a helpful assistant.",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Hello, how are you?"
                    }
                ]
            }
        ]
    )

    elapsed = time.time() - start
    print(f"API调用成功!耗时: {elapsed:.2f}秒")

    # Only text blocks are printed; other block types are ignored.
    for block in message.content:
        if block.type == "text":
            print(f"响应: {block.text}")

except Exception as e:
    # Top-level boundary of a diagnostic script: report and exit normally.
    print(f"API调用失败: {type(e).__name__}: {str(e)}")
|
||||
@@ -0,0 +1,13 @@
|
||||
import sys
import traceback

import pandas as pd

# Workbook to read.  The default is the author's local file; pass a different
# path as the first command-line argument to run the check on another machine.
excel_path = sys.argv[1] if len(sys.argv) > 1 else r"C:\Users\hp_z66\Desktop\自动化测试.xlsx"

try:
    print("正在读取Excel文件...")
    df = pd.read_excel(excel_path)
    print(f"成功读取,共 {len(df)} 行数据")
    print("列名:", df.columns.tolist())
    print("\n前3行数据:")
    print(df.head(3))
except Exception as e:
    # Diagnostic script: show the full traceback rather than aborting silently.
    print(f"读取失败: {type(e).__name__}: {str(e)}")
    traceback.print_exc()
|
||||
@@ -0,0 +1,13 @@
|
||||
import tkinter as tk
|
||||
from tkinter import messagebox
|
||||
|
||||
if __name__ == "__main__":
    # Minimal window used to confirm that Tkinter can create and show widgets.
    window = tk.Tk()
    window.title("测试Tkinter")

    tk.Label(window, text="Tkinter正常运行!").pack(pady=20)

    def show_ok():
        """Pop up the confirmation dialog when the button is pressed."""
        messagebox.showinfo("测试", "功能正常!")

    tk.Button(window, text="点击测试", command=show_ok).pack(pady=10)

    window.after(3000, window.destroy)  # close automatically after 3 seconds
    window.mainloop()
    print("测试通过,运行成功!")
|
||||
@@ -0,0 +1,16 @@
|
||||
import sys
|
||||
# Verify that tkinter can be imported and that a window can actually be shown.
try:
    import tkinter as tk
    print("SUCCESS: tkinter imported")

    root = tk.Tk()
    root.title("Test")
    label = tk.Label(root, text="Tkinter works!")
    label.pack()
    root.after(2000, root.destroy)  # close automatically after 2 seconds
    root.mainloop()
    print("SUCCESS: Tkinter window shown")

except ImportError as e:
    print(f"ERROR: {e}")
    sys.exit(1)
except tk.TclError as e:
    # tk.Tk() raises TclError when no display is available; report it with the
    # same exit status instead of ending in an unhandled traceback.
    print(f"ERROR: {e}")
    sys.exit(1)
|
||||
@@ -0,0 +1,168 @@
|
||||
# 智能客服节点日志查看器 - 使用说明
|
||||
|
||||
## 使用指南
|
||||
|
||||
### 1. 首次配置
|
||||
|
||||
#### 步骤1:获取API凭证
|
||||
|
||||
1. 登录Udesk控制台:https://agent.udesk.cn
|
||||
2. 打开浏览器开发者工具(F12),切换到"网络"标签页
|
||||
3. 执行一个API请求(如刷新页面)
|
||||
4. 找到带有`authorization: Bearer xxx`的请求
|
||||
5. 需要先切换到网络视图,然后找到刚刚的请求命令
|
||||
|
||||

|
||||
|
||||
6. 复制完整的curl命令(在该请求上右键,选择"复制为 cURL (bash)")
|
||||
|
||||

|
||||
|
||||
#### 步骤2:配置程序
|
||||
|
||||
1. 点击"配置"按钮
|
||||
2. 在"Curl命令"文本框粘贴curl命令
|
||||
3. 点击"解析Curl"按钮
|
||||
4. 系统自动填充App ID、API Token和Cookies
|
||||
5. (可选)在"关键节点"标签页编辑需要监控的节点
|
||||
6. 点击"应用"保存配置
|
||||
7. 
|
||||
|
||||
### 2. 获取数据
|
||||
|
||||
1. 选择开始日期和结束日期
|
||||
2. 点击"获取数据"按钮
|
||||
3. 等待数据加载完成
|
||||
4. 在"审核记录"标签页查看关键对话
|
||||
|
||||
### 3. 人工审核(直接excel表格操作即可)
|
||||
|
||||
1. 在"审核记录"表格中找到需要审核的记录
|
||||
2. 双击"大模型审核"或"二次生成审核"单元格
|
||||
3. 选择审核结果(正确/错误/需改进)
|
||||
4. 审核结果自动保存到audit_cache.json
|
||||
|
||||
### 4. 大模型自动评审
|
||||
|
||||
#### 准备测试用例
|
||||
|
||||
1. 创建Excel文件,包含以下列:
|
||||
- 提问问题:用户问题内容
|
||||
- 答案:标准答案
|
||||
2. 保存为.xlsx格式
|
||||
|
||||
#### 执行评审
|
||||
|
||||
1. 点击"大模型评审"按钮
|
||||
2. 选择测试用例Excel文件
|
||||
3. 勾选需要评审的节点(可多选)
|
||||
4. 点击"开始评审"
|
||||
5. 等待评审完成
|
||||
|
||||
### 5. 导出Excel
|
||||
|
||||
1. 点击"导出Excel"按钮
|
||||
2. 选择保存位置
|
||||
3. 导出文件包含三个Sheet:
|
||||
- 关键节点审核:人工审核结果
|
||||
- 完整流程日志:所有节点执行记录
|
||||
- 大模型评审结果:AI评审报告(如有)
|
||||
|
||||
### 6. 切换App(默认使用 6.4)
|
||||
|
||||
6.4 a46947ea-c1bf-4884-b6ba-9d7be32e18c4
|
||||
|
||||
6.3.1 29d32bb5-994d-452a-8828-9d94b9eaea9d
|
||||
|
||||
## 五、配置说明
|
||||
|
||||
### 配置文件结构
|
||||
|
||||
config.json包含以下配置项:
|
||||
|
||||
```json
|
||||
{
|
||||
"app_id": "应用ID",
|
||||
"api_token": "API访问令牌",
|
||||
"cookies": {
|
||||
"cookie名": "cookie值"
|
||||
},
|
||||
"key_node_titles": ["节点1", "节点2"],
|
||||
"window": {
|
||||
"width": 1400,
|
||||
"height": 800
|
||||
},
|
||||
"default_date_range": {
|
||||
"days_before": 1
|
||||
},
|
||||
"columns": {
|
||||
"index": 50,
|
||||
"chat_id": 120
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 关键配置项
|
||||
|
||||
| 配置项 | 说明 | 默认值 |
|
||||
| ------------------------------ | ---------- | ------------------------ |
|
||||
| app_id | Udesk应用ID | 必填 |
|
||||
| api_token | Bearer令牌 | 必填 |
|
||||
| key_node_titles | 需要监控的关键节点 | ["大模型","单次反思","大模型二次生成"] |
|
||||
| default_date_range.days_before | 默认查询几天前的数据 | 1 |
|
||||
|
||||
## 六、常见问题
|
||||
|
||||
### Q1: 提示401 - Invalid token
|
||||
|
||||
**解决方案**:
|
||||
|
||||
- 重新获取最新的curl命令
|
||||
- 在配置对话框重新解析并保存
|
||||
- 确保token在有效期内
|
||||
|
||||
### Q2: 程序无法启动
|
||||
|
||||
**解决方案**:
|
||||
|
||||
- 检查是否有杀毒软件拦截
|
||||
- 右键选择"以管理员身份运行"
|
||||
- 使用源码运行方式排查问题
|
||||
|
||||
### Q3: Excel导出失败
|
||||
|
||||
**解决方案**:
|
||||
|
||||
- 检查保存位置是否有权限
|
||||
- 确保目标文件未被其他程序占用
|
||||
- 尝试保存到其他目录
|
||||
|
||||
### Q4: 大模型评审无结果
|
||||
|
||||
**解决方案**:
|
||||
|
||||
- 确认测试用例格式正确
|
||||
- 检查用户问题匹配度
|
||||
- 查看状态栏提示信息
|
||||
|
||||
## 七、技术支持
|
||||
|
||||
如有问题,请检查:
|
||||
|
||||
1. 程序同目录的log文件(如有)
|
||||
2. config.json配置是否正确
|
||||
3. 网络连接是否正常
|
||||
|
||||
## 八、版本历史
|
||||
|
||||
### v1.0.0
|
||||
|
||||
- 初始版本发布
|
||||
- 支持日志查看、人工审核
|
||||
- 支持大模型自动评审
|
||||
- 支持Excel导出
|
||||
- 支持配置管理

---

**注意事项**:
|
||||
1. 请妥善保管API Token,不要泄露
|
||||
2. 建议定期备份audit_cache.json
|
||||
3. API Token有有效期,过期后需要重新获取
|
||||
4. 首次使用建议先用少量数据测试
|
||||
@@ -0,0 +1,352 @@
|
||||
import requests
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
from openpyxl import Workbook
|
||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side, numbers
|
||||
from openpyxl.utils import get_column_letter
|
||||
|
||||
def get_time_range(days_ago=1):
    """Build the query window from ``days_ago`` days back until the end of today.

    Args:
        days_ago: how many days before now the window starts.

    Returns:
        A dict with ``start_time`` (start date at 00:00) and ``end_time``
        (current date at 23:59), both formatted as ``YYYY-MM-DD HH:MM``.
    """
    now = datetime.now()
    window_start = now - timedelta(days=days_ago)
    start_text = window_start.strftime('%Y-%m-%d 00:00')
    end_text = now.strftime('%Y-%m-%d 23:59')
    return {'start_time': start_text, 'end_time': end_text}
|
||||
|
||||
import os  # block-local: the file's import header does not include os

# Udesk application whose logs are exported.
app_id = 'a46947ea-c1bf-4884-b6ba-9d7be32e18c4'

# SECURITY: the console bearer token is read from the environment so that no
# credential is stored in source control.  Set UDESK_API_TOKEN before running.
_api_token = os.environ.get('UDESK_API_TOKEN', '')
if not _api_token:
    print('警告: 未设置环境变量 UDESK_API_TOKEN,接口请求将无法通过鉴权')

# Browser cookies sent along with every request.
cookies = {
    'ut_user_id': 'null',
    'ut_global_id': '%22ca426419-74d7-4dc8-bc5c-c866b096b297%22',
    '_gcl_au': '1.1.505382516.1778740165',
    '_ga': 'GA1.1.968517445.1778741596',
    'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%2219e2542d1f1efd-0b7accb742cea4-4c657b58-2073600-19e2542d1f21d61%22%2C%22%24device_id%22%3A%2219e2542d1f1efd-0b7accb742cea4-4c657b58-2073600-19e2542d1f21d61%22%2C%22props%22%3A%7B%7D%7D',
    'Qs_lvt_102458': '1778741596%2C1779092047',
    'Hm_lvt_85cdbdd6ba7f014cd503e9f1cd5e5ba0': '1778741596,1779092047',
    'Qs_pv_102458': '4477521906714103000%2C2213764348932670000%2C1655296003018746400%2C2955048170989231600%2C930806901093578800',
    '_ga_WPQK651LHJ': 'GS2.1.s1779095976$o3$g0$t1779095976$j60$l0$h0',
}

# Request headers copied from a browser session; the Referer is derived from
# app_id so the two can no longer drift apart.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Connection': 'keep-alive',
    'Referer': f'https://agent.udesk.cn/app/{app_id}/logs',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36 Edg/148.0.0.0',
    'authorization': f'Bearer {_api_token}',
    'content-type': 'application/json',
    'sec-ch-ua': '"Chromium";v="148", "Microsoft Edge";v="148", "Not/A)Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
|
||||
|
||||
def fetch_nodes(wf_run_id):
    """Fetch the node execution records of one workflow run.

    Args:
        wf_run_id: workflow run identifier; a falsy value short-circuits.

    Returns:
        The list under ``data.list`` in the response, or an empty list when the
        id is empty, the payload has no such list, or the request fails.
    """
    if not wf_run_id:
        return []
    url = f'https://agent.udesk.cn/api/backend/new/apps/{app_id}/wfRuns/{wf_run_id}/nodeExecutions'
    try:
        # The timeout matches fetch_all_messages so a stalled connection
        # cannot hang the whole export.
        response = requests.get(url, cookies=cookies, headers=headers, timeout=30)
        payload = response.json()
        # "data" or "list" may be present but null; always hand back a list.
        return (payload.get('data') or {}).get('list') or []
    except Exception as e:
        # Best effort: one failing run must not abort the export.
        print(f'获取节点执行记录失败 wf_run_id={wf_run_id}: {e}')
        return []
|
||||
|
||||
def safe_json_dumps(obj):
    """Serialise ``obj`` to a JSON string without ever raising.

    ``None`` becomes an empty string; values that ``json.dumps`` rejects fall
    back to ``str(obj)``.  Non-ASCII characters are kept unescaped.
    """
    if obj is not None:
        try:
            encoded = json.dumps(obj, ensure_ascii=False)
        except Exception:
            encoded = str(obj)
        return encoded
    return ''
|
||||
|
||||
def safe_str(val):
    """Return ``str(val)``, mapping ``None`` to an empty string."""
    return '' if val is None else str(val)
|
||||
|
||||
def ts_to_datetime(ts):
    """Format a Unix timestamp in seconds as local ``YYYY-MM-DD HH:MM:SS``.

    Args:
        ts: timestamp as an int, float or numeric string.

    Returns:
        The formatted local time, or an empty string when the value is
        missing, zero, negative, non-numeric or out of range.
    """
    try:
        seconds = int(ts)
    except (TypeError, ValueError, OverflowError):
        return ''
    # Callers default a missing timestamp to 0; show that as blank rather
    # than as a misleading 1970 date.
    if seconds <= 0:
        return ''
    try:
        return datetime.fromtimestamp(seconds).strftime('%Y-%m-%d %H:%M:%S')
    except (ValueError, OverflowError, OSError):
        return ''
|
||||
|
||||
# Titles of the workflow nodes whose text output is exported for auditing.
KEY_NODE_TITLES = ['大模型', '单次反思', '大模型二次生成']

# Page size requested from the chat-log listing; also drives the page count.
PAGE_SIZE = 10

time_range = get_time_range(days_ago=1)

params = {
    'page_number': '1',
    'page_size': str(PAGE_SIZE),
    'start_time': time_range['start_time'],
    'end_time': time_range['end_time'],
    'order_by': '-created_at',
    'status': 'all',
}

chat_pages_url = f'https://agent.udesk.cn/api/backend/apps/{app_id}/log/chat/pages'


def _fetch_chat_page(page_number):
    """Return the parsed JSON body of one page of the chat-log listing."""
    params['page_number'] = str(page_number)
    response = requests.get(
        chat_pages_url,
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=30,
    )
    return response.json()


data = _fetch_chat_page(1)
# "data" and "total" may be present but null in the payload.
total = (data.get('data') or {}).get('total') or 0
total_page = max((total + PAGE_SIZE - 1) // PAGE_SIZE, 1)  # ceiling division
print(f'共 {total} 条对话记录,{total_page} 页,开始拉取...')

chart_list = []
for page in range(1, total_page + 1):
    # Page 1 was already downloaded to learn the total; reuse it.
    page_body = data if page == 1 else _fetch_chat_page(page)
    page_data = (page_body.get('data') or {}).get('list') or []
    chart_list.extend(page_data)
    print(f'  第{page}/{total_page}页,获取 {len(page_data)} 条')

print(f'\n对话列表获取完成,共 {len(chart_list)} 条')
|
||||
|
||||
records = []
|
||||
|
||||
def fetch_all_messages(chat_log_id):
    """Download every message of one chat log, page by page.

    Each follow-up request passes the ``id`` of the first message of the
    previous page as ``fid``.

    Args:
        chat_log_id: identifier of the chat log to read.

    Returns:
        All messages collected before the last page, an error, or a cursor
        that stopped advancing.
    """
    all_messages = []
    fid = None
    page_size = 10
    url = f'https://agent.udesk.cn/api/backend/apps/{app_id}/chatMessage'

    while True:
        try:
            c_params = {'id': chat_log_id, 'page_size': str(page_size)}
            if fid:
                c_params['fid'] = fid

            c_resp = requests.get(
                url,
                params=c_params,
                cookies=cookies,
                headers=headers,
                timeout=30
            )
            messages = (c_resp.json().get('data') or {}).get('list') or []
            if not messages:
                break

            all_messages.extend(messages)
            if len(messages) < page_size:
                break  # a short page is the last page

            next_fid = messages[0].get('id')
            # Stop when the cursor is missing or did not move; repeating the
            # same request would otherwise loop forever.
            if not next_fid or next_fid == fid:
                break
            fid = next_fid
        except Exception as e:
            # Best effort: keep what was collected, but report the failure
            # instead of hiding it.
            print(f'获取对话消息失败 chat_log_id={chat_log_id}: {e}')
            break

    return all_messages
|
||||
|
||||
# Build one record per workflow run found in the messages of every chat log.
for idx, chart in enumerate(chart_list):
    chat_log_id = chart.get('chat_log_id', '')
    chat_title = chart.get('title') or chart.get('chat_log_name') or ''
    created_time = str(chart.get('created_time', ''))

    try:
        messages = fetch_all_messages(chat_log_id)
        print(f'  [{idx+1}] 获取对话消息 {len(messages)} 条')
    except Exception as e:
        print(f'  [{idx+1}] 获取对话消息失败 chat_log_id={chat_log_id}: {e}')
        messages = []

    for msg in messages:
        wf_run_id = msg.get('workflow_run_id', '')
        if not wf_run_id:
            continue  # message was not produced by a workflow run
        nodes = fetch_nodes(wf_run_id)

        user_query = ''
        key_outputs = {}
        final_answer = ''
        for n in nodes:
            # "outputs" may be present but null; guard once for every lookup.
            outputs = n.get('outputs') or {}
            node_type = n.get('node_type')

            # The first start node carrying sys.query supplies the question.
            if not user_query and node_type == 'start' and outputs.get('sys.query'):
                user_query = outputs['sys.query']

            title = n.get('title', '')
            if title in KEY_NODE_TITLES:
                key_outputs[title] = outputs.get('text', '')
            if node_type == 'answer':
                final_answer = outputs.get('answer', '')

        records.append({
            'chat_log_id': chat_log_id,
            'chat_title': chat_title,
            'created_time': created_time,
            'user_query': user_query,
            'wf_run_id': wf_run_id,
            'nodes': nodes,
            'key_outputs': key_outputs,
            'final_answer': final_answer,
        })

        # str() keeps the progress line working for non-string ids.
        print(f'  [{idx+1}/{len(chart_list)}] {str(chat_log_id)[:8]}... 节点数:{len(nodes)}')

print(f'\n数据拉取完成,共 {len(records)} 条有效记录,开始生成Excel...')
|
||||
|
||||
from pathlib import Path  # block-local: the file's import header does not include pathlib

wb = Workbook()

# Shared cell styles, created once instead of once per cell.
header_font = Font(name='微软雅黑', bold=True, size=11, color='FFFFFF')
header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
log_header_fill = PatternFill(start_color='548235', end_color='548235', fill_type='solid')
key_fill = PatternFill(start_color='FFF2CC', end_color='FFF2CC', fill_type='solid')
failed_fill = PatternFill(start_color='FFC7CE', end_color='FFC7CE', fill_type='solid')
data_font = Font(name='微软雅黑', size=10)
thin_border = Border(
    left=Side(style='thin'),
    right=Side(style='thin'),
    top=Side(style='thin'),
    bottom=Side(style='thin'),
)
wrap_alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
center_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)


def _write_header(ws, titles, fill):
    """Write ``titles`` into row 1 of ``ws`` with the header style and ``fill``."""
    for col_idx, title in enumerate(titles, 1):
        cell = ws.cell(row=1, column=col_idx, value=title)
        cell.font = header_font
        cell.fill = fill
        cell.alignment = center_alignment
        cell.border = thin_border


def _write_row(ws, row, values, fill=None, fill_columns=None):
    """Write ``values`` into ``row``; ``fill`` goes on ``fill_columns`` (all when None)."""
    for col_idx, val in enumerate(values, 1):
        cell = ws.cell(row=row, column=col_idx, value=val)
        cell.font = data_font
        cell.alignment = wrap_alignment
        cell.border = thin_border
        if fill is not None and (fill_columns is None or col_idx in fill_columns):
            cell.fill = fill


def _set_widths(ws, widths):
    """Apply a ``{column letter: width}`` mapping to ``ws``."""
    for letter, width in widths.items():
        ws.column_dimensions[letter].width = width


# ---- Sheet 1: one row per workflow run, key node outputs highlighted ----
ws1 = wb.active
ws1.title = '关键节点审核'

sheet1_headers = [
    '序号', '对话ID', '会话标题', '对话时间',
    '用户问题',
    '大模型_输出', '单次反思_输出', '大模型二次生成_输出',
    '最终回答', '人工审核结果', '备注'
]
_write_header(ws1, sheet1_headers, header_fill)

highlighted_headers = {
    '用户问题', '大模型_输出', '单次反思_输出', '大模型二次生成_输出', '最终回答'
}
key_col_indices = {
    col_idx for col_idx, h in enumerate(sheet1_headers, 1) if h in highlighted_headers
}

for row_idx, rec in enumerate(records):
    values = [
        row_idx + 1,
        rec['chat_log_id'],
        rec['chat_title'],
        rec['created_time'],
        rec['user_query'],
        rec['key_outputs'].get('大模型', ''),
        rec['key_outputs'].get('单次反思', ''),
        rec['key_outputs'].get('大模型二次生成', ''),
        rec['final_answer'],
        '',  # manual audit result, filled in by the reviewer
        '',  # remarks
    ]
    _write_row(ws1, row_idx + 2, values, fill=key_fill, fill_columns=key_col_indices)

_set_widths(ws1, {
    'A': 6, 'B': 38, 'C': 20, 'D': 20, 'E': 40, 'F': 50,
    'G': 50, 'H': 50, 'I': 50, 'J': 15, 'K': 20,
})
ws1.freeze_panes = 'E2'
ws1.auto_filter.ref = f'A1:K{len(records) + 1}'

# ---- Sheet 2: one row per node execution, failed nodes highlighted ----
ws2 = wb.create_sheet('全流程日志明细')

sheet2_headers = [
    '序号', '对话ID', '用户问题', '工作流运行ID',
    '节点序号', '节点类型', '节点标题', '节点状态',
    '执行开始时间', '执行结束时间',
    '节点输入(JSON)', '节点输出(JSON)', '错误信息'
]
_write_header(ws2, sheet2_headers, log_header_fill)

log_row = 2
log_seq = 0
for rec in records:
    for node in rec['nodes']:
        log_seq += 1
        created_at = node.get('created_at') or node.get('started_at', 0)
        finished_at = node.get('finished_at', 0)
        values = [
            log_seq,
            rec['chat_log_id'],
            rec['user_query'],
            rec['wf_run_id'],
            node.get('index', ''),
            node.get('node_type', ''),
            node.get('title', ''),
            node.get('status', ''),
            ts_to_datetime(created_at),
            ts_to_datetime(finished_at),
            safe_json_dumps(node.get('inputs')),
            safe_json_dumps(node.get('outputs')),
            safe_str(node.get('error', '')),
        ]
        row_fill = failed_fill if node.get('status') == 'failed' else None
        _write_row(ws2, log_row, values, fill=row_fill)
        log_row += 1

_set_widths(ws2, {
    'A': 6, 'B': 38, 'C': 40, 'D': 38, 'E': 8, 'F': 22, 'G': 20,
    'H': 12, 'I': 20, 'J': 20, 'K': 60, 'L': 60, 'M': 30,
})
ws2.freeze_panes = 'F2'
ws2.auto_filter.ref = f'A1:M{log_row - 1}'

# Save next to this script rather than to a machine-specific absolute path.
output_file = f'智能客服节点审核_{datetime.now().strftime("%Y%m%d_%H%M%S")}.xlsx'
output_path = str(Path(__file__).resolve().parent / output_file)
wb.save(output_path)
print(f'\nExcel已生成: {output_path}')
print(f'  Sheet1「关键节点审核」: {len(records)} 条对话')
print(f'  Sheet2「全流程日志明细」: {log_seq} 条节点执行记录')
|
||||
+1346
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,238 @@
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
import anthropic
|
||||
from datetime import datetime
|
||||
import time
|
||||
|
||||
class AutoReview:
    """Score logged chatbot answers against reference answers with an LLM judge."""

    def __init__(self, api_key=None):
        """Create the API client.

        Args:
            api_key: MiniMax API key.  When omitted it is read from the
                MINIMAX_API_KEY environment variable, so that no credential
                is stored in source control.

        Raises:
            ValueError: if no key is supplied and the variable is not set.
        """
        self.api_key = api_key or os.environ.get("MINIMAX_API_KEY", "")
        if not self.api_key:
            raise ValueError("未提供 API Key:请传入 api_key 参数或设置环境变量 MINIMAX_API_KEY")
        self.client = anthropic.Anthropic(
            api_key=self.api_key,
            base_url="https://api.minimaxi.com/anthropic",
            timeout=30.0
        )
        # Initialised here so match_question works before load_test_cases.
        self.test_cases = {}

    def load_test_cases(self, excel_path):
        """Load ``question -> reference answer`` pairs from an Excel sheet.

        The sheet must have the columns ``提问问题`` and ``答案``.  Rows with an
        empty question are skipped; an empty answer is stored as ``""``.

        Returns:
            The dict of loaded test cases (also stored on ``self.test_cases``).
        """
        df = pd.read_excel(excel_path)
        self.test_cases = {}
        for _, row in df.iterrows():
            raw_question = row['提问问题']
            # pandas reads empty cells as NaN, and str(NaN) would be "nan".
            if pd.isna(raw_question):
                continue
            question = str(raw_question).strip()
            raw_answer = row['答案']
            answer = '' if pd.isna(raw_answer) else str(raw_answer).strip()
            if question:
                self.test_cases[question] = answer
        print(f"已加载 {len(self.test_cases)} 条测试用例")
        return self.test_cases

    def load_log_data(self, log_path):
        """Load log records from a JSON file; return ``[]`` if it does not exist."""
        if os.path.exists(log_path):
            with open(log_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            print(f"已加载 {len(data)} 条日志记录")
            return data
        else:
            print(f"日志文件不存在: {log_path}")
            return []

    def match_question(self, user_query, threshold=0.7):
        """Find the test-case question most similar to ``user_query``.

        Similarity is ``difflib.SequenceMatcher(...).ratio()``.

        Returns:
            ``(question, score)`` for the best match at or above
            ``threshold``, or ``(None, 0)`` when nothing qualifies.
        """
        from difflib import SequenceMatcher

        best_match = None
        best_score = 0

        for std_question in self.test_cases:
            score = SequenceMatcher(None, user_query, std_question).ratio()
            if score > best_score and score >= threshold:
                best_score = score
                best_match = std_question

        return best_match, best_score

    @staticmethod
    def _parse_evaluation(response_text):
        """Turn the judge's raw reply into a dict with score, confidence and reason.

        An optional Markdown code fence (with or without a ``json`` tag) is
        removed first.  Keys missing from the reply are filled with defaults
        so callers can index the result safely.
        """
        text = response_text.strip()
        if text.startswith("```"):
            text = text[3:]
            if text[:4].lower() == "json":
                text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        text = text.strip()

        try:
            result = json.loads(text)
        except json.JSONDecodeError:
            result = None
        if not isinstance(result, dict):
            return {
                "score": 0,
                "confidence": 50,
                "reason": f"解析失败: {text[:100]}"
            }
        result.setdefault("score", 0)
        result.setdefault("confidence", 0)
        result.setdefault("reason", "")
        return result

    def evaluate_consistency(self, generated_answer, standard_answer):
        """Ask the model to grade ``generated_answer`` against ``standard_answer``.

        Returns:
            A dict with ``score``, ``confidence`` and ``reason``.  API or
            parsing failures are reported through ``reason`` with score 0
            instead of being raised.
        """
        system_prompt = """
        你是一个专业的答案一致性评审助手。请按照以下标准评判生成答案与标准答案的一致性:

        一致性评分标准:
        - 10分:完全一致,内容、逻辑、步骤完全相同
        - 8-9分:基本一致,核心内容相同,表述略有差异
        - 6-7分:部分一致,核心思路相同,但有遗漏或错误步骤
        - 4-5分:不太一致,只有部分内容相关
        - 0-3分:不一致,内容无关或错误

        请输出JSON格式,包含:
        - score: 0-10的整数分数
        - confidence: 0-100的整数置信度
        - reason: 简短的评审理由(不超过100字)
        """

        user_prompt = f"""
        【生成答案】
        {generated_answer}

        【标准答案】
        {standard_answer}

        请根据上述标准进行评审,输出JSON格式结果。
        """

        try:
            print(f"正在调用大模型进行评审...")
            start_time = time.time()
            message = self.client.messages.create(
                model="MiniMax-M2.7",
                max_tokens=500,
                system=system_prompt,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": user_prompt
                            }
                        ]
                    }
                ]
            )
            elapsed = time.time() - start_time
            print(f"大模型调用完成,耗时: {elapsed:.2f}秒")

            # Only text blocks contribute to the reply.
            response_text = "".join(
                block.text for block in message.content if block.type == "text"
            )
            return self._parse_evaluation(response_text)
        except Exception as e:
            # Boundary with the remote service: record the failure in the row.
            return {
                "score": 0,
                "confidence": 0,
                "reason": f"API调用失败: {str(e)}"
            }

    @staticmethod
    def _truncate(text, limit=200):
        """Shorten ``text`` to ``limit`` characters, adding "..." when cut."""
        return text[:limit] + "..." if len(text) > limit else text

    def run_review(self, log_data):
        """Review every non-empty key-node output of every log record.

        Records whose question matches no test case produce one placeholder
        row with ``匹配问题`` set to ``未匹配``.

        Returns:
            A list of result-row dicts ready for ``export_results``.
        """
        results = []

        for idx, record in enumerate(log_data):
            # Both fields may be missing or null in cached records.
            user_query = str(record.get('user_query') or '').strip()
            key_outputs = record.get('key_outputs') or {}

            matched_question, match_score = self.match_question(user_query)

            if not matched_question:
                results.append({
                    "序号": idx + 1,
                    "用户问题": user_query,
                    "匹配问题": "未匹配",
                    "匹配度": f"{match_score:.2f}",
                    "节点名称": "-",
                    "生成答案": "-",
                    "标准答案": "-",
                    "一致性评分": "-",
                    "置信度": "-",
                    "评审理由": "未找到匹配的测试用例"
                })
                continue

            standard_answer = self.test_cases[matched_question]

            for node_name, raw_answer in key_outputs.items():
                generated_answer = '' if raw_answer is None else str(raw_answer)
                if not generated_answer.strip():
                    continue

                evaluation = self.evaluate_consistency(generated_answer, standard_answer)

                results.append({
                    "序号": idx + 1,
                    "用户问题": user_query,
                    "匹配问题": matched_question,
                    "匹配度": f"{match_score:.2f}",
                    "节点名称": node_name,
                    "生成答案": self._truncate(generated_answer),
                    "标准答案": self._truncate(standard_answer),
                    "一致性评分": evaluation["score"],
                    "置信度": evaluation["confidence"],
                    "评审理由": evaluation["reason"]
                })
                print(f"已评审第 {idx+1} 条记录,节点: {node_name},评分: {evaluation['score']}")

        return results

    def export_results(self, results, output_path=None):
        """Write ``results`` to an Excel file.

        Args:
            results: rows produced by ``run_review``.
            output_path: target file; defaults to a timestamped name in the
                current working directory.

        Returns:
            The path written, or ``None`` when there is nothing to export.
        """
        if not results:
            print("没有评审结果可导出")
            return

        df = pd.DataFrame(results)

        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = f"自动评审结果_{timestamp}.xlsx"

        df.to_excel(output_path, index=False)
        print(f"评审结果已导出到: {output_path}")
        return output_path
|
||||
|
||||
if __name__ == "__main__":
    reviewer = AutoReview()

    test_cases_path = r"C:\Users\hp_z66\Desktop\自动化测试.xlsx"

    print("正在加载测试用例...")
    reviewer.load_test_cases(test_cases_path)

    # Built-in sample records are reviewed instead of a cache file on disk.
    print("正在加载日志数据...")
    log_data = [
        {
            'user_query': '如何创建一人多车?',
            'key_outputs': {
                '大模型': '会员营销-客户车辆-客户信息里面搜索对应车主的手机号码,点击操作列的修改按钮,车辆信息下方可添加车辆。',
                '单次反思': '步骤清晰,确认操作路径正确',
                '大模型二次生成': '在会员营销模块中找到客户车辆管理,搜索车主手机号后点击修改,在车辆信息区域添加新车辆即可。'
            }
        },
        {
            'user_query': '卡开重了,如何撤销?',
            'key_outputs': {
                '大模型': '开卡单未结算的可以删除,已结算的需要联系管理员处理。',
                '单次反思': '回答不够详细,缺少具体路径',
                '大模型二次生成': '会员营销-卡券积分-卡单据,找到对应的开卡单号,未结算可直接删除;已结算需联系财务处理。'
            }
        },
        {
            'user_query': '如何修改卡可用车辆?',
            'key_outputs': {
                '大模型': '会员营销卡券积分卡管理找到对应的卡信息点击修改可用车辆选择指定车辆即可。',
                '单次反思': '缺少分隔符,步骤不够清晰',
                '大模型二次生成': '会员营销→卡券积分→卡管理,找到对应的卡信息,点击修改,可用车辆中选择指定车辆,即可勾选具体车辆。'
            }
        }
    ]
    print(f"已加载 {len(log_data)} 条测试日志数据")

    print("开始自动评审...")
    results = reviewer.run_review(log_data)

    print("导出评审结果...")
    output_path = reviewer.export_results(results)

    print("\n评审完成!")
    if results:
        # Unmatched rows carry "-" as score; the model may also return a
        # float, so any real number counts (bool is excluded on purpose).
        scores = [
            r["一致性评分"] for r in results
            if isinstance(r["一致性评分"], (int, float))
            and not isinstance(r["一致性评分"], bool)
        ]
        if scores:
            avg_score = sum(scores) / len(scores)
            print(f"平均一致性评分: {avg_score:.2f}")
|
||||
@@ -0,0 +1,185 @@
|
||||
import os
|
||||
import json
|
||||
import pandas as pd
|
||||
import anthropic
|
||||
from datetime import datetime
|
||||
import time
|
||||
|
||||
class AutoReview:
    """Score logged answers against reference answers with an LLM judge.

    Each log record's question is fuzzy-matched against ``self.test_cases``;
    for a matched question every node output (except the skipped node) is
    sent to a model behind an Anthropic-compatible endpoint, which returns a
    consistency score, a confidence and a short reason.
    """

    # Environment variables consulted, in order, when no key is passed in.
    API_KEY_ENV_VARS = ("MINIMAX_API_KEY", "ANTHROPIC_API_KEY")
    BASE_URL = "https://api.minimaxi.com/anthropic"
    MODEL = "MiniMax-M2.7"
    # Node name that run_review never sends to the judge.
    SKIPPED_NODE = '单次反思'
    # Answers longer than this are shortened in the exported rows.
    PREVIEW_LENGTH = 100

    SYSTEM_PROMPT = (
        "\n"
        "你是一个专业的答案一致性评审助手。请按照以下标准评判生成答案与标准答案的一致性:\n"
        "\n"
        "一致性评分标准:\n"
        "- 10分:完全一致,内容、逻辑、步骤完全相同\n"
        "- 8-9分:基本一致,核心内容相同,表述略有差异\n"
        "- 6-7分:部分一致,核心思路相同,但有遗漏或错误步骤\n"
        "- 4-5分:不太一致,只有部分内容相关\n"
        "- 0-3分:不一致,内容无关或错误\n"
        "\n"
        "请输出JSON格式,包含:\n"
        "- score: 0-10的整数分数\n"
        "- confidence: 0-100的整数置信度\n"
        "- reason: 简短的评审理由(不超过100字)\n"
    )

    def __init__(self, api_key=None):
        """Create the API client and the built-in reference cases.

        Args:
            api_key: Credential for the endpoint. When omitted, the first
                non-empty variable in ``API_KEY_ENV_VARS`` is used.

        Raises:
            ValueError: If no key is passed and none is set in the
                environment.
        """
        # The credential must never live in source control, so it is taken
        # from the caller or the environment instead of a literal.
        resolved_key = api_key or next(
            (os.environ[name] for name in self.API_KEY_ENV_VARS if os.environ.get(name)),
            None,
        )
        if not resolved_key:
            raise ValueError(
                "未配置API密钥:请传入 api_key 参数或设置环境变量 MINIMAX_API_KEY"
            )

        self.api_key = resolved_key
        self.client = anthropic.Anthropic(
            api_key=self.api_key,
            base_url=self.BASE_URL,
            timeout=30.0,
        )
        # Reference answers keyed by the canonical question text.
        self.test_cases = {
            "如何创建一人多车?": "会员营销-客户车辆-客户信息里面搜索对应车主的手机号码,点击操作列的修改按钮,车辆信息下方可添加车辆。",
            "卡开重了,如何撤销?": "开卡单未结算的,会员营销-卡券积分-卡单据,找到对应的开卡单号,操作列做删除;开卡单已结算的,联系财务处理。",
        }

    def match_question(self, user_query, threshold=0.7):
        """Find the reference question most similar to ``user_query``.

        Args:
            user_query: Question text taken from a log record.
            threshold: Minimum ``SequenceMatcher`` ratio for a match.

        Returns:
            ``(question, ratio)`` for the best candidate at or above
            ``threshold``, or ``(None, 0)`` when nothing qualifies.
        """
        from difflib import SequenceMatcher

        best_match, best_score = None, 0
        for std_question in self.test_cases:
            score = SequenceMatcher(None, user_query, std_question).ratio()
            # Strict ">" keeps the first candidate on ties.
            if score >= threshold and score > best_score:
                best_match, best_score = std_question, score
        return best_match, best_score

    @staticmethod
    def _strip_code_fence(text):
        """Remove a surrounding Markdown code fence (with or without ``json``)."""
        text = text.strip()
        if text.startswith("```"):
            text = text[3:]
            if text[:4].lower() == "json":
                text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        return text.strip()

    @staticmethod
    def _coerce_int(value, low, high, default=0):
        """Convert ``value`` to an int clamped to ``[low, high]``, else ``default``."""
        try:
            number = int(round(float(value)))
        except (TypeError, ValueError, OverflowError):
            return default
        return max(low, min(high, number))

    @classmethod
    def _parse_evaluation(cls, response_text):
        """Turn the model's reply into a ``score``/``confidence``/``reason`` dict.

        The reply is parsed as JSON after fence stripping; if that fails, the
        span from the first ``{`` to the last ``}`` is tried so that prose
        around the object does not cause a failure. The result always has
        all three keys, with ``score`` in 0-10 and ``confidence`` in 0-100.
        """
        text = cls._strip_code_fence(response_text)

        candidates = [text]
        start, end = text.find("{"), text.rfind("}")
        if 0 <= start < end:
            candidates.append(text[start:end + 1])

        parsed = None
        for candidate in candidates:
            try:
                loaded = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(loaded, dict):
                parsed = loaded
                break

        if parsed is None:
            return {
                "score": 0,
                "confidence": 50,
                "reason": f"解析失败: {text[:50]}",
            }

        reason = parsed.get("reason")
        return {
            "score": cls._coerce_int(parsed.get("score"), 0, 10),
            "confidence": cls._coerce_int(parsed.get("confidence"), 0, 100),
            "reason": "" if reason is None else str(reason),
        }

    def evaluate_consistency(self, generated_answer, standard_answer):
        """Ask the model how consistent ``generated_answer`` is with the reference.

        Args:
            generated_answer: Answer text taken from a log record.
            standard_answer: Reference answer for the matched question.

        Returns:
            Dict with integer ``score`` and ``confidence`` and a string
            ``reason``. API failures and unparseable replies yield a score
            of 0 with the failure described in ``reason``; nothing is raised.
        """
        user_prompt = (
            "\n"
            "【生成答案】\n"
            f"{generated_answer}\n"
            "\n"
            "【标准答案】\n"
            f"{standard_answer}\n"
            "\n"
            "请根据上述标准进行评审,输出JSON格式结果。\n"
        )

        try:
            message = self.client.messages.create(
                model=self.MODEL,
                max_tokens=500,
                system=self.SYSTEM_PROMPT,
                messages=[
                    {
                        "role": "user",
                        "content": [{"type": "text", "text": user_prompt}],
                    }
                ],
            )
            # Only text blocks carry the verdict; other block types are ignored.
            response_text = "".join(
                block.text for block in message.content if block.type == "text"
            )
        except Exception as e:
            # Deliberately broad: one failed call must not abort the batch.
            return {
                "score": 0,
                "confidence": 0,
                "reason": f"API调用失败: {str(e)[:50]}",
            }

        return self._parse_evaluation(response_text)

    @classmethod
    def _preview(cls, text):
        """Shorten ``text`` to ``PREVIEW_LENGTH`` characters plus ``...``."""
        if len(text) > cls.PREVIEW_LENGTH:
            return text[:cls.PREVIEW_LENGTH] + "..."
        return text

    def run_review(self, log_data):
        """Review every node output of every record that matches a test case.

        Args:
            log_data: Iterable of dicts with ``user_query`` (str) and
                ``key_outputs`` (mapping of node name to answer text).

        Returns:
            List of row dicts, one per reviewed node output. Records with no
            matching test case, the skipped node, and blank or non-string
            outputs contribute no rows.
        """
        results = []

        for idx, record in enumerate(log_data):
            # Tolerate missing or None fields instead of crashing on .strip().
            user_query = str(record.get('user_query') or '').strip()
            key_outputs = record.get('key_outputs') or {}

            print(f"\n处理第 {idx+1} 条记录: {user_query}")

            matched_question, match_score = self.match_question(user_query)
            if not matched_question:
                print(f"未匹配到测试用例 (匹配度: {match_score:.2f})")
                continue

            print(f"匹配到测试用例: {matched_question} (匹配度: {match_score:.2f})")
            standard_answer = self.test_cases[matched_question]

            for node_name, generated_answer in key_outputs.items():
                if node_name == self.SKIPPED_NODE:
                    continue
                if not isinstance(generated_answer, str) or not generated_answer.strip():
                    continue

                print(f" 评审节点: {node_name}")
                evaluation = self.evaluate_consistency(generated_answer, standard_answer)

                results.append({
                    "序号": idx + 1,
                    "用户问题": user_query,
                    "匹配问题": matched_question,
                    "匹配度": f"{match_score:.2f}",
                    "节点名称": node_name,
                    "生成答案": self._preview(generated_answer),
                    "标准答案": self._preview(standard_answer),
                    "一致性评分": evaluation["score"],
                    "置信度": evaluation["confidence"],
                    "评审理由": evaluation["reason"],
                })
                print(f" 评分: {evaluation['score']}, 置信度: {evaluation['confidence']}%, 理由: {evaluation['reason']}")

        return results

    def export_results(self, results, output_path=None):
        """Write the review rows to an Excel workbook.

        Args:
            results: Rows produced by ``run_review``.
            output_path: Destination file. When ``None``, a name of the form
                ``自动评审结果_<YYYYmmdd_HHMMSS>.xlsx`` is generated.

        Returns:
            The path that was written, or ``None`` when ``results`` is empty.
        """
        if not results:
            print("没有评审结果可导出")
            return None

        df = pd.DataFrame(results)
        if output_path is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = f"自动评审结果_{timestamp}.xlsx"

        df.to_excel(output_path, index=False)
        print(f"\n评审结果已导出到: {output_path}")
        return output_path
|
||||
|
||||
if __name__ == "__main__":
    # Demo driver: review one inline sample record with the built-in test
    # cases, export the rows and print the mean integer score.
    print("=== 自动评审系统 ===")
    reviewer = AutoReview()
    print(f"已加载 {len(reviewer.test_cases)} 条测试用例")

    sample_outputs = {
        '大模型': '会员营销-客户车辆-客户信息里面搜索对应车主的手机号码,点击操作列的修改按钮,车辆信息下方可添加车辆。',
        '单次反思': '步骤清晰,确认操作路径正确',
        '大模型二次生成': '在会员营销模块中找到客户车辆管理,搜索车主手机号后点击修改,在车辆信息区域添加新车辆即可。',
    }
    sample_record = {
        'user_query': '如何创建一人多车?',
        'key_outputs': sample_outputs,
    }
    log_data = [sample_record]

    print(f"\n开始评审 {len(log_data)} 条记录...")
    results = reviewer.run_review(log_data)

    reviewer.export_results(results)

    # Only integer scores take part in the average; anything else is ignored.
    numeric_scores = []
    for row in results or ():
        value = row["一致性评分"]
        if isinstance(value, int):
            numeric_scores.append(value)
    if numeric_scores:
        mean_score = sum(numeric_scores) / len(numeric_scores)
        print(f"\n平均一致性评分: {mean_score:.2f}")

    print("\n评审完成!")
|
||||
@@ -1 +1,37 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
# Read the header-less source sheet; columns are identified by position only.
df = pd.read_excel(r"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\udesk\自动评测.xlsx", sheet_name='Sheet1', header=None)

# Name columns by position (1st -> query, 2nd -> reference_response,
# 3rd -> session_id), using as many names as the sheet actually has.
positional_names = ["query", "reference_response", "session_id"]
col_count = df.shape[1]
# A sheet wider than three columns used to crash here: assigning three names
# to a wider frame raises a length-mismatch ValueError. Extra columns are not
# part of the template, so they are dropped before naming.
df = df.iloc[:, :len(positional_names)].copy()
df.columns = positional_names[:df.shape[1]]

# Arrange the output according to the template fields.
template_cols = ["session_id", "query", "reference_response"]
for col in template_cols:
    if col not in df.columns:
        df[col] = pd.NA  # fields absent from the source are left empty

df = df[template_cols]

# Convert each field to its target type.
# NOTE(review): astype(str) turns missing cells into the literal strings
# "nan" / "<NA>" — confirm that is intended.
df["session_id"] = pd.to_numeric(df["session_id"], errors="coerce").astype("Int64")
df["query"] = df["query"].astype(str)
df["reference_response"] = df["reference_response"].astype(str)

# Randomly sample 100 rows, or every row when fewer than 100 exist.
# The fixed random_state makes the selection reproducible.
n = min(100, len(df))
sampled = df.sample(n=n, random_state=42)

# Write the result into the current working directory.
output_path = os.path.join(os.getcwd(), "随机抽取100条结果.xlsx")
sampled.to_excel(output_path, index=False)
print(f"已随机抽取 {n} 条数据,保存至: {output_path}")
|
||||
|
||||
Reference in New Issue
Block a user