# Files
# F6--/张阳脚本/udesk/历史对话捞取.py
# T
# 2026-06-02 15:08:26 +08:00
#
# 353 lines
# 12 KiB
# Python

import json
import os
from datetime import datetime, timedelta

import requests
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side, numbers
from openpyxl.utils import get_column_letter
def get_time_range(days_ago=1, now=None):
    """Build the start/end timestamps used to query the chat-log listing.

    The window opens at 00:00 on the day ``days_ago`` days before ``now`` and
    closes at 23:59 on the day of ``now``. Only the date part of each moment
    is used; the clock time is replaced by the fixed ``00:00`` / ``23:59``.

    Args:
        days_ago: Number of days to look back from ``now``.
        now: Reference ``datetime``. Defaults to ``datetime.now()``; pass an
            explicit value to query a past window or to get reproducible
            output.

    Returns:
        dict with ``'start_time'`` and ``'end_time'`` strings formatted as
        ``YYYY-MM-DD HH:MM``.
    """
    end_time = datetime.now() if now is None else now
    start_time = end_time - timedelta(days=days_ago)
    return {
        'start_time': start_time.strftime('%Y-%m-%d 00:00'),
        'end_time': end_time.strftime('%Y-%m-%d 23:59'),
    }
# Identifier of the Udesk agent application; it is part of every API URL and
# of the Referer header below.
app_id = 'a46947ea-c1bf-4884-b6ba-9d7be32e18c4'

# Cookies sent with every request.
# NOTE(review): these look like analytics/tracking cookies copied from a
# browser session -- confirm whether the API needs any of them.
cookies = {
    'ut_user_id': 'null',
    'ut_global_id': '%22ca426419-74d7-4dc8-bc5c-c866b096b297%22',
    '_gcl_au': '1.1.505382516.1778740165',
    '_ga': 'GA1.1.968517445.1778741596',
    'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%2219e2542d1f1efd-0b7accb742cea4-4c657b58-2073600-19e2542d1f21d61%22%2C%22%24device_id%22%3A%2219e2542d1f1efd-0b7accb742cea4-4c657b58-2073600-19e2542d1f21d61%22%2C%22props%22%3A%7B%7D%7D',
    'Qs_lvt_102458': '1778741596%2C1779092047',
    'Hm_lvt_85cdbdd6ba7f014cd503e9f1cd5e5ba0': '1778741596,1779092047',
    'Qs_pv_102458': '4477521906714103000%2C2213764348932670000%2C1655296003018746400%2C2955048170989231600%2C930806901093578800',
    '_ga_WPQK651LHJ': 'GS2.1.s1779095976$o3$g0$t1779095976$j60$l0$h0',
}

# SECURITY NOTE(review): a bearer token is committed in source. Its JWT
# payload decodes to an ``exp`` claim of 1779260658 (Unix seconds), so the
# server will presumably reject it after that moment. Prefer supplying a
# fresh token through the UDESK_AUTH_TOKEN environment variable (raw token,
# without the "Bearer " prefix); the embedded value is kept only as a
# backward-compatible fallback and should be rotated and removed.
_EMBEDDED_AUTH_TOKEN = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiNWQzNjMzZjMtNTIzZS00NTRjLTgyYzgtMTczMjc2YjVjMjg0IiwiZXhwIjoxNzc5MjYwNjU4LCJpc3MiOiJTRUxGX0hPU1RFRCIsInN1YiI6IkNvbnNvbGUgQVBJIFBhc3Nwb3J0In0.RseHavVdtiKHMKB2Mm8WVm4KwhiUZKmG93TiMIKuwtQ'
auth_token = os.environ.get('UDESK_AUTH_TOKEN') or _EMBEDDED_AUTH_TOKEN

# Request headers shared by every call to agent.udesk.cn.
headers = {
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'Connection': 'keep-alive',
    # Built from app_id so the two cannot drift apart.
    'Referer': f'https://agent.udesk.cn/app/{app_id}/logs',
    'Sec-Fetch-Dest': 'empty',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Site': 'same-origin',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36 Edg/148.0.0.0',
    'authorization': f'Bearer {auth_token}',
    'content-type': 'application/json',
    'sec-ch-ua': '"Chromium";v="148", "Microsoft Edge";v="148", "Not/A)Brand";v="99"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}
def fetch_nodes(wf_run_id):
    """Fetch the node-execution records of one workflow run.

    Args:
        wf_run_id: Workflow run id. A falsy value short-circuits to an empty
            list without issuing a request.

    Returns:
        The ``data.list`` array of the response, or ``[]`` when the id is
        empty, the request fails, or the response carries no list. This
        function never raises; failures are printed and reported as ``[]``.
    """
    if not wf_run_id:
        return []
    try:
        response = requests.get(
            f'https://agent.udesk.cn/api/backend/new/apps/{app_id}/wfRuns/{wf_run_id}/nodeExecutions',
            cookies=cookies,
            headers=headers,
            # Same limit as fetch_all_messages; without it one stalled
            # connection would hang the whole export.
            timeout=30,
        )
        # Surface HTTP failures (e.g. an expired token) as a logged error
        # instead of silently yielding an empty node list.
        response.raise_for_status()
        payload = response.json()
        # `or` guards cover both a missing key and an explicit JSON null, so
        # callers can always iterate the result.
        return (payload.get('data') or {}).get('list') or []
    except Exception as e:
        # Deliberately broad: one failing run must not abort the export.
        print(f'获取节点执行记录失败 wf_run_id={wf_run_id}: {e}')
        return []
def safe_json_dumps(obj):
    """Render ``obj`` as JSON text, tolerating values JSON cannot encode.

    Args:
        obj: Any value; ``None`` stands for "nothing to show".

    Returns:
        ``''`` for ``None``; otherwise the ``json.dumps`` output with
        non-ASCII characters kept as-is, or ``str(obj)`` when serialisation
        fails.
    """
    if obj is not None:
        try:
            text = json.dumps(obj, ensure_ascii=False)
        except Exception:
            # Fall back to the plain string form rather than failing.
            text = str(obj)
        return text
    return ''
def safe_str(val):
    """Return ``str(val)``, mapping ``None`` to the empty string."""
    return '' if val is None else str(val)
def ts_to_datetime(ts):
    """Format a Unix timestamp as a local ``YYYY-MM-DD HH:MM:SS`` string.

    Args:
        ts: Timestamp as an int, float or numeric string; it is truncated
            with ``int()``.
            NOTE(review): the value is treated as seconds -- confirm the API
            does not return milliseconds.

    Returns:
        The formatted local time, or ``''`` when ``ts`` is missing, not
        numeric, not positive, or outside the platform's supported range.
        Non-positive values are treated as "no timestamp" because callers
        pass ``0`` for absent fields, which would otherwise render as a
        1970 epoch date.
    """
    try:
        seconds = int(ts)
    except (TypeError, ValueError, OverflowError):
        return ''
    if seconds <= 0:
        return ''
    try:
        return datetime.fromtimestamp(seconds).strftime('%Y-%m-%d %H:%M:%S')
    except (OverflowError, OSError, ValueError):
        return ''
# Titles of the workflow nodes whose outputs are collected per record.
KEY_NODE_TITLES = ['大模型', '单次反思', '大模型二次生成']

# Page size of the chat-log listing; also used to derive the page count, so
# the request parameter and the arithmetic cannot drift apart.
CHAT_PAGE_SIZE = 10

time_range = get_time_range(days_ago=1)
params = {
    'page_number': '1',
    'page_size': str(CHAT_PAGE_SIZE),
    'start_time': time_range['start_time'],
    'end_time': time_range['end_time'],
    'order_by': '-created_at',
    'status': 'all',
}


def _fetch_chat_page(page_number):
    """Fetch one page of the chat-log listing and return its ``data`` object.

    Updates ``params['page_number']`` in place. Raises on transport or HTTP
    errors so that an expired token or an outage stops the script instead of
    silently producing an empty export. Returns ``{}`` when ``data`` is
    missing or null.
    """
    params['page_number'] = str(page_number)
    response = requests.get(
        f'https://agent.udesk.cn/api/backend/apps/{app_id}/log/chat/pages',
        params=params,
        cookies=cookies,
        headers=headers,
        timeout=30,
    )
    response.raise_for_status()
    return response.json().get('data') or {}


first_page = _fetch_chat_page(1)
total = first_page.get('total') or 0
# Ceiling division; always visit at least one page.
total_page = max(-(-total // CHAT_PAGE_SIZE), 1)
print(f'{total} 条对话记录,{total_page} 页,开始拉取...')

chart_list = []
for page in range(1, total_page + 1):
    # Page 1 was already downloaded to learn the total; reuse it rather than
    # requesting it a second time.
    page_payload = first_page if page == 1 else _fetch_chat_page(page)
    page_data = page_payload.get('list') or []
    chart_list.extend(page_data)
    print(f'{page}/{total_page}页,获取 {len(page_data)}')
print(f'\n对话列表获取完成,共 {len(chart_list)}')

# One entry per message that carries a workflow run id; filled further down.
records = []
def fetch_all_messages(chat_log_id):
    """Collect the messages of one chat log by following its cursor paging.

    Pages of ``page_size`` messages are requested until a page comes back
    empty or short. After each full page, the id of that page's first message
    is sent as ``fid`` on the next request.
    NOTE(review): presumably ``fid`` asks for the messages preceding that id;
    confirm against the API, since it determines the order of the result.

    Args:
        chat_log_id: Id of the chat log whose messages are wanted.

    Returns:
        All messages gathered so far, in the order the pages were received.
        On a request or parsing error the error is printed and the partial
        result is returned; this function does not raise.
    """
    all_messages = []
    fid = None
    page_size = 10
    # Cursors already used; a repeat means the server is not advancing and
    # the loop would otherwise never end.
    seen_cursors = set()
    while True:
        try:
            c_params = {'id': chat_log_id, 'page_size': str(page_size)}
            if fid:
                c_params['fid'] = fid
            c_resp = requests.get(
                f'https://agent.udesk.cn/api/backend/apps/{app_id}/chatMessage',
                params=c_params,
                cookies=cookies,
                headers=headers,
                timeout=30,
            )
            c_resp.raise_for_status()
            payload = c_resp.json()
            # Tolerate both a missing key and an explicit JSON null.
            messages = (payload.get('data') or {}).get('list') or []
            if not messages:
                break
            all_messages.extend(messages)
            if len(messages) < page_size:
                break
            fid = messages[0].get('id')
            if not fid or fid in seen_cursors:
                break
            seen_cursors.add(fid)
        except Exception as e:
            # Best effort: keep what was collected, but say why paging
            # stopped instead of swallowing the error.
            print(f' 获取对话消息中断 chat_log_id={chat_log_id}: {e}')
            break
    return all_messages
def _summarize_nodes(nodes):
    """Extract the user query, key-node outputs and final answer from a run.

    Args:
        nodes: Node-execution dicts of one workflow run.

    Returns:
        ``(user_query, key_outputs, final_answer)`` where ``user_query`` is
        the ``sys.query`` output of the first ``start`` node that has one,
        ``key_outputs`` maps each title in ``KEY_NODE_TITLES`` to that node's
        ``text`` output (the last node with a given title wins), and
        ``final_answer`` is the ``answer`` output of the last ``answer``
        node. Missing pieces are returned as ``''`` / ``{}``.
    """
    user_query = ''
    for node in nodes:
        # `or {}` also covers an explicit null `outputs`, which would
        # otherwise raise AttributeError and abort the whole export.
        outputs = node.get('outputs') or {}
        if node.get('node_type') == 'start' and outputs.get('sys.query'):
            user_query = outputs['sys.query']
            break

    key_outputs = {}
    final_answer = ''
    for node in nodes:
        outputs = node.get('outputs') or {}
        title = node.get('title', '')
        if title in KEY_NODE_TITLES:
            key_outputs[title] = outputs.get('text', '')
        if node.get('node_type') == 'answer':
            final_answer = outputs.get('answer', '')
    return user_query, key_outputs, final_answer


# Walk every listed chat, pull its messages, and build one record per
# message that is linked to a workflow run.
for idx, chart in enumerate(chart_list):
    chat_log_id = chart.get('chat_log_id', '')
    chat_title = chart.get('title') or chart.get('chat_log_name') or ''
    created_time = str(chart.get('created_time', ''))
    try:
        messages = fetch_all_messages(chat_log_id)
    except Exception as e:
        print(f' [{idx+1}] 获取对话消息失败 chat_log_id={chat_log_id}: {e}')
        messages = []
    else:
        print(f' [{idx+1}] 获取对话消息 {len(messages)}')

    for msg in messages:
        wf_run_id = msg.get('workflow_run_id', '')
        if not wf_run_id:
            # Messages without a workflow run have no node trace to audit.
            continue
        nodes = fetch_nodes(wf_run_id)
        user_query, key_outputs, final_answer = _summarize_nodes(nodes)
        records.append({
            'chat_log_id': chat_log_id,
            'chat_title': chat_title,
            'created_time': created_time,
            'user_query': user_query,
            'wf_run_id': wf_run_id,
            'nodes': nodes,
            'key_outputs': key_outputs,
            'final_answer': final_answer,
        })
        # NOTE(review): the source had lost its indentation. This progress
        # line reads `nodes`, so it is placed inside the per-message loop --
        # confirm. str() keeps the slice safe for a non-string id.
        print(f' [{idx+1}/{len(chart_list)}] {str(chat_log_id)[:8]}... 节点数:{len(nodes)}')

print(f'\n数据拉取完成,共 {len(records)} 条有效记录,开始生成Excel...')
# ---------------------------------------------------------------------------
# Workbook construction: sheet 1 holds one row per record for manual review,
# sheet 2 holds one row per executed node.
# ---------------------------------------------------------------------------
wb = Workbook()

# Cell styles shared by both sheets. Each fill is built once and reused
# instead of constructing a new PatternFill for every cell.
header_font = Font(name='微软雅黑', bold=True, size=11, color='FFFFFF')
header_fill = PatternFill(start_color='4472C4', end_color='4472C4', fill_type='solid')
key_fill = PatternFill(start_color='FFF2CC', end_color='FFF2CC', fill_type='solid')
detail_header_fill = PatternFill(start_color='548235', end_color='548235', fill_type='solid')
failed_fill = PatternFill(start_color='FFC7CE', end_color='FFC7CE', fill_type='solid')
data_font = Font(name='微软雅黑', size=10)
thin_border = Border(
    left=Side(style='thin'),
    right=Side(style='thin'),
    top=Side(style='thin'),
    bottom=Side(style='thin'),
)
wrap_alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
center_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)

# Translation table that deletes the control characters openpyxl refuses to
# write (code points 0-8, 11-12 and 14-31). A single such character in a
# model answer would otherwise abort the export after all data was fetched.
_XLSX_ILLEGAL_CHARS = dict.fromkeys([*range(0, 9), 11, 12, *range(14, 32)])


def _excel_value(value):
    """Coerce ``value`` into something openpyxl can store in a cell.

    ``None`` and numbers pass through unchanged. Strings are stripped of
    characters that are illegal in worksheets. Any other value (for example
    a dict or list returned by the API) is rendered with ``safe_json_dumps``
    first, because openpyxl raises on types it cannot convert.
    """
    if value is None or isinstance(value, (int, float)):
        return value
    if not isinstance(value, str):
        value = safe_json_dumps(value)
    return value.translate(_XLSX_ILLEGAL_CHARS)


def _write_header_row(ws, titles, fill):
    """Write ``titles`` into row 1 of ``ws`` using the header style and ``fill``."""
    for col_idx, title in enumerate(titles, 1):
        cell = ws.cell(row=1, column=col_idx, value=title)
        cell.font = header_font
        cell.fill = fill
        cell.alignment = center_alignment
        cell.border = thin_border


def _write_data_row(ws, row, values, fills):
    """Write ``values`` into ``row`` of ``ws`` with the data style.

    ``fills`` maps 1-based column indices to the fill for that column;
    columns that are absent keep the default (no fill).
    """
    for col_idx, val in enumerate(values, 1):
        cell = ws.cell(row=row, column=col_idx, value=_excel_value(val))
        cell.font = data_font
        cell.alignment = wrap_alignment
        cell.border = thin_border
        fill = fills.get(col_idx)
        if fill is not None:
            cell.fill = fill


def _set_column_widths(ws, widths):
    """Apply ``widths`` to the columns of ``ws``, starting at column A."""
    for col_idx, width in enumerate(widths, 1):
        ws.column_dimensions[get_column_letter(col_idx)].width = width


# ----- Sheet 1: key-node review -------------------------------------------
ws1 = wb.active
ws1.title = '关键节点审核'
sheet1_headers = [
    '序号', '对话ID', '会话标题', '对话时间',
    '用户问题',
    '大模型_输出', '单次反思_输出', '大模型二次生成_输出',
    '最终回答', '人工审核结果', '备注'
]
_write_header_row(ws1, sheet1_headers, header_fill)

# Columns holding the content to be reviewed are highlighted.
key_col_indices = [col_idx for col_idx, h in enumerate(sheet1_headers, 1) if h in [
    '用户问题', '大模型_输出', '单次反思_输出', '大模型二次生成_输出', '最终回答'
]]
audit_col = sheet1_headers.index('人工审核结果') + 1
key_fills = dict.fromkeys(key_col_indices, key_fill)

for row_idx, rec in enumerate(records):
    values = [
        row_idx + 1,
        rec['chat_log_id'],
        rec['chat_title'],
        rec['created_time'],
        rec['user_query'],
        rec['key_outputs'].get('大模型', ''),
        rec['key_outputs'].get('单次反思', ''),
        rec['key_outputs'].get('大模型二次生成', ''),
        rec['final_answer'],
        '',  # manual review result, filled in by the reviewer
        '',  # remarks
    ]
    # Row 1 is the header, so record N lands on row N + 2 (0-based N).
    _write_data_row(ws1, row_idx + 2, values, key_fills)

_set_column_widths(ws1, [6, 38, 20, 20, 40, 50, 50, 50, 50, 15, 20])
ws1.freeze_panes = 'E2'
# Derive the last column from the header list rather than hard-coding it.
ws1.auto_filter.ref = f'A1:{get_column_letter(len(sheet1_headers))}{len(records) + 1}'

# ----- Sheet 2: full node-execution log -----------------------------------
ws2 = wb.create_sheet('全流程日志明细')
sheet2_headers = [
    '序号', '对话ID', '用户问题', '工作流运行ID',
    '节点序号', '节点类型', '节点标题', '节点状态',
    '执行开始时间', '执行结束时间',
    '节点输入(JSON)', '节点输出(JSON)', '错误信息'
]
_write_header_row(ws2, sheet2_headers, detail_header_fill)

# Failed nodes get every cell of their row highlighted.
failed_row_fills = dict.fromkeys(range(1, len(sheet2_headers) + 1), failed_fill)
no_fills = {}

log_row = 2  # next row to write; row 1 is the header
log_seq = 0  # running count of node rows, reported after saving
for rec in records:
    for node in rec['nodes']:
        log_seq += 1
        created_at = node.get('created_at') or node.get('started_at', 0)
        finished_at = node.get('finished_at', 0)
        values = [
            log_seq,
            rec['chat_log_id'],
            rec['user_query'],
            rec['wf_run_id'],
            node.get('index', ''),
            node.get('node_type', ''),
            node.get('title', ''),
            node.get('status', ''),
            ts_to_datetime(created_at),
            ts_to_datetime(finished_at),
            safe_json_dumps(node.get('inputs')),
            safe_json_dumps(node.get('outputs')),
            safe_str(node.get('error', '')),
        ]
        row_fills = failed_row_fills if node.get('status') == 'failed' else no_fills
        _write_data_row(ws2, log_row, values, row_fills)
        log_row += 1

_set_column_widths(ws2, [6, 38, 40, 38, 8, 22, 20, 12, 20, 20, 60, 60, 30])
ws2.freeze_panes = 'F2'
ws2.auto_filter.ref = f'A1:{get_column_letter(len(sheet2_headers))}{log_row - 1}'
# Save the workbook under a timestamped name and report what was written.
output_file = f'智能客服节点审核_{datetime.now().strftime("%Y%m%d_%H%M%S")}.xlsx'

# The original target is an absolute path on one developer's Windows drive.
# Saving there fails on any other machine, and only after the whole scrape
# has finished. The directory can now be supplied via UDESK_OUTPUT_DIR; when
# neither that nor the original directory is available, the file is written
# to the current working directory instead of losing the data.
_DEFAULT_OUTPUT_DIR = 'd:/Idea Project/F6+宜搭+其它(1)/张阳脚本/udesk'
output_dir = os.environ.get('UDESK_OUTPUT_DIR')
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
elif os.path.isdir(_DEFAULT_OUTPUT_DIR):
    output_dir = _DEFAULT_OUTPUT_DIR
else:
    output_dir = '.'

output_path = f'{output_dir}/{output_file}'
wb.save(output_path)
print(f'\nExcel已生成: {output_path}')
print(f' Sheet1「关键节点审核」: {len(records)} 条对话')
print(f' Sheet2「全流程日志明细」: {log_seq} 条节点执行记录')