Files
F6--/张阳脚本/udesk/随机抽取100条测试.py
2026-06-02 15:08:26 +08:00

38 lines
1.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
import os
import pandas as pd
# Source workbook: the sheet is read without a header row, so columns are
# identified purely by position.
INPUT_PATH = r"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\udesk\自动评测.xlsx"
SHEET_NAME = "Sheet1"
# The result workbook is written into the current working directory.
OUTPUT_NAME = "随机抽取100条结果.xlsx"
# Names given to the 1st, 2nd and 3rd source column respectively.
POSITIONAL_COLS = ["query", "reference_response", "session_id"]
# Column order required by the output template.
TEMPLATE_COLS = ["session_id", "query", "reference_response"]
TEXT_COLS = ("query", "reference_response")
SAMPLE_SIZE = 100
RANDOM_SEED = 42


def normalize_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return *df* reshaped to the template columns with normalized types.

    Columns are named by position (1st -> query, 2nd -> reference_response,
    3rd -> session_id). Columns beyond the third are dropped and template
    columns absent from the source are added as all-missing.

    ``session_id`` becomes nullable ``Int64``; values that are not whole
    numbers become missing. The text columns are converted to strings while
    missing cells stay missing.
    """
    # Keep only the columns that have a positional name. Assigning three
    # names to a wider frame would raise a length-mismatch ValueError.
    df = df.iloc[:, :len(POSITIONAL_COLS)].copy()
    df.columns = POSITIONAL_COLS[:df.shape[1]]

    # reindex both orders the columns per the template and creates any
    # missing template column filled with NaN.
    df = df.reindex(columns=TEMPLATE_COLS)

    ids = pd.to_numeric(df["session_id"], errors="coerce")
    # Casting a fractional or infinite float to Int64 raises, so treat such
    # values as invalid ids, in line with errors="coerce" above.
    df["session_id"] = ids.where(ids % 1 == 0).astype("Int64")

    for col in TEXT_COLS:
        raw = df[col]
        # A plain astype(str) would turn empty cells into the literal text
        # "nan"; mask them back to missing so they are written as blanks.
        df[col] = raw.astype(str).where(raw.notna())
    return df


def sample_rows(df: pd.DataFrame, size: int = SAMPLE_SIZE,
                seed: int = RANDOM_SEED) -> pd.DataFrame:
    """Return *size* random rows of *df*, or every row if it has fewer."""
    return df.sample(n=min(size, len(df)), random_state=seed)


def main() -> None:
    """Read the source sheet, sample it and write the result workbook."""
    df = pd.read_excel(INPUT_PATH, sheet_name=SHEET_NAME, header=None)
    sampled = sample_rows(normalize_columns(df))
    n = len(sampled)

    output_path = os.path.join(os.getcwd(), OUTPUT_NAME)
    sampled.to_excel(output_path, index=False)
    print(f"已随机抽取 {n} 条数据,保存至: {output_path}")


if __name__ == "__main__":
    main()