# -*- coding: utf-8 -*-
"""Randomly sample rows from an evaluation workbook into a template layout.

The source sheet has no header row; its columns are named by position
(1st -> query, 2nd -> reference_response, 3rd -> session_id), reordered to
the template layout, type-normalised, and up to ``SAMPLE_SIZE`` rows are
sampled with a fixed seed and written to the current working directory.
"""
import os

import pandas as pd

# Input workbook and sheet to read.
SOURCE_PATH = r"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\udesk\自动评测.xlsx"
SHEET_NAME = "Sheet1"

# Column names in the order they appear in the source sheet.
POSITIONAL_COLS = ["query", "reference_response", "session_id"]
# Column order required by the output template.
TEMPLATE_COLS = ["session_id", "query", "reference_response"]

SAMPLE_SIZE = 100
RANDOM_STATE = 42
OUTPUT_NAME = "随机抽取100条结果.xlsx"


def assign_columns(df):
    """Return a copy of *df* whose columns are named by position.

    Only the first ``len(POSITIONAL_COLS)`` columns are kept: any further
    columns are not part of the template and would be discarded later, and
    keeping them would make the column-name assignment fail with a length
    mismatch. Sheets with fewer columns get the matching prefix of names.
    """
    named = df.iloc[:, : len(POSITIONAL_COLS)].copy()
    named.columns = POSITIONAL_COLS[: named.shape[1]]
    return named


def _stringify(series):
    """Convert non-missing values to ``str`` while keeping missing cells missing.

    A plain ``astype(str)`` would turn missing cells into literal text such
    as "nan", which would then be written into the output workbook.
    """
    return series.astype(str).where(series.notna(), other=pd.NA)


def to_template(df):
    """Return *df* arranged and typed according to ``TEMPLATE_COLS``.

    Columns absent from *df* are added and filled with missing values.
    ``session_id`` becomes nullable ``Int64`` (unparseable values become
    missing); ``query`` and ``reference_response`` become strings.
    """
    out = df.copy()
    for col in TEMPLATE_COLS:
        if col not in out.columns:
            out[col] = pd.NA  # fields missing from the source stay empty
    out = out[TEMPLATE_COLS].copy()

    out["session_id"] = pd.to_numeric(
        out["session_id"], errors="coerce"
    ).astype("Int64")
    out["query"] = _stringify(out["query"])
    out["reference_response"] = _stringify(out["reference_response"])
    return out


def sample_rows(df, n=SAMPLE_SIZE, random_state=RANDOM_STATE):
    """Return *n* randomly chosen rows of *df*, or all rows if it has fewer."""
    return df.sample(n=min(n, len(df)), random_state=random_state)


def main():
    """Read the source workbook, sample it, and write the result workbook."""
    raw = pd.read_excel(SOURCE_PATH, sheet_name=SHEET_NAME, header=None)
    sampled = sample_rows(to_template(assign_columns(raw)))
    n = len(sampled)

    # Write the result into the current working directory.
    output_path = os.path.join(os.getcwd(), OUTPUT_NAME)
    sampled.to_excel(output_path, index=False)
    print(f"已随机抽取 {n} 条数据,保存至: {output_path}")


if __name__ == "__main__":
    main()