264 lines
10 KiB
Plaintext
264 lines
10 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"查看excel里面的换行符格式\n",
|
||
"检查后可以看到,excel里面的单元格内换行符是\\r\\n格式"
|
||
],
|
||
"id": "c9030123c5d35ea8"
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"id": "initial_id",
|
||
"metadata": {
|
||
"collapsed": true,
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-13T01:38:58.145302Z",
|
||
"start_time": "2025-02-13T01:38:49.123687Z"
|
||
}
|
||
},
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"# 读取Excel文件\n",
|
||
"file_path = r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\明细统计.xls\" # 使用原始字符串 (r\"\") 来避免转义字符问题\n",
|
||
"df = pd.read_excel(file_path)\n",
|
||
"\n",
|
||
"# 选择你要检查的列(例如 '图号')\n",
|
||
"column_to_check = '图号'\n",
|
||
"\n",
|
||
"# 打印前几行的数据,查看是否有换行符\n",
|
||
"for index, row in df.head().iterrows():\n",
|
||
" print(f\"Row {index}:\")\n",
|
||
" print(row[column_to_check])\n",
|
||
" print(repr(row[column_to_check])) # 使用 repr() 查看实际的换行符\n",
|
||
" print(\"-\" * 40)"
|
||
],
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Row 0:\n",
|
||
"TH04733\r\n",
|
||
"YLS-1L5W30\r\n",
|
||
"LSGL-4-75W90\r\n",
|
||
"NO-20002\r\n",
|
||
"1307012TAA\r\n",
|
||
"1002250TAR-1\r\n",
|
||
"EZL401\r\n",
|
||
"NA-14021U\r\n",
|
||
"CC-22\r\n",
|
||
"3701250TAC\r\n",
|
||
"TH04732\r\n",
|
||
"1006060TARD1\r\n",
|
||
"1002350TRA-F\n",
|
||
"'TH04733\\r\\nYLS-1L5W30\\r\\nLSGL-4-75W90\\r\\nNO-20002\\r\\n1307012TAA\\r\\n1002250TAR-1\\r\\nEZL401\\r\\nNA-14021U\\r\\nCC-22\\r\\n3701250TAC\\r\\nTH04732\\r\\n1006060TARD1\\r\\n1002350TRA-F'\n",
|
||
"----------------------------------------\n",
|
||
"Row 1:\n",
|
||
"nan\n",
|
||
"nan\n",
|
||
"----------------------------------------\n",
|
||
"Row 2:\n",
|
||
"0280150989\r\n",
|
||
"H15015-0200\r\n",
|
||
"06F 905 115G\r\n",
|
||
"7092-BKR6EGP\n",
|
||
"'0280150989\\r\\nH15015-0200\\r\\n06F 905 115G\\r\\n7092-BKR6EGP'\n",
|
||
"----------------------------------------\n",
|
||
"Row 3:\n",
|
||
"0M08-10129\r\n",
|
||
"S101014-0400\r\n",
|
||
"0M10-30064\r\n",
|
||
"H15002-1000\n",
|
||
"'0M08-10129\\r\\nS101014-0400\\r\\n0M10-30064\\r\\nH15002-1000'\n",
|
||
"----------------------------------------\n",
|
||
"Row 4:\n",
|
||
"JLS-SN5W301L\r\n",
|
||
"JL00004\r\n",
|
||
"AST00002\r\n",
|
||
"NA-18034P\r\n",
|
||
"AST00010\r\n",
|
||
"NC-17012L\r\n",
|
||
"56110-RAA-A01HC\r\n",
|
||
"08285-P99-01ZE3\r\n",
|
||
"N0-18003\n",
|
||
"'JLS-SN5W301L\\r\\nJL00004\\r\\nAST00002\\r\\nNA-18034P\\r\\nAST00010\\r\\nNC-17012L\\r\\n56110-RAA-A01HC\\r\\n08285-P99-01ZE3\\r\\nN0-18003'\n",
|
||
"----------------------------------------\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 1
|
||
},
|
||
{
|
||
"metadata": {},
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"拆分\\r\\n换行符\n",
|
||
"\n",
|
||
"根据同一组\\r\\n的数量,"
|
||
],
|
||
"id": "3023b57d0a934d9e"
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2025-02-13T01:44:18.374344Z",
|
||
"start_time": "2025-02-13T01:39:23.892604Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"from tqdm import tqdm\n",
|
||
"from pathlib import Path\n",
|
||
"\n",
|
||
"def split_rows(df, key_column='工单号'):\n",
|
||
" # 获取需要处理的列\n",
|
||
" part_columns = ['配件名称', '配件数量', '配件施工人员', '配件成本', '配件单价', '配件金额']\n",
|
||
" service_columns = ['服务项目', '项目金额', '项目施工人员']\n",
|
||
"\n",
|
||
" # 初始化三个空列表来存储正常、异常和异常工单号对应的正常数据\n",
|
||
" normal_rows = []\n",
|
||
" abnormal_rows = []\n",
|
||
" abnormal_key_values = set() # 用于存储异常工单号\n",
|
||
" original_abnormal_rows = [] # 用于存储异常工单的原始行\n",
|
||
"\n",
|
||
" # 创建进度条,用于显示处理每一行的进度\n",
|
||
" with tqdm(total=len(df), desc='Processing Rows') as pbar:\n",
|
||
" for index, row in df.iterrows():\n",
|
||
" # 分割配件组和服务项目组数据\n",
|
||
" parts_data = {col: row[col].split('\\r\\n') if isinstance(row[col], str) and pd.notna(row[col]) else [row[col]] for col in part_columns}\n",
|
||
" services_data = {col: row[col].split('\\r\\n') if isinstance(row[col], str) and pd.notna(row[col]) else [row[col]] for col in service_columns}\n",
|
||
"\n",
|
||
" # 合并两个字典\n",
|
||
" all_data = {**parts_data, **services_data}\n",
|
||
"\n",
|
||
" # 确定最大长度\n",
|
||
" max_length = max(len(data) for data in all_data.values())\n",
|
||
"\n",
|
||
" # 记录当前行的工单号\n",
|
||
" key_value = row[key_column]\n",
|
||
"\n",
|
||
" # 将所有列的数据扩展到最大长度\n",
|
||
" for i in range(max_length):\n",
|
||
" new_row = row.copy()\n",
|
||
" for col, data in all_data.items():\n",
|
||
" if i < len(data):\n",
|
||
" new_row[col] = data[i]\n",
|
||
" else:\n",
|
||
" new_row[col] = None\n",
|
||
"\n",
|
||
" # 检查是否有异常情况\n",
|
||
" non_null_values = [new_row[col] for col in all_data.keys() if pd.notna(new_row[col])]\n",
|
||
" if len(non_null_values) == 1:\n",
|
||
" # 如果这一行只有单个非空值,则记录为异常数据\n",
|
||
" abnormal_row = {\n",
|
||
" key_column: key_value,\n",
|
||
" '异常列': list(all_data.keys())[0],\n",
|
||
" '异常值': non_null_values[0],\n",
|
||
" '原始行索引': index # 记录原始行的索引\n",
|
||
" }\n",
|
||
" abnormal_rows.append(abnormal_row)\n",
|
||
" abnormal_key_values.add(key_value) # 记录异常工单号\n",
|
||
" else:\n",
|
||
" normal_rows.append(new_row)\n",
|
||
"\n",
|
||
" # 更新进度条\n",
|
||
" pbar.update(1)\n",
|
||
"\n",
|
||
" # 一次性将所有正常和异常的行合并为新的DataFrame\n",
|
||
" normal_df = pd.DataFrame(normal_rows)\n",
|
||
" abnormal_df = pd.DataFrame(abnormal_rows)\n",
|
||
"\n",
|
||
" # 提取异常工单号对应的正常数据\n",
|
||
" abnormal_normal_df = normal_df[normal_df[key_column].isin(abnormal_key_values)]\n",
|
||
"\n",
|
||
" # 从正常数据中移除异常工单号的数据\n",
|
||
" normal_df = normal_df[~normal_df[key_column].isin(abnormal_key_values)]\n",
|
||
"\n",
|
||
" # 提取异常工单的原始行\n",
|
||
" original_abnormal_df = df[df[key_column].isin(abnormal_key_values)]\n",
|
||
"\n",
|
||
" return normal_df, abnormal_df, abnormal_normal_df, original_abnormal_df\n",
|
||
"\n",
|
||
"# 读取Excel文件\n",
|
||
"input_file_path = Path(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\明细统计.xls\",sheet='Sheet2')\n",
|
||
"df = pd.read_excel(input_file_path)\n",
|
||
"\n",
|
||
"# 处理数据\n",
|
||
"normal_df, abnormal_df, abnormal_normal_df, original_abnormal_df = split_rows(df, key_column='工单号')\n",
|
||
"\n",
|
||
"# 写入新的Excel文件,包含四个工作表\n",
|
||
"output_file_path = Path(r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\拆分后_明细统计.xlsx\", engine='xlsxwriter') # 修改为 .xlsx 格式\n",
|
||
"with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:\n",
|
||
" normal_df.to_excel(writer, sheet_name='正常数据', index=False)\n",
|
||
" abnormal_df.to_excel(writer, sheet_name='异常数据', index=False)\n",
|
||
" abnormal_normal_df.to_excel(writer, sheet_name='异常工单的正常数据', index=False)\n",
|
||
" original_abnormal_df.to_excel(writer, sheet_name='异常工单的原始行', index=False)\n",
|
||
"\n",
|
||
"print(f\"处理完成,正常数据、异常数据、异常工单的正常数据以及异常工单的原始行已分别保存到 '{output_file_path}' 的 '正常数据'、'异常数据'、'异常工单的正常数据' 和 '异常工单的原始行' 工作表中。\")"
|
||
],
|
||
"id": "78e36241d6a7696",
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Processing Rows: 100%|██████████| 31335/31335 [01:08<00:00, 456.67it/s] \n"
|
||
]
|
||
},
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"处理完成,正常数据、异常数据、异常工单的正常数据以及异常工单的原始行已分别保存到 'C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\拆分后_明细统计.xlsx' 的 '正常数据'、'异常数据'、'异常工单的正常数据' 和 '异常工单的原始行' 工作表中。\n"
|
||
]
|
||
}
|
||
],
|
||
"execution_count": 2
|
||
},
|
||
{
|
||
"metadata": {
|
||
"ExecuteTime": {
|
||
"end_time": "2024-12-11T01:29:12.873010Z",
|
||
"start_time": "2024-12-11T01:27:31.524526Z"
|
||
}
|
||
},
|
||
"cell_type": "code",
|
||
"source": [
|
||
"with pd.ExcelWriter(output_file_path, engine='openpyxl') as writer:\n",
|
||
" normal_df.to_excel(writer, sheet_name='正常数据', index=False)\n",
|
||
" abnormal_df.to_excel(writer, sheet_name='异常数据', index=False)\n",
|
||
" abnormal_normal_df.to_excel(writer, sheet_name='异常工单的正常数据', index=False)"
|
||
],
|
||
"id": "2884a4a0209bf061",
|
||
"outputs": [],
|
||
"execution_count": 3
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 2
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython2",
|
||
"version": "2.7.6"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|