Files
saas/test/BI.ipynb
T
2025-08-21 11:41:21 +08:00

561 lines
18 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"metadata": {},
"cell_type": "markdown",
"source": "BI创建表",
"id": "9a93686f364e491f"
},
{
"cell_type": "code",
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-08-20T09:06:39.520648Z",
"start_time": "2025-08-20T09:06:39.167174Z"
}
},
"source": [
"import pandas as pd\n",
"import mysql.connector\n",
"from mysql.connector import Error\n",
"\n",
"\n",
"def create_table(cursor, table_name):\n",
" \"\"\"创建数据表\"\"\"\n",
" create_table_query = f\"\"\"\n",
" CREATE TABLE IF NOT EXISTS {table_name} (\n",
" `选择合伙人` VARCHAR(255) COMMENT '选择合伙人',\n",
" `合伙人姓名` VARCHAR(255) COMMENT '合伙人姓名',\n",
" `手机号` VARCHAR(255) COMMENT '手机号',\n",
" `合伙人身份` VARCHAR(255) COMMENT '合伙人身份',\n",
" `合伙人所在省市` VARCHAR(255) COMMENT '合伙人所在省市',\n",
" `合伙人登记人` VARCHAR(255) COMMENT '合伙人登记人',\n",
" `战区经理` VARCHAR(255) COMMENT '战区经理',\n",
" `提交人` VARCHAR(255) COMMENT '提交人',\n",
" `合伙人分类` VARCHAR(255) COMMENT '合伙人分类',\n",
" `战区` VARCHAR(255) COMMENT '战区',\n",
" `订单编号` VARCHAR(255) COMMENT '订单登记表.订单编号',\n",
" `销售阶段` VARCHAR(255) COMMENT '订单登记表.销售阶段',\n",
" `版本` VARCHAR(255) COMMENT '订单登记表.版本',\n",
" `年限` VARCHAR(255) COMMENT '订单登记表.年限',\n",
" `成交金额` VARCHAR(255) COMMENT '订单登记表.成交金额',\n",
" `佣金` VARCHAR(255) COMMENT '订单登记表.佣金',\n",
" `理论佣金` VARCHAR(255) COMMENT '订单登记表.理论佣金',\n",
" `佣金比例` VARCHAR(255) COMMENT '订单登记表.佣金比例',\n",
" `合计佣金` VARCHAR(255) COMMENT '合计佣金',\n",
" `理论合计佣金` VARCHAR(255) COMMENT '理论合计佣金',\n",
" `特殊情况备注` VARCHAR(255) COMMENT '特殊情况备注',\n",
" `合伙人介绍证明` VARCHAR(255) COMMENT '合伙人介绍证明(微信聊天截图等)',\n",
" `合伙人类型` VARCHAR(255) COMMENT '合伙人类型'\n",
" ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 ;\n",
" \"\"\"\n",
" cursor.execute(create_table_query)\n",
" print(f\"成功创建表 {table_name}\")\n",
"\n",
"\n",
"# 数据库连接信息\n",
"HS_DB_Config = {\n",
" 'host': \"f6-public.rwlb.rds.aliyuncs.com\",\n",
" 'user': \"rw_operation_data_relay\",\n",
" 'password': \"m+q5Z4%IVuF9bf\",\n",
" 'database': \"f6operation_data_relay\"\n",
"} # 衡时数据库链接配置-mysql\n",
"\n",
"# 表名\n",
"table_name = \"partner_settlement_to_BI\" # 请替换为实际的表名\n",
"\n",
"# 连接数据库\n",
"connection = mysql.connector.connect(\n",
" host=HS_DB_Config[\"host\"],\n",
" user=HS_DB_Config[\"user\"],\n",
" password=HS_DB_Config[\"password\"],\n",
" database=HS_DB_Config[\"database\"]\n",
")\n",
"\n",
"cursor = connection.cursor()\n",
"\n",
"# 创建表\n",
"create_table(cursor, table_name)\n",
"\n",
"# 关闭连接\n",
"cursor.close()\n",
"connection.close()"
],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"成功创建表 partner_settlement_to_BI\n"
]
}
],
"execution_count": 2
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### 向BI中写入数据",
"id": "b2b6cd2fb31c305e"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-05T02:24:00.182030Z",
"start_time": "2025-08-05T02:23:57.092710Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import mysql.connector\n",
"from mysql.connector import Error\n",
"\n",
"# 连接信息\n",
"HS_DB_Config = {\n",
" 'host': \"f6-public.rwlb.rds.aliyuncs.com\",\n",
" 'user': \"rw_operation_data_relay\",\n",
" 'password': \"m+q5Z4%IVuF9bf\",\n",
" 'database': \"f6operation_data_relay\"\n",
"} # 衡时数据库链接配置-mysql\n",
"table_name = \"thailand_store_data_email\" # 请替换为实际的表名\n",
"# table_name = \"yida_process_time_statistics\"\n",
"# 连接\n",
"connection = mysql.connector.connect(\n",
" host=HS_DB_Config[\"host\"],\n",
" user=HS_DB_Config[\"user\"],\n",
" password=HS_DB_Config[\"password\"],\n",
" database=HS_DB_Config[\"database\"]\n",
")\n",
"\n",
"print(f\"成功连接 {HS_DB_Config[\"database\"]}\")\n",
"cursor = connection.cursor()\n",
"\n",
"# 读取Excel文件\n",
"df = pd.read_excel(\n",
" r\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Downloads\\门店日使用数据Workshops_Daily_Usage_Data_20250805101517.xlsx\",\n",
" sheet_name=\"Sheet1\")\n",
"\n",
"# 处理空值 - 将NaN/NaT/空字符串统一转为None\n",
"df = df.map(lambda x: None if pd.isna(x) or str(x).strip() == '' else x)\n",
"\n",
"# 生成插入语句\n",
"columns = ', '.join(df.columns)\n",
"placeholders = ', '.join(['%s'] * len(df.columns))\n",
"insert_query = f\"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})\"\n",
"\n",
"# 批量插入数据\n",
"records = [tuple(row) for row in df.values]\n",
"cursor.executemany(insert_query, records)\n",
"connection.commit()\n",
"\n",
"print(f\"成功导入 {cursor.rowcount} 条记录到 {table_name} 表\")\n",
"\n",
"cursor.close()\n",
"\n",
"connection.close()\n"
],
"id": "a98f8dd324b53eeb",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"成功连接 f6operation_data_relay\n",
"成功导入 7250 条记录到 thailand_store_data_email 表\n"
]
}
],
"execution_count": 1
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### 清空BI数据表",
"id": "92964c5c1b836d72"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-10T07:02:11.293374Z",
"start_time": "2025-06-10T07:02:10.565566Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import mysql.connector\n",
"from mysql.connector import Error\n",
"\n",
"# 数据库连接信息\n",
"host = \"rm-uf6r230vbtxf5gdz63o.mysql.rds.aliyuncs.com\"\n",
"user = \"rw_operation_data_relay\"\n",
"password = \"m+q5Z4%IVuF9bf\"\n",
"database = \"f6operation_data_relay\"\n",
"table_name = \"yida_process_time_statistics\" # 要清空的表名\n",
"# table_name = \"thailand_store_data_email\" # 要清空的表名\n",
"\n",
"# 连接数据库\n",
"try:\n",
" connection = mysql.connector.connect(\n",
" host=host,\n",
" user=user,\n",
" password=password,\n",
" database=database\n",
" )\n",
"\n",
" if connection.is_connected():\n",
" cursor = connection.cursor()\n",
"\n",
" # 使用TRUNCATE清空表数据\n",
" cursor.execute(f\"TRUNCATE TABLE {table_name}\")\n",
" connection.commit()\n",
"\n",
" print(f\"成功清空表 {table_name} 中的所有数据\")\n",
"\n",
"except Error as e:\n",
" print(f\"清空表时发生错误: {e}\")\n",
" if connection.is_connected():\n",
" connection.rollback()\n",
"\n",
"if connection.is_connected():\n",
" cursor.close()\n",
" connection.close()\n",
" print(\"数据库连接已关闭\")\n",
"\n",
"\n"
],
"id": "406f1e2ca21ad9a",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"成功清空表 yida_process_time_statistics 中的所有数据\n",
"数据库连接已关闭\n"
]
}
],
"execution_count": 2
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### 更新数据表结构",
"id": "432238219a78f927"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-20T08:07:48.856164Z",
"start_time": "2025-08-20T08:07:48.650261Z"
}
},
"cell_type": "code",
"source": [
"import mysql.connector\n",
"from mysql.connector import Error\n",
"\n",
"HS_DB_Config = {\n",
" 'host': \"f6-public.rwlb.rds.aliyuncs.com\",\n",
" 'user': \"rw_operation_data_relay\",\n",
" 'password': \"m+q5Z4%IVuF9bf\",\n",
" 'database': \"f6operation_data_relay\"\n",
"} # 衡时数据库链接配置-mysql\n",
"# table_name = \"new_dealer_service_order_to_bi\" # 替换为你的实际表名\n",
"\n",
"table_name = \"non_standard_performance_to_BI\"\n",
"column_name = \"开户/处理日期\"\n",
"# new_column_type = \"VARCHAR(255)\" # 目标数据类型\n",
"new_column_type = \"DATETIME\" # 目标数据类型\n",
"\n",
"try:\n",
" # 连接数据库\n",
" # 建立数据库连接\n",
" connection = mysql.connector.connect(\n",
" host=HS_DB_Config[\"host\"],\n",
" user=HS_DB_Config[\"user\"],\n",
" password=HS_DB_Config[\"password\"],\n",
" database=HS_DB_Config[\"database\"]\n",
" )\n",
" cursor = connection.cursor()\n",
"\n",
" # 检查字段是否存在及当前数据类型\n",
" cursor.execute(f\"\"\"\n",
" SELECT DATA_TYPE \n",
" FROM INFORMATION_SCHEMA.COLUMNS \n",
" WHERE TABLE_SCHEMA = DATABASE() \n",
" AND TABLE_NAME = '{table_name}' \n",
" AND COLUMN_NAME = '{column_name}';\n",
" \"\"\")\n",
"\n",
" column_info = cursor.fetchone()\n",
"\n",
" if not column_info:\n",
" # 字段不存在时添加\n",
" alter_query = f\"ALTER TABLE `{table_name}` ADD COLUMN `{column_name}` {new_column_type};\"\n",
" cursor.execute(alter_query)\n",
" print(f\"✅ 成功添加字段: `{column_name}`\")\n",
" else:\n",
" current_type = column_info[0]\n",
" # 比较当前类型与目标类型(忽略括号内的长度)\n",
" if current_type.upper() != new_column_type.split('(')[0].upper():\n",
" # 修改数据类型\n",
" alter_query = f\"ALTER TABLE `{table_name}` MODIFY COLUMN `{column_name}` {new_column_type};\"\n",
" cursor.execute(alter_query)\n",
" print(f\"✅ 成功将字段 `{column_name}` 从 {current_type} 修改为 {new_column_type}\")\n",
" else:\n",
" print(f\"️ 字段 `{column_name}` 已经是 {new_column_type} 类型,无需修改\")\n",
"\n",
" connection.commit()\n",
"\n",
"except Error as e:\n",
" print(f\"❌ 操作失败:{e}\")\n",
" if connection.is_connected():\n",
" connection.rollback()\n",
"\n",
"finally:\n",
" if connection.is_connected():\n",
" cursor.close()\n",
" connection.close()\n",
" print(\"数据库连接已关闭\")"
],
"id": "e322e4b26a013cdd",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"❌ 操作失败:1146 (42S02): Table 'f6operation_data_relay.non_standard_performance_to_bi' doesn't exist\n",
"数据库连接已关闭\n"
]
}
],
"execution_count": 4
},
{
"metadata": {},
"cell_type": "markdown",
"source": "### 清空部分数据\n",
"id": "f0ad64203d788c79"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-06-11T01:39:55.579785Z",
"start_time": "2025-06-11T01:39:55.295469Z"
}
},
"cell_type": "code",
"source": [
"import pandas as pd\n",
"import mysql.connector\n",
"from mysql.connector import Error\n",
"\n",
"# 数据库连接信息\n",
"host = \"rm-uf6r230vbtxf5gdz63o.mysql.rds.aliyuncs.com\"\n",
"user = \"rw_operation_data_relay\"\n",
"password = \"m+q5Z4%IVuF9bf\"\n",
"database = \"f6operation_data_relay\"\n",
"table_name = \"yida_process_time_statistics\" # 要操作的表名\n",
"# table_name = \"thailand_store_data_email\" # 要操作的表名\n",
"min_id_to_delete = 127821 # 要删除的最小ID值\n",
"\n",
"# 连接数据库\n",
"try:\n",
" connection = mysql.connector.connect(\n",
" host=host,\n",
" user=user,\n",
" password=password,\n",
" database=database\n",
" )\n",
"\n",
" if connection.is_connected():\n",
" cursor = connection.cursor()\n",
"\n",
" # 使用DELETE删除ID大于等于127821的数据\n",
" cursor.execute(f\"DELETE FROM {table_name} WHERE id >= {min_id_to_delete}\")\n",
"\n",
" connection.commit()\n",
" print(f\"成功删除表 {table_name} 中ID大于等于{min_id_to_delete}的所有数据\")\n",
"\n",
"except Error as e:\n",
" print(f\"删除数据时发生错误: {e}\")\n",
" if connection.is_connected():\n",
" connection.rollback()\n",
"\n",
"if connection.is_connected():\n",
" cursor.close()\n",
" connection.close()\n",
" print(\"数据库连接已关闭\")"
],
"id": "3df14d022c9d8046",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"成功删除表 yida_process_time_statistics 中ID大于等于127821的所有数据\n",
"数据库连接已关闭\n"
]
}
],
"execution_count": 3
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## 删除区间数据",
"id": "8192d432b3f65bc2"
},
{
"metadata": {},
"cell_type": "code",
"outputs": [],
"execution_count": null,
"source": [
"import pandas as pd\n",
"import mysql.connector\n",
"from mysql.connector import Error\n",
"\n",
"# 数据库连接信息\n",
"# host = \"rm-uf6r230vbtxf5gdz63o.mysql.rds.aliyuncs.com\"\n",
"# user = \"rw_operation_data_relay\"\n",
"# password = \"m+q5Z4%IVuF9bf\"\n",
"# database = \"f6operation_data_relay\"\n",
"# BI数据库链接配置-mysql\n",
"host = \"f6-public.rwlb.rds.aliyuncs.com\"\n",
"database = \"f6operation_data_relay\"\n",
"user = \"rw_operation_data_relay\"\n",
"password = \"m+q5Z4%IVuF9bf\"\n",
"table_name = \"thailand_store_data_email\" # 要操作的表名\n",
"# table_name = \"thailand_store_data_email\" # 要操作的表名\n",
"start_id = 104864 # 要删除的区间起始ID\n",
"end_id = 106995 # 要删除的区间结束ID\n",
"\n",
"# 连接数据库\n",
"try:\n",
" connection = mysql.connector.connect(\n",
" host=host,\n",
" user=user,\n",
" password=password,\n",
" database=database\n",
" )\n",
"\n",
" if connection.is_connected():\n",
" cursor = connection.cursor()\n",
"\n",
" # 使用DELETE删除ID在指定区间内的数据\n",
" delete_query = f\"DELETE FROM {table_name} WHERE id BETWEEN {start_id} AND {end_id}\"\n",
" cursor.execute(delete_query)\n",
"\n",
" connection.commit()\n",
" print(f\"成功删除表 {table_name} 中ID在{start_id}到{end_id}之间的所有数据\")\n",
"\n",
"except Error as e:\n",
" print(f\"删除数据时发生错误: {e}\")\n",
" if connection.is_connected():\n",
" connection.rollback()\n",
"\n",
"if connection.is_connected():\n",
" cursor.close()\n",
" connection.close()\n",
" print(\"数据库连接已关闭\")"
],
"id": "fe36740aa6724433"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## BI删表\n",
"id": "76b76aed2ce2a77f"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-08-21T02:06:22.956060Z",
"start_time": "2025-08-21T02:06:22.797879Z"
}
},
"cell_type": "code",
"source": [
"import mysql.connector\n",
"from mysql.connector import Error\n",
"\n",
"\n",
"def drop_table(cursor, table_name):\n",
" \"\"\"删除数据表\"\"\"\n",
" drop_table_query = f\"DROP TABLE IF EXISTS {table_name};\"\n",
" cursor.execute(drop_table_query)\n",
" print(f\"成功删除表 {table_name}\")\n",
"\n",
"\n",
"# 数据库连接信息\n",
"HS_DB_Config = {\n",
" 'host': \"f6-public.rwlb.rds.aliyuncs.com\",\n",
" 'user': \"rw_operation_data_relay\",\n",
" 'password': \"m+q5Z4%IVuF9bf\",\n",
" 'database': \"f6operation_data_relay\"\n",
"} # 衡时数据库链接配置-mysql\n",
"\n",
"# 表名\n",
"table_name = \"test\" # 请替换为实际的表名\n",
"\n",
"# 连接数据库\n",
"connection = mysql.connector.connect(\n",
" host=HS_DB_Config[\"host\"],\n",
" user=HS_DB_Config[\"user\"],\n",
" password=HS_DB_Config[\"password\"],\n",
" database=HS_DB_Config[\"database\"]\n",
")\n",
"\n",
"cursor = connection.cursor()\n",
"\n",
"# 删除表\n",
"drop_table(cursor, table_name)\n",
"\n",
"# 提交更改\n",
"connection.commit()\n",
"\n",
"# 关闭连接\n",
"cursor.close()\n",
"connection.close()"
],
"id": "daf2c94f811fbcdd",
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"成功删除表 test\n"
]
}
],
"execution_count": 2
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}