703 lines
31 KiB
Plaintext
703 lines
31 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "initial_id",
|
|
"metadata": {
|
|
"collapsed": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import poplib\n",
|
|
"import time\n",
|
|
"import pandas as pd\n",
|
|
"from email.parser import Parser\n",
|
|
"from email.header import decode_header\n",
|
|
"from email.utils import parseaddr\n",
|
|
"from datetime import datetime\n",
|
|
"from config import Config\n",
|
|
"from api import API\n",
|
|
"from back_ground_module import CommonModule\n",
|
|
"import pandas as pd\n",
|
|
"import pymysql\n",
|
|
"from pymysql import Error\n",
|
|
"\n",
|
|
"api_instance = API()\n",
|
|
"common_module = CommonModule()\n",
|
|
"\n",
|
|
"\n",
|
|
"class EmailProcessor:\n",
|
|
" \"\"\"泰国CRM每日邮件写入简道云与BI\"\"\"\n",
|
|
" def __init__(self):\n",
|
|
" # 配置信息\n",
|
|
" self.user_email_address = 'caowei@f6car.cn'\n",
|
|
" self.user_password = 'Cw@340826'\n",
|
|
" self.pop_server_host = 'pop.qiye.aliyun.com'\n",
|
|
" self.pop_server_port = '995'\n",
|
|
" self.send_name = \"f6car\"\n",
|
|
" self.send_addr = 'noreplay@notice.f6car.com'\n",
|
|
"\n",
|
|
" # 创建输出目录(如果不存在)\n",
|
|
" output_dir = \"email\"\n",
|
|
" os.makedirs(output_dir, exist_ok=True)\n",
|
|
" nowtime = datetime.now().strftime(\"%Y%m%d%H%M%S\")\n",
|
|
"\n",
|
|
" self.write_path = os.path.join(output_dir, f'email_data.xlsx')\n",
|
|
"\n",
|
|
" # 初始化字段映射\n",
|
|
" self.field_mapping = {\n",
|
|
" \"指标归属日期\": \"_widget_1742174728275\",\n",
|
|
" \"公司ID\": \"_widget_1742091963874\",\n",
|
|
" \"公司名称\": \"_widget_1742091963875\",\n",
|
|
" \"门店ID\": \"_widget_1742091963876\",\n",
|
|
" \"门店名称\": \"_widget_1742091963877\",\n",
|
|
" \"门店简称\": \"_widget_1742091963878\",\n",
|
|
" \"门店创建时间\": \"_widget_1742091963879\",\n",
|
|
" \"指标类型\": \"_widget_1742091963880\",\n",
|
|
" \"指标值\": \"_widget_1742091963882\",\n",
|
|
" \"指标子类型\": \"_widget_1742091963881\",\n",
|
|
" \"指标值\": \"_widget_1742091963882\"\n",
|
|
" }\n",
|
|
"\n",
|
|
" def connect_email_by_pop3(self):\n",
|
|
" try:\n",
|
|
" # 连接到POP服务器\n",
|
|
" email_server = poplib.POP3_SSL(\n",
|
|
" host=self.pop_server_host,\n",
|
|
" port=self.pop_server_port,\n",
|
|
" timeout=10\n",
|
|
" )\n",
|
|
" print(\"POP服务器连接成功,开始用户邮箱验证\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"POP服务器连接失败。错误: {str(e)}\")\n",
|
|
" exit(1)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # 验证用户邮箱\n",
|
|
" email_server.user(self.user_email_address)\n",
|
|
" print(\"用户邮箱验证成功,开始授权码验证\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"用户邮箱验证失败。错误: {str(e)}\")\n",
|
|
" exit(1)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # 验证密码/授权码\n",
|
|
" email_server.pass_(self.user_password)\n",
|
|
" print(\"授权码验证成功,开始处理邮件\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"授权码验证失败。错误: {str(e)}\")\n",
|
|
" exit(1)\n",
|
|
"\n",
|
|
" # 处理邮件\n",
|
|
" self.parse_email_server(email_server)\n",
|
|
"\n",
|
|
" def parse_email_server(self, email_server):\n",
|
|
" # 获取所有邮件列表\n",
|
|
" resp, mails, octets = email_server.list()\n",
|
|
" index = len(mails)\n",
|
|
"\n",
|
|
" # 获取今天的零点时间戳\n",
|
|
" now = datetime.now()\n",
|
|
" today_start = datetime(now.year, now.month, now.day) # 当天零点\n",
|
|
" today_start_timestamp = int(today_start.timestamp()) # 转换为时间戳\n",
|
|
"\n",
|
|
" # 按逆序处理邮件(最新的先处理)\n",
|
|
" for i in range(index, 0, -1):\n",
|
|
"\n",
|
|
" # print(f\"正在处理邮件 {i},{index}\")\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # 获取邮件内容\n",
|
|
" resp, lines, octets = email_server.retr(i)\n",
|
|
" msg_content = b'\\r\\n'.join(lines).decode('utf-8', errors='ignore') # 避免解码错误\n",
|
|
" msg = Parser().parsestr(msg_content)\n",
|
|
"\n",
|
|
" # 处理邮件时间\n",
|
|
" mail_datetime = self.parse_mail_time(msg.get(\"date\"))\n",
|
|
" if not mail_datetime: # 如果邮件时间解析失败,跳过\n",
|
|
" # logging.warning(f\"Failed to parse date for email {i}. Skipping...\")\n",
|
|
" continue\n",
|
|
"\n",
|
|
" # 将邮件时间转换为时间戳\n",
|
|
" mail_timestamp = int(mail_datetime.timestamp())\n",
|
|
"\n",
|
|
" # 如果邮件不是今天的,跳过\n",
|
|
" if mail_timestamp < today_start_timestamp:\n",
|
|
" # logging.info(f\"Skipping email {i} as it is not from today.\")\n",
|
|
" # continue\n",
|
|
" break\n",
|
|
"\n",
|
|
" # 打印邮件接收时间\n",
|
|
" mail_time_str = datetime.strftime(mail_datetime, '%Y-%m-%d %H:%M:%S')\n",
|
|
" print(f\"邮件接收时间: {mail_time_str}\")\n",
|
|
"\n",
|
|
" # 处理邮件内容\n",
|
|
" self.parser_content(msg, 0)\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" # logging.error(f\"Error processing email {i}: {e}\")\n",
|
|
" print(f\"Error processing email {i}: {e}\")\n",
|
|
" continue\n",
|
|
"\n",
|
|
" # 退出服务器\n",
|
|
" email_server.quit()\n",
|
|
"\n",
|
|
" def parser_content(self, msg, indent):\n",
|
|
" print(\"邮件处理\")\n",
|
|
" if indent == 0:\n",
|
|
" self.parser_email_header(msg)\n",
|
|
"\n",
|
|
" # 解析发件人信息\n",
|
|
" hdr, addr = parseaddr(msg['From'])\n",
|
|
" name, charset = decode_header(hdr)[0]\n",
|
|
" if charset:\n",
|
|
" name = name.decode(charset)\n",
|
|
" print(f'发件人姓名: {name}, 发件人邮箱: {addr}')\n",
|
|
"\n",
|
|
" if name == self.send_name:\n",
|
|
" # 下载附件\n",
|
|
" for part in msg.walk():\n",
|
|
" file_name = part.get_filename()\n",
|
|
" if file_name is None:\n",
|
|
" continue\n",
|
|
"\n",
|
|
" filename = self.decode_str(file_name)\n",
|
|
" data = part.get_payload(decode=True)\n",
|
|
" try:\n",
|
|
" with open(self.write_path, 'wb') as att_file:\n",
|
|
" att_file.write(data)\n",
|
|
" print(f\"附件保存成功: {self.write_path}+{filename}\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"附件保存失败: {str(e)}\")\n",
|
|
"\n",
|
|
" if msg.is_multipart():\n",
|
|
" parts = msg.get_payload()\n",
|
|
" for part in parts:\n",
|
|
" self.parser_content(part, indent + 1)\n",
|
|
" else:\n",
|
|
" # 解析邮件正文\n",
|
|
" content_type = msg.get_content_type()\n",
|
|
" if content_type in ['text/plain', 'text/html']:\n",
|
|
" content = msg.get_payload(decode=True)\n",
|
|
" charset = self.guess_charset(msg)\n",
|
|
" if charset:\n",
|
|
" content = content.decode(charset)\n",
|
|
" print(f\"{' ' * indent}邮件内容: {content}\")\n",
|
|
"\n",
|
|
" def parser_email_header(self, msg):\n",
|
|
" # 解析邮件主题\n",
|
|
" subject = msg['Subject']\n",
|
|
" value, charset = decode_header(subject)[0]\n",
|
|
" if charset:\n",
|
|
" value = value.decode(charset)\n",
|
|
" print(f'邮件主题: {value}')\n",
|
|
"\n",
|
|
" # 解析发件人信息\n",
|
|
" hdr, addr = parseaddr(msg['From'])\n",
|
|
" name, charset = decode_header(hdr)[0]\n",
|
|
" if charset:\n",
|
|
" name = name.decode(charset)\n",
|
|
" print(f'发件人姓名: {name}, 发件人邮箱: {addr}')\n",
|
|
"\n",
|
|
" # 解析收件人信息\n",
|
|
" hdr, addr = parseaddr(msg['To'])\n",
|
|
" name, charset = decode_header(hdr)[0]\n",
|
|
" if charset:\n",
|
|
" name = name.decode(charset)\n",
|
|
" print(f'收件人姓名: {name}, 收件人邮箱: {addr}')\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def decode_str(s):\n",
|
|
" value, charset = decode_header(s)[0]\n",
|
|
" if charset:\n",
|
|
" value = value.decode(charset)\n",
|
|
" return value\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def guess_charset(msg):\n",
|
|
" charset = msg.get_charset()\n",
|
|
" if charset is None:\n",
|
|
" content_type = msg.get('Content-Type', '').lower()\n",
|
|
" for item in content_type.split(';'):\n",
|
|
" item = item.strip()\n",
|
|
" if item.startswith('charset'):\n",
|
|
" charset = item.split('=')[1]\n",
|
|
" break\n",
|
|
" return charset\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def parse_mail_time(mail_datetime):\n",
|
|
" GMT_FORMAT = \"%a, %d %b %Y %H:%M:%S\"\n",
|
|
" GMT_FORMAT2 = \"%d %b %Y %H:%M:%S\"\n",
|
|
" index = mail_datetime.find(' +0')\n",
|
|
" if index > 0:\n",
|
|
" mail_datetime = mail_datetime[:index] # 移除时区信息\n",
|
|
" formats = [GMT_FORMAT, GMT_FORMAT2]\n",
|
|
" for ft in formats:\n",
|
|
" try:\n",
|
|
" mail_datetime = datetime.strptime(mail_datetime, ft)\n",
|
|
" return mail_datetime\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
" raise Exception(\"邮件时间格式解析错误\")\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def row_to_dict(row, field_mapping):\n",
|
|
" \"\"\"将一行数据转换为格式化字典\"\"\"\n",
|
|
" result = {}\n",
|
|
" for col_name, widget_id in field_mapping.items():\n",
|
|
" if col_name in row:\n",
|
|
" value = row[col_name]\n",
|
|
" clean_value = None if pd.isna(value) else value\n",
|
|
" result[widget_id] = {\"value\": clean_value}\n",
|
|
" return result\n",
|
|
"\n",
|
|
" def update_email(self):\n",
|
|
" # try:\n",
|
|
" print(self.write_path)\n",
|
|
" email_df = pd.read_excel(self.write_path, sheet_name=\"Sheet0\")\n",
|
|
"\n",
|
|
" print(email_df.head())\n",
|
|
" email_df['公司ID'] = email_df['公司ID'].astype(str)\n",
|
|
" email_df['门店ID'] = email_df['门店ID'].astype(str)\n",
|
|
" email_df['指标归属日期'] = pd.to_datetime(email_df['指标归属日期'], format=\"%Y/%m/%d\").dt.strftime(\"%Y-%m-%d\")\n",
|
|
" email_df[\"门店创建时间\"] = pd.to_datetime(email_df['门店创建时间'], format=\"%Y-%m-%d %H:%M:%S\")\n",
|
|
" new_email_df = email_df.copy() # 拷贝传参\n",
|
|
" for index, row in email_df.iterrows():\n",
|
|
" email_df.loc[index, '指标归属日期'] = common_module.time_to_UTC(row['指标归属日期'])\n",
|
|
" email_df.loc[index, '门店创建时间'] = common_module.time_to_UTC(row['门店创建时间'])\n",
|
|
"\n",
|
|
" email_data = [self.row_to_dict(row, self.field_mapping) for index, row in email_df.iterrows()]\n",
|
|
" new_email_data = {'api_key': \"673457d6837e60a418e0e56b\",\n",
|
|
" 'entry_id': \"67d636bb6212b7619a7a4231\",\n",
|
|
" # 'entry_id': \"684157deab0c4c9ec636ed36\", # 测试\n",
|
|
" \"data_list\": email_data}\n",
|
|
" # api_instance.entry_data_batch_create(new_email_data)\n",
|
|
" os.remove(self.write_path)\n",
|
|
" return new_email_df\n",
|
|
"\n",
|
|
" def up_to_BI(self, df):\n",
|
|
" # 连接信息\n",
|
|
" HS_DB_Config = {\n",
|
|
" 'host': \"f6-public.rwlb.rds.aliyuncs.com\",\n",
|
|
" 'user': \"rw_operation_data_relay\",\n",
|
|
" 'password': \"m+q5Z4%IVuF9bf\",\n",
|
|
" 'database': \"f6operation_data_relay\"\n",
|
|
" } # 衡时数据库链接配置-mysql\n",
|
|
" table_name = \"thailand_store_data_email\"\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # 连接\n",
|
|
" connection = pymysql.connect(\n",
|
|
" host=HS_DB_Config[\"host\"],\n",
|
|
" user=HS_DB_Config[\"user\"],\n",
|
|
" password=HS_DB_Config[\"password\"],\n",
|
|
" database=HS_DB_Config[\"database\"],\n",
|
|
" charset='utf8mb4',\n",
|
|
" )\n",
|
|
"\n",
|
|
" print(f\"成功连接 {HS_DB_Config[\"database\"]}\")\n",
|
|
"\n",
|
|
" with connection.cursor() as cursor:\n",
|
|
" # 处理数据\n",
|
|
" df = df.where(pd.notna(df), None) # 将NaN转换为None\n",
|
|
"\n",
|
|
" # 生成插入语句\n",
|
|
" columns = ', '.join(df.columns)\n",
|
|
" placeholders = ', '.join(['%s'] * len(df.columns))\n",
|
|
" insert_query = f\"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})\"\n",
|
|
"\n",
|
|
" # 批量插入数据\n",
|
|
" records = [tuple(row) for row in df.values]\n",
|
|
" cursor.executemany(insert_query, records)\n",
|
|
" connection.commit()\n",
|
|
"\n",
|
|
" print(f\"成功导入 {cursor.rowcount} 条记录到 {table_name} 表\")\n",
|
|
"\n",
|
|
" except Error as e:\n",
|
|
" print(f\"数据库操作出错: {e}\")\n",
|
|
" if connection:\n",
|
|
" connection.rollback()\n",
|
|
" finally:\n",
|
|
" if connection:\n",
|
|
" connection.close()\n",
|
|
"\n",
|
|
" @classmethod\n",
|
|
" def main(cls):\n",
|
|
" \"\"\"邮件处理器的主入口点\"\"\"\n",
|
|
" task_start_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
|
|
" processor = cls()\n",
|
|
" processor.connect_email_by_pop3()\n",
|
|
"\n",
|
|
" email_df = processor.update_email()\n",
|
|
" processor.up_to_BI(email_df) # 发送到BI\n",
|
|
" common_module.send_task_status(task_start_time, \"海外邮件推送\")\n",
|
|
"\n",
|
|
"\n",
|
|
"if __name__ == \"__main__\":\n",
|
|
" EmailProcessor.main()\n"
|
|
]
|
|
},
|
|
{
|
|
"metadata": {
|
|
"jupyter": {
|
|
"is_executing": true
|
|
}
|
|
},
|
|
"cell_type": "code",
|
|
"source": [
|
|
"import os\n",
|
|
"import poplib\n",
|
|
"import time\n",
|
|
"import pandas as pd\n",
|
|
"from email.parser import Parser\n",
|
|
"from email.header import decode_header\n",
|
|
"from email.utils import parseaddr\n",
|
|
"from datetime import datetime\n",
|
|
"from config import Config\n",
|
|
"from api import API\n",
|
|
"from back_ground_module import CommonModule\n",
|
|
"import pandas as pd\n",
|
|
"import pymysql\n",
|
|
"from pymysql import Error\n",
|
|
"\n",
|
|
"api_instance = API()\n",
|
|
"common_module = CommonModule()\n",
|
|
"\n",
|
|
"\n",
|
|
"class EmailProcessor:\n",
|
|
" \"\"\"泰国CRM每日邮件写入简道云与BI\"\"\"\n",
|
|
" def __init__(self):\n",
|
|
" # 配置信息\n",
|
|
" self.user_email_address = 'caowei@f6car.cn'\n",
|
|
" self.user_password = 'Cw@340826'\n",
|
|
" self.pop_server_host = 'pop.qiye.aliyun.com'\n",
|
|
" self.pop_server_port = '995'\n",
|
|
" self.send_name = \"f6car\"\n",
|
|
" self.send_addr = 'noreplay@notice.f6car.com'\n",
|
|
"\n",
|
|
" # 创建输出目录(如果不存在)\n",
|
|
" output_dir = \"email\"\n",
|
|
" os.makedirs(output_dir, exist_ok=True)\n",
|
|
" nowtime = datetime.now().strftime(\"%Y%m%d%H%M%S\")\n",
|
|
"\n",
|
|
" self.write_path = os.path.join(output_dir, f'email_data.xlsx')\n",
|
|
"\n",
|
|
" # 初始化字段映射\n",
|
|
" self.field_mapping = {\n",
|
|
" \"指标归属日期\": \"_widget_1742174728275\",\n",
|
|
" \"公司ID\": \"_widget_1742091963874\",\n",
|
|
" \"公司名称\": \"_widget_1742091963875\",\n",
|
|
" \"门店ID\": \"_widget_1742091963876\",\n",
|
|
" \"门店名称\": \"_widget_1742091963877\",\n",
|
|
" \"门店简称\": \"_widget_1742091963878\",\n",
|
|
" \"门店创建时间\": \"_widget_1742091963879\",\n",
|
|
" \"指标类型\": \"_widget_1742091963880\",\n",
|
|
" \"指标值\": \"_widget_1742091963882\",\n",
|
|
" \"指标子类型\": \"_widget_1742091963881\",\n",
|
|
" \"指标值\": \"_widget_1742091963882\"\n",
|
|
" }\n",
|
|
"\n",
|
|
" def connect_email_by_pop3(self):\n",
|
|
" try:\n",
|
|
" # 连接到POP服务器\n",
|
|
" email_server = poplib.POP3_SSL(\n",
|
|
" host=self.pop_server_host,\n",
|
|
" port=self.pop_server_port,\n",
|
|
" timeout=10\n",
|
|
" )\n",
|
|
" print(\"POP服务器连接成功,开始用户邮箱验证\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"POP服务器连接失败。错误: {str(e)}\")\n",
|
|
" exit(1)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # 验证用户邮箱\n",
|
|
" email_server.user(self.user_email_address)\n",
|
|
" print(\"用户邮箱验证成功,开始授权码验证\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"用户邮箱验证失败。错误: {str(e)}\")\n",
|
|
" exit(1)\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # 验证密码/授权码\n",
|
|
" email_server.pass_(self.user_password)\n",
|
|
" print(\"授权码验证成功,开始处理邮件\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"授权码验证失败。错误: {str(e)}\")\n",
|
|
" exit(1)\n",
|
|
"\n",
|
|
" # 处理邮件\n",
|
|
" self.parse_email_server(email_server)\n",
|
|
"\n",
|
|
" def parse_email_server(self, email_server):\n",
|
|
" # 获取所有邮件列表\n",
|
|
" resp, mails, octets = email_server.list()\n",
|
|
" index = len(mails)\n",
|
|
"\n",
|
|
" # 获取今天的零点时间戳\n",
|
|
" now = datetime.now()\n",
|
|
" today_start = datetime(now.year, now.month, now.day) # 当天零点\n",
|
|
" today_start_timestamp = int(today_start.timestamp()) # 转换为时间戳\n",
|
|
"\n",
|
|
" # 按逆序处理邮件(最新的先处理)\n",
|
|
" for i in range(index, 0, -1):\n",
|
|
"\n",
|
|
" # print(f\"正在处理邮件 {i},{index}\")\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # 获取邮件内容\n",
|
|
" resp, lines, octets = email_server.retr(i)\n",
|
|
" msg_content = b'\\r\\n'.join(lines).decode('utf-8', errors='ignore') # 避免解码错误\n",
|
|
" msg = Parser().parsestr(msg_content)\n",
|
|
"\n",
|
|
" # 处理邮件时间\n",
|
|
" mail_datetime = self.parse_mail_time(msg.get(\"date\"))\n",
|
|
" if not mail_datetime: # 如果邮件时间解析失败,跳过\n",
|
|
" # logging.warning(f\"Failed to parse date for email {i}. Skipping...\")\n",
|
|
" continue\n",
|
|
"\n",
|
|
" # 将邮件时间转换为时间戳\n",
|
|
" mail_timestamp = int(mail_datetime.timestamp())\n",
|
|
"\n",
|
|
" # 如果邮件不是今天的,跳过\n",
|
|
" if mail_timestamp < today_start_timestamp:\n",
|
|
" # logging.info(f\"Skipping email {i} as it is not from today.\")\n",
|
|
" # continue\n",
|
|
" break\n",
|
|
"\n",
|
|
" # 打印邮件接收时间\n",
|
|
" mail_time_str = datetime.strftime(mail_datetime, '%Y-%m-%d %H:%M:%S')\n",
|
|
" print(f\"邮件接收时间: {mail_time_str}\")\n",
|
|
"\n",
|
|
" # 处理邮件内容\n",
|
|
" self.parser_content(msg, 0)\n",
|
|
"\n",
|
|
" except Exception as e:\n",
|
|
" # logging.error(f\"Error processing email {i}: {e}\")\n",
|
|
" print(f\"Error processing email {i}: {e}\")\n",
|
|
" continue\n",
|
|
"\n",
|
|
" # 退出服务器\n",
|
|
" email_server.quit()\n",
|
|
"\n",
|
|
" def parser_content(self, msg, indent):\n",
|
|
" print(\"邮件处理\")\n",
|
|
" if indent == 0:\n",
|
|
" self.parser_email_header(msg)\n",
|
|
"\n",
|
|
" # 解析发件人信息\n",
|
|
" hdr, addr = parseaddr(msg['From'])\n",
|
|
" name, charset = decode_header(hdr)[0]\n",
|
|
" if charset:\n",
|
|
" name = name.decode(charset)\n",
|
|
" print(f'发件人姓名: {name}, 发件人邮箱: {addr}')\n",
|
|
"\n",
|
|
" if name == self.send_name:\n",
|
|
" # 下载附件\n",
|
|
" for part in msg.walk():\n",
|
|
" file_name = part.get_filename()\n",
|
|
" if file_name is None:\n",
|
|
" continue\n",
|
|
"\n",
|
|
" filename = self.decode_str(file_name)\n",
|
|
" data = part.get_payload(decode=True)\n",
|
|
" try:\n",
|
|
" with open(self.write_path, 'wb') as att_file:\n",
|
|
" att_file.write(data)\n",
|
|
" print(f\"附件保存成功: {self.write_path}+{filename}\")\n",
|
|
" except Exception as e:\n",
|
|
" print(f\"附件保存失败: {str(e)}\")\n",
|
|
"\n",
|
|
" if msg.is_multipart():\n",
|
|
" parts = msg.get_payload()\n",
|
|
" for part in parts:\n",
|
|
" self.parser_content(part, indent + 1)\n",
|
|
" else:\n",
|
|
" # 解析邮件正文\n",
|
|
" content_type = msg.get_content_type()\n",
|
|
" if content_type in ['text/plain', 'text/html']:\n",
|
|
" content = msg.get_payload(decode=True)\n",
|
|
" charset = self.guess_charset(msg)\n",
|
|
" if charset:\n",
|
|
" content = content.decode(charset)\n",
|
|
" print(f\"{' ' * indent}邮件内容: {content}\")\n",
|
|
"\n",
|
|
" def parser_email_header(self, msg):\n",
|
|
" # 解析邮件主题\n",
|
|
" subject = msg['Subject']\n",
|
|
" value, charset = decode_header(subject)[0]\n",
|
|
" if charset:\n",
|
|
" value = value.decode(charset)\n",
|
|
" print(f'邮件主题: {value}')\n",
|
|
"\n",
|
|
" # 解析发件人信息\n",
|
|
" hdr, addr = parseaddr(msg['From'])\n",
|
|
" name, charset = decode_header(hdr)[0]\n",
|
|
" if charset:\n",
|
|
" name = name.decode(charset)\n",
|
|
" print(f'发件人姓名: {name}, 发件人邮箱: {addr}')\n",
|
|
"\n",
|
|
" # 解析收件人信息\n",
|
|
" hdr, addr = parseaddr(msg['To'])\n",
|
|
" name, charset = decode_header(hdr)[0]\n",
|
|
" if charset:\n",
|
|
" name = name.decode(charset)\n",
|
|
" print(f'收件人姓名: {name}, 收件人邮箱: {addr}')\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def decode_str(s):\n",
|
|
" value, charset = decode_header(s)[0]\n",
|
|
" if charset:\n",
|
|
" value = value.decode(charset)\n",
|
|
" return value\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def guess_charset(msg):\n",
|
|
" charset = msg.get_charset()\n",
|
|
" if charset is None:\n",
|
|
" content_type = msg.get('Content-Type', '').lower()\n",
|
|
" for item in content_type.split(';'):\n",
|
|
" item = item.strip()\n",
|
|
" if item.startswith('charset'):\n",
|
|
" charset = item.split('=')[1]\n",
|
|
" break\n",
|
|
" return charset\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def parse_mail_time(mail_datetime):\n",
|
|
" GMT_FORMAT = \"%a, %d %b %Y %H:%M:%S\"\n",
|
|
" GMT_FORMAT2 = \"%d %b %Y %H:%M:%S\"\n",
|
|
" index = mail_datetime.find(' +0')\n",
|
|
" if index > 0:\n",
|
|
" mail_datetime = mail_datetime[:index] # 移除时区信息\n",
|
|
" formats = [GMT_FORMAT, GMT_FORMAT2]\n",
|
|
" for ft in formats:\n",
|
|
" try:\n",
|
|
" mail_datetime = datetime.strptime(mail_datetime, ft)\n",
|
|
" return mail_datetime\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
" raise Exception(\"邮件时间格式解析错误\")\n",
|
|
"\n",
|
|
" @staticmethod\n",
|
|
" def row_to_dict(row, field_mapping):\n",
|
|
" \"\"\"将一行数据转换为格式化字典\"\"\"\n",
|
|
" result = {}\n",
|
|
" for col_name, widget_id in field_mapping.items():\n",
|
|
" if col_name in row:\n",
|
|
" value = row[col_name]\n",
|
|
" clean_value = None if pd.isna(value) else value\n",
|
|
" result[widget_id] = {\"value\": clean_value}\n",
|
|
" return result\n",
|
|
"\n",
|
|
" def update_email(self):\n",
|
|
" # try:\n",
|
|
" print(self.write_path)\n",
|
|
" email_df = pd.read_excel(fr\"C:\\Users\\Administrator.DESKTOP-7IC2USJ\\Desktop\\新建文件夹\\门店使用数据周报2025-07-11.xlsx\", sheet_name=\"Sheet0\")\n",
|
|
"\n",
|
|
" print(email_df.head())\n",
|
|
" email_df['公司ID'] = email_df['公司ID'].astype(str)\n",
|
|
" email_df['门店ID'] = email_df['门店ID'].astype(str)\n",
|
|
" email_df['指标归属日期'] = pd.to_datetime(email_df['指标归属日期'], format=\"%Y/%m/%d\").dt.strftime(\"%Y-%m-%d\")\n",
|
|
" email_df[\"门店创建时间\"] = pd.to_datetime(email_df['门店创建时间'], format=\"%Y-%m-%d %H:%M:%S\")\n",
|
|
" new_email_df = email_df.copy() # 拷贝传参\n",
|
|
" for index, row in email_df.iterrows():\n",
|
|
" email_df.loc[index, '指标归属日期'] = common_module.time_to_UTC(row['指标归属日期'])\n",
|
|
" email_df.loc[index, '门店创建时间'] = common_module.time_to_UTC(row['门店创建时间'])\n",
|
|
"\n",
|
|
" email_data = [self.row_to_dict(row, self.field_mapping) for index, row in email_df.iterrows()]\n",
|
|
" new_email_data = {'api_key': \"673457d6837e60a418e0e56b\",\n",
|
|
" 'entry_id': \"67d636bb6212b7619a7a4231\",\n",
|
|
" # 'entry_id': \"684157deab0c4c9ec636ed36\", # 测试\n",
|
|
" \"data_list\": email_data}\n",
|
|
" api_instance.entry_data_batch_create(new_email_data)\n",
|
|
" os.remove(self.write_path)\n",
|
|
" return new_email_df\n",
|
|
"\n",
|
|
" def up_to_BI(self, df):\n",
|
|
" # 连接信息\n",
|
|
" HS_DB_Config = Config.HS_DB_Config\n",
|
|
" table_name = \"thailand_store_data_email\"\n",
|
|
"\n",
|
|
" try:\n",
|
|
" # 连接\n",
|
|
" connection = pymysql.connect(\n",
|
|
" host=HS_DB_Config[\"host\"],\n",
|
|
" user=HS_DB_Config[\"user\"],\n",
|
|
" password=HS_DB_Config[\"password\"],\n",
|
|
" database=HS_DB_Config[\"database\"],\n",
|
|
" charset='utf8mb4',\n",
|
|
" )\n",
|
|
"\n",
|
|
" print(f\"成功连接 {HS_DB_Config[\"database\"]}\")\n",
|
|
"\n",
|
|
" with connection.cursor() as cursor:\n",
|
|
" # 处理数据\n",
|
|
" df = df.where(pd.notna(df), None) # 将NaN转换为None\n",
|
|
"\n",
|
|
" # 生成插入语句\n",
|
|
" columns = ', '.join(df.columns)\n",
|
|
" placeholders = ', '.join(['%s'] * len(df.columns))\n",
|
|
" insert_query = f\"INSERT INTO {table_name} ({columns}) VALUES ({placeholders})\"\n",
|
|
"\n",
|
|
" # 批量插入数据\n",
|
|
" records = [tuple(row) for row in df.values]\n",
|
|
" cursor.executemany(insert_query, records)\n",
|
|
" connection.commit()\n",
|
|
"\n",
|
|
" print(f\"成功导入 {cursor.rowcount} 条记录到 {table_name} 表\")\n",
|
|
"\n",
|
|
" except Error as e:\n",
|
|
" print(f\"数据库操作出错: {e}\")\n",
|
|
" if connection:\n",
|
|
" connection.rollback()\n",
|
|
" finally:\n",
|
|
" if connection:\n",
|
|
" connection.close()\n",
|
|
"\n",
|
|
" @classmethod\n",
|
|
" def main(cls):\n",
|
|
" \"\"\"邮件处理器的主入口点\"\"\"\n",
|
|
" task_start_time = datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
|
|
" processor = cls()\n",
|
|
" # processor.connect_email_by_pop3()\n",
|
|
"\n",
|
|
" email_df = processor.update_email()\n",
|
|
" processor.up_to_BI(email_df) # 发送到BI\n",
|
|
" common_module.send_task_status(task_start_time, \"海外邮件推送\")\n",
|
|
"\n",
|
|
"\n",
|
|
"if __name__ == \"__main__\":\n",
|
|
" EmailProcessor.main()\n"
|
|
],
|
|
"id": "6ebd484f9d123928",
|
|
"outputs": [],
|
|
"execution_count": null
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 2
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython2",
|
|
"version": "2.7.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|