saas1.6
This commit is contained in:
@@ -0,0 +1,400 @@
|
||||
"""字段监控"""
|
||||
import sys
|
||||
from datetime import datetime, timedelta, timezone
|
||||
import pandas as pd
|
||||
import zipfile
|
||||
import logging
|
||||
from pathlib import Path
|
||||
import json
|
||||
import requests
|
||||
from api import API
|
||||
import time
|
||||
import os
|
||||
from log_config import configure_task_logger, configure_error_task_logger
|
||||
from back_ground_module import CommonModule
|
||||
|
||||
|
||||
# 获取已经配置好的常规日志记录器
|
||||
logger = configure_task_logger()
|
||||
|
||||
# 获取已经配置好的错误任务日志记录器
|
||||
error_task_logger = configure_error_task_logger()
|
||||
common_tools = CommonModule()
|
||||
|
||||
# ---------------------------- 配置项 ----------------------------
|
||||
class Config:
|
||||
OUTPUT_DIR = "output"
|
||||
DATA_DIR = "数据快照存储"
|
||||
ARCHIVE_DIR = "压缩包存储"
|
||||
RETAIN_DAYS = 7
|
||||
COMPRESS_FORMAT = "zip"
|
||||
LOG_FILE = os.path.join(OUTPUT_DIR+"data_monitor.log")
|
||||
CHANGES_FILE = os.path.join(OUTPUT_DIR+"changes_summary.csv")
|
||||
MAX_RETRIES = 3
|
||||
RETRY_DELAY = 0.5
|
||||
|
||||
|
||||
# ---------------------- 日志配置 -----------------------
|
||||
# class Logger:
|
||||
# @staticmethod
|
||||
# def setup():
|
||||
# logging.basicConfig(
|
||||
# level=logging.INFO,
|
||||
# format='%(asctime)s - %(levelname)s - %(message)s',
|
||||
# handlers=[
|
||||
# logging.StreamHandler(),
|
||||
# logging.FileHandler(Config.LOG_FILE)
|
||||
# ]
|
||||
# )
|
||||
# return logging.getLogger(__name__)
|
||||
#
|
||||
#
|
||||
# logger = Logger.setup()
|
||||
|
||||
|
||||
# ---------------------- 工具函数 -----------------------
|
||||
class Utils:
|
||||
@staticmethod
|
||||
def get_path(*path_parts):
|
||||
return str(Path(*path_parts))
|
||||
|
||||
@staticmethod
|
||||
def ensure_dir(path):
|
||||
Path(path).mkdir(parents=True, exist_ok=True)
|
||||
logger.debug(f"确保目录存在: {path}")
|
||||
|
||||
@staticmethod
|
||||
def get_iso_time():
|
||||
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
|
||||
|
||||
@staticmethod
|
||||
def is_first_run_today():
|
||||
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
snapshot_file = Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR, f"snapshot_{today}.csv")
|
||||
widget_file = Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR, f"all_widgets_{today}.csv")
|
||||
return not (os.path.exists(snapshot_file) and os.path.exists(widget_file))
|
||||
|
||||
|
||||
# ---------------------- API 客户端 -----------------------
|
||||
class APIClient:
|
||||
def __init__(self):
|
||||
self.headers = {
|
||||
'Authorization': 'Bearer qygHulymo1fekJk4CIZyNKjyQAzG8CFN',
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
self.api = API()
|
||||
|
||||
def request(self, url, payload, method='POST'):
|
||||
for retry in range(Config.MAX_RETRIES + 1):
|
||||
try:
|
||||
response = requests.request(
|
||||
method, url, headers=self.headers,
|
||||
data=payload, timeout=30
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response
|
||||
except requests.exceptions.RequestException as e:
|
||||
if retry == Config.MAX_RETRIES:
|
||||
raise
|
||||
time.sleep(Config.RETRY_DELAY)
|
||||
logger.warning(f"请求失败 (尝试 {retry + 1}/{Config.MAX_RETRIES}): {str(e)}")
|
||||
|
||||
|
||||
# ---------------------- 数据处理类 -----------------------
|
||||
class DataHandler:
|
||||
def __init__(self):
|
||||
self.execution_time = Utils.get_iso_time()
|
||||
self.today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
self.setup_dirs()
|
||||
self.api = APIClient()
|
||||
|
||||
def setup_dirs(self):
|
||||
self.data_dir = Utils.get_path(Config.OUTPUT_DIR, Config.DATA_DIR)
|
||||
self.archive_dir = Utils.get_path(Config.OUTPUT_DIR, Config.ARCHIVE_DIR)
|
||||
Utils.ensure_dir(self.data_dir)
|
||||
Utils.ensure_dir(self.archive_dir)
|
||||
self.last_data_file = Utils.get_path(self.data_dir, "last_data.csv")
|
||||
self.last_widget_file = Utils.get_path(self.data_dir, "last_widget_data.csv")
|
||||
|
||||
def load_last_data(self):
|
||||
try:
|
||||
last_data = pd.read_csv(self.last_data_file) if os.path.exists(self.last_data_file) else None
|
||||
last_widget = pd.read_csv(self.last_widget_file) if os.path.exists(self.last_widget_file) else None
|
||||
return last_data, last_widget
|
||||
except Exception as e:
|
||||
error_task_logger.error(f"加载上次数据失败: {str(e)}")
|
||||
return None, None
|
||||
|
||||
def save_last_data(self, data, widget_data):
|
||||
try:
|
||||
data.to_csv(self.last_data_file, index=False)
|
||||
widget_data.to_csv(self.last_widget_file, index=False)
|
||||
return True
|
||||
except Exception as e:
|
||||
error_task_logger.error(f"保存当前数据失败: {str(e)}")
|
||||
return False
|
||||
|
||||
def save_to_csv(self, data, filename):
|
||||
try:
|
||||
temp_file = filename + '.tmp'
|
||||
data.to_csv(temp_file, index=False)
|
||||
if os.path.exists(filename):
|
||||
os.remove(filename)
|
||||
os.rename(temp_file, filename)
|
||||
return True
|
||||
except Exception as e:
|
||||
error_task_logger.error(f"保存文件失败: {filename}, 错误: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
# ---------------------- 数据监控主类 -----------------------
|
||||
class DataMonitor(DataHandler):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.last_data, self.last_widget = self.load_last_data()
|
||||
|
||||
def fetch_apps(self):
|
||||
url = "https://api.jiandaoyun.com/api/v5/app/list"
|
||||
payload = json.dumps({"skip": 0, "limit": 100})
|
||||
response = self.api.request(url, payload)
|
||||
return pd.DataFrame(response.json().get("apps", []))
|
||||
|
||||
def fetch_entries(self, app_df):
|
||||
url = "https://api.jiandaoyun.com/api/v5/app/entry/list"
|
||||
all_entries = []
|
||||
|
||||
for _, app in app_df.iterrows():
|
||||
payload = json.dumps({"app_id": app['app_id']})
|
||||
response = self.api.request(url, payload)
|
||||
entries = response.json().get("forms", [])
|
||||
|
||||
if entries:
|
||||
entry_df = pd.DataFrame(entries)
|
||||
entry_df['app_id'] = app['app_id']
|
||||
all_entries.append(entry_df)
|
||||
|
||||
return pd.concat(all_entries, ignore_index=True) if all_entries else None
|
||||
|
||||
def fetch_widgets(self, entry_df):
|
||||
url = "https://api.jiandaoyun.com/api/v5/app/entry/widget/list"
|
||||
all_widgets = []
|
||||
|
||||
for _, entry in entry_df.iterrows():
|
||||
payload = json.dumps({
|
||||
"app_id": entry['app_id'],
|
||||
"entry_id": entry['entry_id']
|
||||
})
|
||||
response = self.api.request(url, payload)
|
||||
widgets = response.json().get('widgets', [])
|
||||
|
||||
if widgets:
|
||||
widget_df = pd.DataFrame(widgets)
|
||||
widget_df['app_id'] = entry['app_id']
|
||||
widget_df['entry_id'] = entry['entry_id']
|
||||
all_widgets.append(widget_df)
|
||||
|
||||
return pd.concat(all_widgets, ignore_index=True) if all_widgets else None
|
||||
|
||||
def fetch_monitor_data(self):
|
||||
payload = {"api_key": "6694d3c4fcb69ca9a111a6c4", "entry_id": "6850c044f17c934b3ec01fea"}
|
||||
data = self.api.api.entry_data_list(payload).get("data")
|
||||
data_list = pd.DataFrame(data)
|
||||
|
||||
for col in data_list.columns:
|
||||
if data_list[col].apply(lambda x: isinstance(x, (dict, list))).any():
|
||||
data_list[col] = data_list[col].astype(str)
|
||||
|
||||
return data_list.drop_duplicates()
|
||||
|
||||
def match_widgets(self, data_list, widget_list):
|
||||
if '_widget_1750122565203' not in data_list.columns:
|
||||
raise ValueError("数据列表中缺少 '_widget_1750122565203' 列")
|
||||
return widget_list[widget_list['entry_id'].isin(data_list['_widget_1750122565203'])]
|
||||
|
||||
def archive_old_data(self):
|
||||
keep_dates = [
|
||||
(datetime.now(timezone.utc) - timedelta(days=i)).strftime("%Y-%m-%d")
|
||||
for i in range(Config.RETAIN_DAYS)
|
||||
]
|
||||
|
||||
files_to_archive = [
|
||||
f for f in os.listdir(self.data_dir)
|
||||
if (f.startswith("snapshot_") or f.startswith("all_widgets_")) and f.endswith(".csv")
|
||||
]
|
||||
|
||||
for filename in files_to_archive:
|
||||
date_str = filename[9:-4] if filename.startswith("snapshot_") else filename[12:-4]
|
||||
|
||||
if date_str not in keep_dates:
|
||||
year_month = date_str[:7]
|
||||
archive_name = Utils.get_path(self.archive_dir, f"snapshots_{year_month}.{Config.COMPRESS_FORMAT}")
|
||||
file_path = Utils.get_path(self.data_dir, filename)
|
||||
|
||||
with zipfile.ZipFile(archive_name, 'a', zipfile.ZIP_DEFLATED) as zipf:
|
||||
zipf.write(file_path, arcname=filename)
|
||||
|
||||
os.remove(file_path)
|
||||
logger.debug(f"已归档 {filename} 到 {archive_name}")
|
||||
|
||||
def compare_data(self, current_data):
|
||||
if not os.path.exists(self.last_data_file):
|
||||
return None
|
||||
|
||||
last_data = pd.read_csv(self.last_data_file)
|
||||
last_data['unique_id'] = last_data['name'].astype(str) + last_data['app_id'].astype(str)
|
||||
current_data['unique_id'] = current_data['name'].astype(str) + current_data['app_id'].astype(str)
|
||||
|
||||
merged = pd.merge(
|
||||
last_data, current_data,
|
||||
on=['unique_id'],
|
||||
how='outer',
|
||||
suffixes=('_last', '_current'),
|
||||
indicator=True
|
||||
)
|
||||
|
||||
changes = {
|
||||
'added': merged[merged['_merge'] == 'right_only'],
|
||||
'deleted': merged[merged['_merge'] == 'left_only'],
|
||||
'modified': pd.DataFrame()
|
||||
}
|
||||
|
||||
for col in ['label', 'type']:
|
||||
last_col = f"{col}_last"
|
||||
current_col = f"{col}_current"
|
||||
|
||||
if last_col in merged.columns and current_col in merged.columns:
|
||||
mask = (merged['_merge'] == 'both') & (merged[last_col] != merged[current_col])
|
||||
mask = mask & ~merged[last_col].isna() & ~merged[current_col].isna()
|
||||
|
||||
if mask.any():
|
||||
modified = merged.loc[mask].copy()
|
||||
modified['changed_field'] = col
|
||||
modified['old_value'] = modified[last_col]
|
||||
modified['new_value'] = modified[current_col]
|
||||
modified['change_status'] = 'update'
|
||||
changes['modified'] = pd.concat([changes['modified'], modified])
|
||||
|
||||
return changes
|
||||
|
||||
def save_changes(self, changes, apps, entries):
|
||||
result_rows = []
|
||||
|
||||
for change_type in ['added', 'deleted', 'modified']:
|
||||
suffix = 'current' if change_type in ['added', 'modified'] else 'last'
|
||||
|
||||
for _, row in changes[change_type].iterrows():
|
||||
app_id = row[f'app_id_{suffix}']
|
||||
entry_id = row[f'entry_id_{suffix}']
|
||||
|
||||
app_name = apps.loc[apps['app_id'] == app_id, 'name'].values[0] if not apps[
|
||||
apps['app_id'] == app_id].empty else '未知应用'
|
||||
entry_name = \
|
||||
entries.loc[(entries['app_id'] == app_id) & (entries['entry_id'] == entry_id), 'name'].values[0] if not \
|
||||
entries[(entries['app_id'] == app_id) & (entries['entry_id'] == entry_id)].empty else '未知表单'
|
||||
|
||||
if change_type == 'added':
|
||||
content = f"新增字段: {row['label_current']}"
|
||||
elif change_type == 'deleted':
|
||||
content = f"删除字段: {row['label_last']}"
|
||||
else:
|
||||
content = f"由\"{row['old_value']}\"修改为\"{row['new_value']}\""
|
||||
|
||||
result_rows.append({
|
||||
'程序执行时间': self.execution_time,
|
||||
'unique_id': row['unique_id'],
|
||||
'app_id': app_id,
|
||||
'app_name': app_name,
|
||||
'entry_id': entry_id,
|
||||
'entry_name': entry_name,
|
||||
'change_type': {'added': '新增', 'deleted': '删除', 'modified': '修改'}[change_type],
|
||||
'具体内容': content
|
||||
})
|
||||
|
||||
if result_rows:
|
||||
result_df = pd.DataFrame(result_rows)
|
||||
changes_file = Utils.get_path(self.data_dir, Config.CHANGES_FILE)
|
||||
result_df.to_csv(changes_file, mode='a', header=not os.path.exists(changes_file), index=False)
|
||||
self.add_to_jiandaoyun(result_df)
|
||||
return True
|
||||
return False
|
||||
|
||||
def add_to_jiandaoyun(self, result_df):
|
||||
all_data = [{
|
||||
"_widget_1751446961315": {"value": row["app_name"]},
|
||||
"_widget_1751446961316": {"value": row["entry_name"]},
|
||||
"_widget_1751446961317": {"value": row["change_type"]},
|
||||
"_widget_1751446961318": {"value": row["具体内容"]},
|
||||
"_widget_1751446961319": {"value": row["程序执行时间"]},
|
||||
} for _, row in result_df.iterrows()]
|
||||
|
||||
payload = {
|
||||
"api_key": "6694d3c4fcb69ca9a111a6c4",
|
||||
"entry_id": "6863a402a77925690a470cc5",
|
||||
"data_list": all_data
|
||||
}
|
||||
|
||||
response = self.api.api.entry_data_batch_create(payload)
|
||||
|
||||
if isinstance(response, list):
|
||||
logger.info(f"成功写入 {len(response)} 条变更数据到简道云")
|
||||
return True
|
||||
else:
|
||||
error_task_logger.error(f"写入简道云失败: {response.get('message', '未知错误')}")
|
||||
return False
|
||||
|
||||
def run_daily_snapshot(self):
|
||||
logger.info("=== 开始每日数据快照任务 ===")
|
||||
|
||||
apps = self.fetch_apps()
|
||||
entries = self.fetch_entries(apps)
|
||||
widgets = self.fetch_widgets(entries)
|
||||
monitor_data = self.fetch_monitor_data()
|
||||
matched_data = self.match_widgets(monitor_data, widgets)
|
||||
|
||||
self.save_to_csv(widgets, Utils.get_path(self.data_dir, f"all_widgets_{self.today}.csv"))
|
||||
self.save_to_csv(matched_data, Utils.get_path(self.data_dir, f"snapshot_{self.today}.csv"))
|
||||
self.archive_old_data()
|
||||
self.save_last_data(matched_data, widgets)
|
||||
|
||||
logger.info("=== 每日数据快照任务成功完成 ===")
|
||||
return True
|
||||
|
||||
def run_hourly_check(self):
|
||||
logger.info("=== 开始每小时数据检查任务 ===")
|
||||
|
||||
apps = self.fetch_apps()
|
||||
entries = self.fetch_entries(apps)
|
||||
widgets = self.fetch_widgets(entries)
|
||||
monitor_data = self.fetch_monitor_data()
|
||||
current_data = self.match_widgets(monitor_data, widgets)
|
||||
|
||||
changes = self.compare_data(current_data)
|
||||
if changes and any(len(v) > 0 for v in changes.values()):
|
||||
self.save_changes(changes, apps, entries)
|
||||
|
||||
self.save_last_data(current_data, widgets)
|
||||
|
||||
logger.info("=== 每小时数据检查任务成功完成 ===")
|
||||
return True
|
||||
|
||||
def main(self):
|
||||
import datetime
|
||||
task_start_time =datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||
logger.info(f"=== 开始数据监控任务 ({self.execution_time}) ===")
|
||||
|
||||
if Utils.is_first_run_today():
|
||||
success = self.run_daily_snapshot()
|
||||
else:
|
||||
success = self.run_hourly_check()
|
||||
|
||||
common_tools.send_task_status(task_start_time, "字段监控")
|
||||
|
||||
logger.info("=== 数据监控任务完成 ===")
|
||||
return success
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
Utils.ensure_dir(Config.OUTPUT_DIR)
|
||||
monitor = DataMonitor()
|
||||
if not monitor.main():
|
||||
sys.exit(1)
|
||||
Reference in New Issue
Block a user