189 lines
7.1 KiB
Python
189 lines
7.1 KiB
Python
import os
|
||
from datetime import datetime, timezone, timedelta
|
||
import pandas as pd
|
||
from tqdm import tqdm
|
||
import json
|
||
from yd_api import YDAPI
|
||
from api import API
|
||
import time
|
||
|
||
# Directory that receives the exported CSV; created on import if it is missing.
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)

# Module-wide client instances shared by the code below.
api_instance = API()
yd_api_instance = YDAPI()
|
||
|
||
_UTC_STAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _parse_utc_timestamp(stamp: str) -> datetime:
    """Parse an ISO-8601 string into an aware UTC datetime with whole seconds."""
    parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        # The output is always labelled "Z", so a naive input is read as UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    else:
        parsed = parsed.astimezone(timezone.utc)
    # The output format has second resolution; dropping sub-seconds up front
    # avoids emitting a zero-length trailing segment.
    return parsed.replace(microsecond=0)


def generate_monthly_ranges(start: str, end: str):
    """Split ``[start, end)`` into calendar-month segments (left-closed, right-open).

    Each segment runs from its start up to the first instant of the next
    month, except the last one, which is clipped at ``end``. The time of day
    of ``start`` and ``end`` is preserved, so passing the current time as
    ``end`` yields a final segment that reaches up to that moment rather than
    stopping at the preceding midnight.

    Args:
        start: ISO-8601 timestamp, e.g. ``"2025-11-01T00:00:00Z"``.
        end: ISO-8601 timestamp marking the exclusive upper bound.

    Returns:
        A list of ``(segment_start, segment_end)`` string tuples formatted as
        ``YYYY-MM-DDTHH:MM:SSZ``, for example
        ``[('2025-11-01T00:00:00Z', '2025-12-01T00:00:00Z'), ...]``.
        The list is empty when ``start`` is not earlier than ``end``.

    Raises:
        ValueError: If either argument is not a valid ISO-8601 timestamp.
    """
    start_dt = _parse_utc_timestamp(start)
    end_dt = _parse_utc_timestamp(end)

    ranges = []
    current = start_dt
    while current < end_dt:
        # First instant (midnight) of the following month.
        if current.month == 12:
            year, month = current.year + 1, 1
        else:
            year, month = current.year, current.month + 1
        next_month = current.replace(
            year=year, month=month, day=1, hour=0, minute=0, second=0
        )

        # Never run past the requested end.
        segment_end = min(next_month, end_dt)
        ranges.append((
            current.strftime(_UTC_STAMP_FORMAT),
            segment_end.strftime(_UTC_STAMP_FORMAT),
        ))
        current = next_month

    return ranges
|
||
|
||
class GetYDData:
    """Export the process instances of one form to a CSV file.

    The workflow, driven by :meth:`main`, is: fetch the form structure, build
    a value-to-label lookup for select/radio fields, page through the process
    instances for each configured time range, translate option values into
    their labels and write the result to ``output/converted_yd_data.csv``.
    """

    # Page size sent with every listing request; also used to derive the
    # number of pages from the reported total.
    _PAGE_SIZE = 100

    def __init__(self):
        """Set the form identifiers and compute the time ranges to fetch."""
        self.FORMID = "FORM-PE866MD1MJMU0WGLYRFLYEN5YN9L1I55Z7ZUK22"
        self.appType = "APP_UYZ0KG6L0CCNV80GZ66O"
        # NOTE(review): this looks like a credential committed to source;
        # consider loading it from configuration or the environment.
        self.systemToken = "XA966F81JAJOFCVVVKO64E9MIIZV1EWE5SFMKJ2"

        # Segment 1: 2025-01-01 up to 2025-11-01, fetched in one go.
        first_segment = ("2025-01-01T00:00:00Z", "2025-11-01T00:00:00Z")

        # Segment 2: 2025-11-01 up to the current time, split by month.
        now_utc_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        monthly_segments = generate_monthly_ranges("2025-11-01T00:00:00Z", now_utc_str)

        # All ranges, in chronological order.
        self.time_ranges = [first_segment] + monthly_segments

        print("📅 计划拉取以下时间段:")
        for i, (s, e) in enumerate(self.time_ranges, 1):
            print(f" {i}. {s} → {e}")

    @staticmethod
    def _option_label(text_obj, val):
        """Return the display label of one option, falling back to ``str(val)``.

        ``text_obj`` is the option's ``text`` entry. A dict is searched for a
        ``zh_CN`` label first, then for a ``value`` holding a double-quoted
        string; any other non-``None`` object is stringified as is.
        """
        if text_obj is None:
            # A missing label would otherwise be rendered as the text "None".
            return str(val)
        if not isinstance(text_obj, dict):
            return str(text_obj)

        label = text_obj.get("zh_CN")
        if label is not None:
            return label

        raw = text_obj.get("value")
        is_quoted = (
            isinstance(raw, str)
            and len(raw) >= 2
            and raw.startswith('"')
            and raw.endswith('"')
        )
        # Only a quoted literal is usable as a label; anything else falls
        # back to the option value itself.
        return raw[1:-1] if is_quoted else str(val)

    def build_value_to_label_map(self, form_structure):
        """Build ``{fieldId: {option value: label}}`` for select/radio fields.

        Args:
            form_structure: Form-structure response; the field list is read
                from its ``"result"`` key.

        Returns:
            A dict keyed by field id. Only ``SelectField`` and ``RadioField``
            components with at least one usable option are included; option
            values are stored as strings. Options without a ``value`` are
            skipped.
        """
        value_to_label_map = {}
        for field in form_structure.get("result") or []:
            if field.get("componentName") not in ("SelectField", "RadioField"):
                continue

            # "props" / "dataSource" may be present but null in the payload.
            props = field.get("props") or {}
            data_source = props.get("dataSource") or []

            option_map = {}
            for opt in data_source:
                val = opt.get("value")
                if val is None:
                    continue
                option_map[str(val)] = self._option_label(opt.get("text", {}), val)

            if option_map:
                value_to_label_map[field.get("fieldId")] = option_map
        return value_to_label_map

    def convert_record_values(self, record, value_map):
        """Replace option values in ``record`` with their labels.

        Args:
            record: Mapping of field id to stored value; ``None`` or an empty
                mapping yields an empty result.
            value_map: Lookup produced by :meth:`build_value_to_label_map`.

        Returns:
            A new dict with the same keys. A value is replaced only when its
            field has a lookup and ``str(value)`` is a known option; all
            other values are copied unchanged.
        """
        if not record:
            return {}

        converted = {}
        for key, val in record.items():
            labels = value_map.get(key)
            if labels is not None and val is not None:
                converted[key] = labels.get(str(val), val)
            else:
                converted[key] = val
        return converted

    def _fetch_page(self, token, page, start_time, end_time):
        """Request one page of process instances for the given time window."""
        return yd_api_instance.read_processes_instances(
            token=token,
            formUuid=self.FORMID,
            page=page,
            n=self._PAGE_SIZE,
            appType=self.appType,
            systemToken=self.systemToken,
            instanceStatus="RUNNING",
            modifiedFromTimeGMT=start_time,
            modifiedToTimeGMT=end_time,
        )

    def fetch_records_in_range(self, token, start_time, end_time):
        """Fetch every record within one time range, page by page.

        The call is best-effort: if the first page fails, an empty list is
        returned; if a later page fails, a warning is printed and that page
        is skipped.

        Args:
            token: Access token passed through to the API client.
            start_time: Lower bound, sent as ``modifiedFromTimeGMT``.
            end_time: Upper bound, sent as ``modifiedToTimeGMT``.

        Returns:
            A list with the ``data`` entries of all successfully fetched pages.
        """
        try:
            first_page = self._fetch_page(token, 1, start_time, end_time)
        except Exception as e:
            print(f"❌ 首页请求失败 ({start_time} ~ {end_time}): {e}")
            return []

        # "totalCount" / "data" may be present but null in the payload.
        total_count = first_page.get("totalCount") or 0
        total_pages = -(-total_count // self._PAGE_SIZE)  # ceiling division
        print(f"📊 [{start_time[:10]} ~ {end_time[:10]}] 总记录数: {total_count}, 共 {total_pages} 页")

        all_records = []
        if total_count > 0:
            all_records.extend(first_page.get("data") or [])
            for page in tqdm(range(2, total_pages + 1), desc=f"{start_time[:7]}"):
                try:
                    resp = self._fetch_page(token, page, start_time, end_time)
                    all_records.extend(resp.get("data") or [])
                    time.sleep(0.15)  # small pause between requests, to be safe
                except Exception as e:
                    print(f"⚠️ 第 {page} 页失败 ({start_time[:10]}): {e}")
                    continue
        return all_records

    def main(self):
        """Run the full export and write the CSV file."""
        # Step 1: fetch the form structure and build the option lookup.
        token = yd_api_instance.generateToken()
        form_struct = yd_api_instance.get_form_structures(
            token=token,
            formUuid=self.FORMID
        )
        value_map = self.build_value_to_label_map(form_struct)
        print("\n✅ 表单选项映射构建完成")

        # Step 2: fetch the records of every time range.
        all_records = []
        for start_time, end_time in self.time_ranges:
            print(f"\n⏳ 拉取: {start_time} → {end_time}")
            records = self.fetch_records_in_range(token, start_time, end_time)
            all_records.extend(records)

        print(f"\n📥 总共获取 {len(all_records)} 条流程实例")

        # Step 3: convert the formData of every instance; a null formData is
        # treated as an empty record.
        converted_records = [
            self.convert_record_values(inst.get("formData") or {}, value_map)
            for inst in all_records
        ]

        # Step 4: save.
        if converted_records:
            df = pd.DataFrame(converted_records)
            output_path = os.path.join(output_dir, "converted_yd_data.csv")
            # utf_8_sig writes a BOM at the start of the file.
            df.to_csv(output_path, index=False, encoding="utf_8_sig")
            print(f"\n✅ 成功保存 {len(converted_records)} 条记录至: {output_path}")
        else:
            print("\n❌ 无有效数据")
|
||
|
||
# Script entry point: build the exporter, then run the export.
if __name__ == "__main__":
    exporter = GetYDData()
    exporter.main()