Files
saas/test/宜搭获取续约待办数据.py
T

189 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
from datetime import datetime, timezone, timedelta
import pandas as pd
from tqdm import tqdm
import json
from yd_api import YDAPI
from api import API
import time
# Directory that receives the exported CSV (used by GetYDData.main); it is
# created at import time so later writes cannot fail on a missing folder.
output_dir = "output"
os.makedirs(output_dir, exist_ok=True)
# Module-level API clients shared by the code below.
# NOTE(review): api_instance is not referenced in the visible part of this
# file — confirm it is still needed.
api_instance = API()
yd_api_instance = YDAPI()
def generate_monthly_ranges(start: str, end: str):
    """Split ``[start, end)`` into consecutive calendar-month segments.

    Each segment is a half-open ``(segment_start, segment_end)`` pair of
    ISO-8601 strings with a ``Z`` suffix, e.g.
    ``[('2025-11-01T00:00:00Z', '2025-12-01T00:00:00Z'), ...]``.

    The first segment starts exactly at ``start`` and the last one ends
    exactly at ``end``; every interior boundary is midnight on the first day
    of a month. The time-of-day of ``start``/``end`` is preserved, so the
    final, partial month really reaches up to ``end`` instead of being cut
    back to midnight of that day.

    Args:
        start: ISO-8601 timestamp (a trailing ``Z`` is accepted).
        end: ISO-8601 timestamp (a trailing ``Z`` is accepted).

    Returns:
        A list of ``(start, end)`` string tuples; empty when ``start >= end``.

    Raises:
        ValueError: if either argument is not a valid ISO-8601 timestamp.
        TypeError: if one argument carries a UTC offset and the other does not.
    """
    stamp_format = "%Y-%m-%dT%H:%M:%SZ"

    def _parse(stamp):
        parsed = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
        # The output is always labelled "Z", so express aware inputs in UTC.
        if parsed.tzinfo is not None:
            parsed = parsed.astimezone(timezone.utc)
        return parsed

    start_dt = _parse(start)
    end_dt = _parse(end)

    ranges = []
    current = start_dt
    while current < end_dt:
        # Snap to the first instant of the current month before stepping, so
        # a mid-month / mid-day start does not shift later boundaries.
        month_start = current.replace(
            day=1, hour=0, minute=0, second=0, microsecond=0
        )
        if month_start.month == 12:
            next_month = month_start.replace(year=month_start.year + 1, month=1)
        else:
            next_month = month_start.replace(month=month_start.month + 1)

        # Never run past the requested end.
        segment_end = min(next_month, end_dt)
        ranges.append(
            (current.strftime(stamp_format), segment_end.strftime(stamp_format))
        )
        current = next_month
    return ranges
class GetYDData:
    """Export RUNNING process instances of one form to a CSV file.

    The exporter pulls instances from ``yd_api_instance`` window by window
    (one long initial window followed by per-month windows up to now),
    replaces option values of select/radio fields with their display labels
    and writes the resulting ``formData`` rows to ``output_dir``.
    """

    # Page size requested from the API; also used to derive the page count.
    PAGE_SIZE = 100
    # Components whose stored value is an option key that needs a label lookup.
    OPTION_COMPONENTS = ("SelectField", "RadioField")

    def __init__(self):
        """Set the form identifiers and compute the time windows to fetch."""
        self.FORMID = "FORM-PE866MD1MJMU0WGLYRFLYEN5YN9L1I55Z7ZUK22"
        self.appType = "APP_UYZ0KG6L0CCNV80GZ66O"
        # NOTE(review): this looks like a credential committed to source —
        # consider loading it from the environment or a config file.
        self.systemToken = "XA966F81JAJOFCVVVKO64E9MIIZV1EWE5SFMKJ2"

        # Window 1: 2025-01-01 up to 2025-11-01, fetched as a single range.
        first_segment = ("2025-01-01T00:00:00Z", "2025-11-01T00:00:00Z")

        # Window 2..n: 2025-11-01 up to the current UTC time, split by month.
        now_utc_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        monthly_segments = generate_monthly_ranges("2025-11-01T00:00:00Z", now_utc_str)

        # All windows, in chronological order.
        self.time_ranges = [first_segment] + monthly_segments

        print("📅 计划拉取以下时间段:")
        for i, (s, e) in enumerate(self.time_ranges, 1):
            print(f" {i}. {s}{e}")

    @staticmethod
    def _option_label(text_obj, val):
        """Return the display label of one option, falling back to ``str(val)``."""
        if text_obj is None:
            # A JSON null label would otherwise become the literal "None".
            return str(val)
        if not isinstance(text_obj, dict):
            return str(text_obj)

        zh_text = text_obj.get("zh_CN")
        if zh_text is not None:
            return zh_text

        # Without a zh_CN entry, accept a "value" entry only when it is a
        # string wrapped in double quotes, and strip those quotes.
        raw = text_obj.get("value")
        if isinstance(raw, str) and raw.startswith('"') and raw.endswith('"'):
            return raw[1:-1]
        return str(val)

    def build_value_to_label_map(self, form_structure):
        """Build ``{fieldId: {option value (str): label}}`` from a form structure.

        Only fields whose ``componentName`` is in ``OPTION_COMPONENTS`` and
        that carry a non-empty ``props.dataSource`` are considered. Options
        without a ``value`` are skipped. Missing or null ``result`` / ``props``
        entries are treated as empty.

        Args:
            form_structure: dict with a ``"result"`` list of field descriptors.

        Returns:
            The nested value-to-label mapping; fields with no usable option
            are omitted.
        """
        value_to_label_map = {}
        fields = (form_structure or {}).get("result") or []
        for field in fields:
            if field.get("componentName") not in self.OPTION_COMPONENTS:
                continue
            data_source = (field.get("props") or {}).get("dataSource") or []

            option_map = {}
            for opt in data_source:
                val = opt.get("value")
                if val is None:
                    continue
                option_map[str(val)] = self._option_label(opt.get("text", {}), val)

            if option_map:
                value_to_label_map[field.get("fieldId")] = option_map
        return value_to_label_map

    def convert_record_values(self, record, value_map):
        """Return a copy of ``record`` with option values replaced by labels.

        A value is replaced only when its key has an entry in ``value_map``,
        the value is not ``None`` and ``str(value)`` is a known option;
        everything else is copied unchanged. A missing (``None``) record
        yields an empty dict.
        """
        converted = {}
        for key, val in (record or {}).items():
            labels = value_map.get(key)
            if labels is not None and val is not None:
                converted[key] = labels.get(str(val), val)
            else:
                converted[key] = val
        return converted

    def _read_page(self, token, page, start_time, end_time):
        """Request one page of RUNNING instances modified inside the window."""
        return yd_api_instance.read_processes_instances(
            token=token,
            formUuid=self.FORMID,
            page=page,
            n=self.PAGE_SIZE,
            appType=self.appType,
            systemToken=self.systemToken,
            instanceStatus="RUNNING",
            modifiedFromTimeGMT=start_time,
            modifiedToTimeGMT=end_time,
        )

    def fetch_records_in_range(self, token, start_time, end_time):
        """Fetch every record modified between ``start_time`` and ``end_time``.

        The first page supplies ``totalCount``, from which the number of
        remaining pages is derived. A failure on the first page yields an
        empty list; a failure on a later page is reported and that page is
        skipped (best effort).

        Returns:
            A list with the ``data`` entries of all pages that were read.
        """
        try:
            first_page = self._read_page(token, 1, start_time, end_time)
        except Exception as e:
            print(f"❌ 首页请求失败 ({start_time} {end_time}): {e}")
            return []

        first_page = first_page or {}
        # "or 0" / "or []" guard against JSON nulls in the response.
        total_count = first_page.get("totalCount") or 0
        total_pages = (total_count + self.PAGE_SIZE - 1) // self.PAGE_SIZE
        print(f"📊 [{start_time[:10]} {end_time[:10]}] 总记录数: {total_count}, 共 {total_pages}")

        all_records = []
        if total_count <= 0:
            return all_records

        all_records.extend(first_page.get("data") or [])
        for page in tqdm(range(2, total_pages + 1), desc=f"{start_time[:7]}"):
            try:
                resp = self._read_page(token, page, start_time, end_time)
                all_records.extend(resp.get("data") or [])
                time.sleep(0.15)  # small pause between successful requests
            except Exception as e:
                print(f"⚠️ 第 {page} 页失败 ({start_time[:10]}): {e}")
                continue
        return all_records

    def main(self):
        """Run the export: load option labels, fetch, convert and save as CSV."""
        # Step 1: fetch the form structure and build the option-label lookup.
        token = yd_api_instance.generateToken()
        form_struct = yd_api_instance.get_form_structures(
            token=token,
            formUuid=self.FORMID
        )
        value_map = self.build_value_to_label_map(form_struct)
        print("\n✅ 表单选项映射构建完成")

        # Step 2: fetch the instances window by window.
        all_records = []
        for start_time, end_time in self.time_ranges:
            print(f"\n⏳ 拉取: {start_time}{end_time}")
            records = self.fetch_records_in_range(token, start_time, end_time)
            all_records.extend(records)
        print(f"\n📥 总共获取 {len(all_records)} 条流程实例")

        # Step 3: convert the formData of every instance (null formData -> {}).
        converted_records = [
            self.convert_record_values(inst.get("formData") or {}, value_map)
            for inst in all_records
        ]

        # Step 4: save; utf_8_sig writes a BOM at the start of the file.
        if converted_records:
            df = pd.DataFrame(converted_records)
            output_path = os.path.join(output_dir, "converted_yd_data.csv")
            df.to_csv(output_path, index=False, encoding="utf_8_sig")
            print(f"\n✅ 成功保存 {len(converted_records)} 条记录至: {output_path}")
        else:
            print("\n❌ 无有效数据")
if __name__ == "__main__":
    # Run the export only when executed as a script, not when imported.
    exporter = GetYDData()
    exporter.main()