NGV换源

This commit is contained in:
2026-01-14 15:13:44 +08:00
parent 1ef81def0f
commit 25795f4a2d
9 changed files with 1964 additions and 66 deletions
+79 -34
View File
@@ -81,50 +81,77 @@ class CommonModule:
def get_ngv_details(self, days_back=1):
    """
    Fetch the NGV detail rows for one day from the MySQL table
    `jdy_ngv_data_source`.

    The target day is `days_back` days before the current local time and is
    matched against the `date_id` column as an integer in YYYYMMDD form.

    Args:
        days_back: Offset in days relative to today; defaults to 1 (yesterday).

    Returns:
        A pandas DataFrame in which every cell is text and every missing
        value (SQL NULL / NaN / NaT) is the empty string. If a
        `saas_create_time` column is present it is reformatted to
        'YYYY-MM-DD'; values that do not match '%Y-%m-%d %H:%M:%S' become ''.
        When no rows match, the DataFrame is empty but still carries the
        column names of the result set. Returns None if any step fails
        (the error is logged with its traceback).
    """
    conn = None
    cursor = None
    try:
        # Open the MySQL connection; DictCursor returns each row as a
        # {column_name: value} mapping.
        conn = pymysql.connect(
            host=Config.BI_CONN_host,
            database=Config.BI_CONN_INFO_database,
            user=Config.BI_CONN_INFO_user,
            password=Config.BI_CONN_INFO_password,
            charset='utf8mb4',
            cursorclass=pymysql.cursors.DictCursor,
        )
        cursor = conn.cursor()

        # Compute the YYYYMMDD id of the day `days_back` days ago.
        target_time = datetime.now() - timedelta(days=days_back)
        target_date_id = int(target_time.strftime('%Y%m%d'))

        # Parameterized query: the driver binds the date id.
        sql = """
            SELECT *
            FROM `jdy_ngv_data_source`
            WHERE `date_id` = %s;
        """
        cursor.execute(sql, (target_date_id,))
        rows = cursor.fetchall()

        # Take the column names from the cursor so that an empty result set
        # still yields a DataFrame with the expected columns.
        columns = [field[0] for field in (cursor.description or ())]
        raw = pd.DataFrame(list(rows), columns=columns or None)

        # Remember which cells are missing BEFORE stringifying: astype(str)
        # would otherwise turn None into the literal text 'None'.
        missing = raw.isna()
        data_NGV = raw.astype(str).mask(missing, '')

        # Only saas_create_time is normalized; all other time-like columns
        # keep the text exactly as read from the database.
        if 'saas_create_time' in data_NGV.columns:
            parsed = pd.to_datetime(
                data_NGV['saas_create_time'],
                format="%Y-%m-%d %H:%M:%S",  # explicit format avoids the inference warning
                errors='coerce',             # unparseable values become NaT
            )
            data_NGV['saas_create_time'] = parsed.dt.strftime('%Y-%m-%d').fillna('')

        return data_NGV

    except Exception as e:
        error_task_logger.error(f"获取NGV明细失败(MySQL适配): {str(e)}", exc_info=True)
        return None

    finally:
        # Always release the cursor and the connection; a failure while
        # closing is logged but never masks the result or the original error.
        if cursor:
            try:
                cursor.close()
            except Exception as e:
                error_task_logger.warning(f"关闭MySQL游标失败: {str(e)}")
        if conn:
            try:
                conn.close()
            except Exception as e:
                error_task_logger.warning(f"关闭MySQL连接失败: {str(e)}")
def get_yichang_details(self, days_back=1):
"""
@@ -180,7 +207,14 @@ class CommonModule:
"""
try:
# 获得连接
conn = psycopg2.connect(**self.conn)
conn = pymysql.connect(
host=Config.BI_CONN_host,
database=Config.BI_CONN_INFO_database,
user=Config.BI_CONN_INFO_user,
password=Config.BI_CONN_INFO_password,
charset='utf8mb4', # MySQL中文兼容
cursorclass=pymysql.cursors.DictCursor # 保持字典游标,字段名映射一致
)
cursor = conn.cursor()
# 获取指定天数前的日期
@@ -195,15 +229,26 @@ class CommonModule:
print("距离今天还有{}天的日期是:{}".format(days_to_add, future_date))
sql = f"""SELECT * FROM "public"."holo_ads_report_saas_profile_ngv_detail_d" WHERE "date_id" = '{yes_time_nyr}' and "expiry_time" like '%{future_date}%';"""
sql = """
SELECT *
FROM `jdy_ngv_data_source`
WHERE `date_id` = %s \
AND `expiry_time` LIKE %s; \
"""
# 执行语句并获取结果集
cursor.execute(sql)
like_pattern = f"%{future_date}%"
cursor.execute(sql, (yes_time_nyr, like_pattern))
rows = cursor.fetchall()
all_fields = cursor.description # 获取所有字段名
# 执行结果转化为dataframe
col = [i[0] for i in all_fields]
data_NGV = pd.DataFrame(list(rows), columns=col)
if rows:
# data_NGV = pd.DataFrame(rows).astype(str).replace({'nan': '', 'NaT': ''})
all_fields = cursor.description # 获取所有字段名
# 执行结果转化为dataframe
col = [i[0] for i in all_fields]
data_NGV = pd.DataFrame(list(rows), columns=col).astype(str).replace({'nan': '', 'NaT': ''})
else:
data_NGV = pd.DataFrame()
# 关闭数据库连接
cursor.close()