Files
2026-01-30 11:28:35 +08:00

74 lines
2.5 KiB
Python

import pandas as pd
import os
import requests
from urllib.parse import urlparse
def download_image(img_url, folder_path, save_filename=None):
    """Download an image over HTTP(S) and save it under ``folder_path``.

    The saved file always carries the extension found in the URL path
    (``.jpg`` when the URL path has none); any extension on the chosen file
    name is dropped and replaced by it.

    Args:
        img_url: Image address. Must be a string starting with ``http://``
            or ``https://``; missing values (``None``/NaN) and any other
            input are rejected.
        folder_path: Destination directory, created if it does not exist.
        save_filename: Optional file name to save as. When empty, the last
            component of the URL path is used, falling back to ``image``.

    Returns:
        The path of the written file, or ``None`` when the URL is missing or
        malformed, the server does not answer with status 200, or any error
        occurs while downloading or writing the file.
    """
    if not isinstance(img_url, str):
        # pd.isna() returns an array for list-like input, and the truth
        # value of such an array can raise; only ask it about scalars so
        # that every invalid input ends in a clean ``None``.
        if pd.api.types.is_scalar(img_url) and pd.isna(img_url):
            print(f"Invalid or missing URL: {img_url}")
        else:
            print(f"Invalid URL format: {img_url}")
        return None
    if not img_url.startswith(('http://', 'https://')):
        print(f"Invalid URL format: {img_url}")
        return None

    try:
        response = requests.get(img_url, timeout=10)
        if response.status_code != 200:
            print(f"Failed to download image from {img_url}, status code: {response.status_code}")
            return None

        # Take the extension from the URL path (query string excluded).
        parsed_url = urlparse(img_url)
        file_ext = os.path.splitext(parsed_url.path)[1] or '.jpg'

        # Prefer the caller's file name; otherwise derive one from the URL.
        filename = save_filename
        if not filename:
            filename = os.path.basename(parsed_url.path) or f"image{file_ext}"
        # Strip whatever extension the name has so it is not doubled when the
        # URL's extension is appended.
        filename = f"{os.path.splitext(filename)[0]}{file_ext}"

        os.makedirs(folder_path, exist_ok=True)
        save_path = os.path.join(folder_path, filename)
        with open(save_path, 'wb') as handler:
            handler.write(response.content)
        print(f"Downloaded: {save_path}")
        return save_path
    except Exception as e:
        # Deliberately broad: one failed download is reported and skipped
        # rather than aborting the caller's batch.
        print(f"Error downloading image from {img_url}: {str(e)}")
        return None
# Demo input: each dict is one complete record (shop ID plus image URL).
demo_list = [
    {"门店id": "2345", "图片地址": "https://pf.f6yc.com/macan/prod/customerCar/2cfe619f259c4a00a8099676554e2e36.jpeg"},
]
df = pd.DataFrame(demo_list)

# Raw string so the backslashes in the Windows path are not read as escapes.
base_folder = r"D:\Idea Project\F6+宜搭+其它(1)\张阳脚本\文件输出\图片"

for shop_id, img_url in zip(df["门店id"], df["图片地址"]):
    if pd.isna(shop_id):
        print("Missing shop ID, skipping...")
        continue
    # Every image is written directly into base_folder (no per-shop
    # subfolder) and named after its shop ID.
    # NOTE(review): rows that share a shop ID are given the same file name,
    # so a later download may overwrite an earlier one — confirm this is
    # acceptable for real data.
    download_image(img_url, base_folder, save_filename=f"{shop_id}.jpg")