# Files
# F6--/张阳脚本/竞品系统数据导出/爱车店新版.py
# T
# 2026-01-30 11:28:35 +08:00
#
# 131 lines
# 4.5 KiB
# Python

import json
import os
import pickle
import time
from datetime import datetime, timedelta
from urllib.parse import quote

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# ==================== Configuration ====================
COOKIE_FILE = "sina_cookies.pkl" # File name used to store the saved cookies
LOGIN_URL = "https://tousu.sina.com.cn/"
DOMAIN = "tousu.sina.com.cn"
MAX_COOKIE_AGE_DAYS = 7 # Maximum cookie validity period (days)
def is_cookie_valid():
    """Return True when a saved cookie file exists and is still usable.

    The file is considered unusable when any of the following holds:

    * COOKIE_FILE does not exist;
    * it was written more than MAX_COOKIE_AGE_DAYS days ago;
    * it cannot be read or unpickled (empty, truncated or corrupt file);
    * at least one stored cookie carries an 'expiry' timestamp in the past.

    Returns:
        bool: True if the stored cookies may be loaded, False otherwise.
    """
    if not os.path.exists(COOKIE_FILE):
        return False

    now = time.time()

    # Enforce the configured maximum age using the file's modification time,
    # so even cookies without an 'expiry' field are refreshed periodically.
    max_age_seconds = timedelta(days=MAX_COOKIE_AGE_DAYS).total_seconds()
    if now - os.path.getmtime(COOKIE_FILE) > max_age_seconds:
        print("Cookies 已过期")
        return False

    try:
        with open(COOKIE_FILE, "rb") as f:
            # NOTE(security): pickle.load can execute arbitrary code. This is
            # only acceptable because the file is written locally by
            # save_cookies(); never point COOKIE_FILE at untrusted data.
            cookies = pickle.load(f)
    except (OSError, EOFError, pickle.UnpicklingError,
            AttributeError, ImportError, IndexError):
        # An unreadable file is treated like a missing one so the caller
        # falls back to a manual login instead of crashing.
        print("Cookies 文件无法读取")
        return False

    # Compare raw epoch seconds: converting far-future expiries with
    # datetime.fromtimestamp() can overflow on some platforms.
    for cookie in cookies:
        if 'expiry' in cookie and cookie['expiry'] < now:
            print("Cookies 已过期")
            return False
    return True
def save_cookies(driver):
    """Write the cookies of the current browser session to COOKIE_FILE.

    Args:
        driver: Selenium WebDriver whose cookies are read via get_cookies().
    """
    # Read the cookies before touching the file so a driver failure does not
    # truncate a previously saved cookie file.
    session_cookies = driver.get_cookies()
    with open(COOKIE_FILE, "wb") as cookie_file:
        pickle.dump(session_cookies, cookie_file)
    print("Cookies 已保存")
def load_cookies(driver):
    """Load the cookies stored in COOKIE_FILE into the browser session.

    Cookies whose 'expiry' timestamp is already in the past are skipped
    instead of being re-added without an expiry (which would turn them into
    session cookies).

    Args:
        driver: Selenium WebDriver that receives the cookies.

    Raises:
        OSError: if COOKIE_FILE cannot be opened.
        pickle.UnpicklingError, EOFError: if the file content is not a valid
            pickle.
    """
    # The target site must be visited once before cookies can be set for it.
    driver.get(LOGIN_URL)
    with open(COOKIE_FILE, "rb") as f:
        # NOTE(security): pickle.load can execute arbitrary code; the file is
        # expected to be the one written locally by save_cookies().
        cookies = pickle.load(f)

    now = time.time()
    for cookie in cookies:
        expiry = cookie.get('expiry')
        if expiry is not None and expiry < now:
            # The browser would discard an expired cookie anyway.
            continue
        # 'sameSite' is dropped as in the original implementation
        # (presumably because some driver versions reject stored values —
        # TODO confirm).
        cookie.pop('sameSite', None)
        if expiry is not None:
            # Pass an integer expiry; a float may be rejected by the driver.
            cookie['expiry'] = int(expiry)
        driver.add_cookie(cookie)
    print("Cookies 加载成功")
def check_login_status(driver, timeout=10):
    """Check whether the current page shows the logged-in user element.

    Waits until the logged-in marker element is present in the DOM. Only a
    wait timeout is treated as "not logged in"; any other error (for example
    a closed browser) propagates to the caller instead of being silently
    swallowed.

    Args:
        driver: Selenium WebDriver positioned on a page of the site.
        timeout: Maximum number of seconds to wait for the marker element.

    Returns:
        bool: True if the marker element appeared within the timeout,
        False otherwise.
    """
    logged_in_marker = (By.CSS_SELECTOR, "#SI_User > div.ac-login.ac-logined")
    try:
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(logged_in_marker)
        )
    except TimeoutException:
        print("未登录或登录状态失效")
        return False
    print("检测到已登录状态")
    return True
def collect_complaints(driver, keyword, max_items=1000, scroll_pause=2):
    """Collect unique complaint titles from the search results for a keyword.

    Opens the search page, then repeatedly scrolls to the bottom so further
    results are loaded, reading the title headings after every scroll. The
    loop stops once max_items distinct titles are known or three consecutive
    scrolls produced no new title.

    Args:
        driver: Selenium WebDriver used to browse the site.
        keyword: Search term; it is percent-encoded before being placed in
            the query string.
        max_items: Upper bound on the number of titles returned.
        scroll_pause: Seconds to wait after each scroll for new results.

    Returns:
        list[str]: Distinct titles in the order they were first seen, at
        most max_items entries.
    """
    # Encode the keyword so characters such as '&', '#' or '+' cannot break
    # or truncate the query string.
    search_url = f"https://tousu.sina.com.cn/index/search/?keywords={quote(keyword, safe='')}&t=1"
    driver.get(search_url)

    title_selector = "#search_tab > div.blackcat-container > div.tab-con.tousu-list > div > a > h1"
    # A dict is used as an insertion-ordered set so the output order is stable.
    collected_items = {}
    no_new_count = 0
    while len(collected_items) < max_items and no_new_count < 3:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause)
        count_before = len(collected_items)
        for heading in driver.find_elements(By.CSS_SELECTOR, title_selector):
            text = heading.text.strip()
            if text:
                collected_items.setdefault(text, None)
        if len(collected_items) == count_before:
            no_new_count += 1
        else:
            no_new_count = 0
    # One scroll can add several titles at once; cap the result explicitly.
    return list(collected_items)[:max_items]
def main():
    """Run the scraper: log in, search for a keyword and save the titles.

    Starts Chrome, restores a saved login from cookies when possible
    (otherwise asks the user to log in manually and saves the new cookies),
    prompts for a search keyword, collects the complaint titles and writes
    them to tousu.txt, one per line. The browser is always closed, even when
    a step fails.
    """
    # Configure Chrome browser options
    chrome_options = Options()
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0 Safari/537.36")
    # Start the browser
    chrome_service = Service(executable_path='D:\\ProgramTools\\chromedriver-win64\\chromedriver.exe')
    driver = webdriver.Chrome(service=chrome_service, options=chrome_options)
    try:
        # Reuse saved cookies when they are still valid
        if is_cookie_valid():
            print("使用保存的 Cookies 登录...")
            load_cookies(driver)
            # Reload the page so the freshly added cookies take effect.
            driver.get(LOGIN_URL)
            if not check_login_status(driver):
                print("Cookies 无效,需要手动登录")
                input("请手动登录后按回车继续...")
                save_cookies(driver)
        else:
            print("首次登录,请手动登录")
            driver.get(LOGIN_URL)
            input("请手动登录后按回车继续...")
            save_cookies(driver)
        # Ask for the search keyword; surrounding whitespace is not part of it
        keyword = input("请输入搜索关键词(例如:F6汽车科技): ").strip()
        # Collect the complaint titles
        complaints = collect_complaints(driver, keyword)
        # Save the results
        with open("tousu.txt", "w", encoding="utf-8") as f:
            f.writelines(item + "\n" for item in complaints)
        print(f"成功收集到{len(complaints)}条投诉信息,已保存到tousu.txt")
    finally:
        # Always release the browser process, including on errors or Ctrl+C.
        driver.quit()


if __name__ == "__main__":
    main()