Add a visual control panel for the crawler, supporting customization of topics and parameter configuration.
This commit is contained in:
@@ -99,8 +99,10 @@ app.secret_key = 'this is secret_key you know ?' # 设置 Flask 的密钥,用
|
||||
# 导入蓝图
|
||||
from views.page import page
|
||||
from views.user import user
|
||||
from views.spider_control import spider_bp
|
||||
app.register_blueprint(page.pb) # 注册页面蓝图
|
||||
app.register_blueprint(user.ub) # 注册用户蓝图
|
||||
app.register_blueprint(spider_bp) # 注册爬虫控制蓝图
|
||||
|
||||
# 首页路由,清空 session
|
||||
@app.route('/')
|
||||
|
||||
@@ -3,6 +3,13 @@ from spiderDataPackage.spiderContent import start as spiderContent
|
||||
from spiderDataPackage.spiderComments import start as spiderComments
|
||||
from spiderDataPackage.settings import navAddr
|
||||
import os
|
||||
import requests
|
||||
import time
|
||||
import random
|
||||
import logging
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
from utils.logger import spider_logger as logging
|
||||
|
||||
def spiderData():
|
||||
if not os.path.exists(navAddr):
|
||||
@@ -13,5 +20,131 @@ def spiderData():
|
||||
print('正在爬取文章评论数据')
|
||||
spiderComments()
|
||||
|
||||
class SpiderData:
|
||||
def __init__(self):
|
||||
self.headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
||||
}
|
||||
self.base_url = 'https://s.weibo.com'
|
||||
|
||||
def crawl_topic(self, topic, depth=3, interval=5, max_retries=3, timeout=30):
|
||||
"""
|
||||
爬取指定话题的微博内容
|
||||
|
||||
:param topic: 要爬取的话题
|
||||
:param depth: 爬取深度(页数)
|
||||
:param interval: 请求间隔时间(秒)
|
||||
:param max_retries: 最大重试次数
|
||||
:param timeout: 请求超时时间(秒)
|
||||
"""
|
||||
logging.info(f"开始爬取话题: {topic}")
|
||||
|
||||
for page in range(1, depth + 1):
|
||||
retries = 0
|
||||
while retries < max_retries:
|
||||
try:
|
||||
url = f"{self.base_url}/weibo?q={topic}&page={page}"
|
||||
response = requests.get(url, headers=self.headers, timeout=timeout)
|
||||
|
||||
if response.status_code == 200:
|
||||
self._parse_page(response.text)
|
||||
logging.info(f"成功爬取话题 {topic} 第 {page} 页")
|
||||
break
|
||||
else:
|
||||
logging.warning(f"请求失败,状态码: {response.status_code}")
|
||||
retries += 1
|
||||
|
||||
except requests.RequestException as e:
|
||||
logging.error(f"请求异常: {e}")
|
||||
retries += 1
|
||||
|
||||
if retries < max_retries:
|
||||
sleep_time = interval * (1 + random.random())
|
||||
logging.info(f"等待 {sleep_time:.2f} 秒后重试...")
|
||||
time.sleep(sleep_time)
|
||||
|
||||
if retries == max_retries:
|
||||
logging.error(f"话题 {topic} 第 {page} 页爬取失败,已达到最大重试次数")
|
||||
continue
|
||||
|
||||
# 在页面之间添加随机延迟
|
||||
if page < depth:
|
||||
sleep_time = interval * (1 + random.random())
|
||||
logging.info(f"等待 {sleep_time:.2f} 秒后继续...")
|
||||
time.sleep(sleep_time)
|
||||
|
||||
def _parse_page(self, html_content):
|
||||
"""
|
||||
解析页面内容并保存数据
|
||||
|
||||
:param html_content: 页面HTML内容
|
||||
"""
|
||||
try:
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
weibo_items = soup.find_all('div', class_='card-wrap')
|
||||
|
||||
for item in weibo_items:
|
||||
try:
|
||||
# 提取微博内容
|
||||
content = item.find('p', class_='txt')
|
||||
if not content:
|
||||
continue
|
||||
|
||||
# 提取用户信息
|
||||
user_info = item.find('a', class_='name')
|
||||
if not user_info:
|
||||
continue
|
||||
|
||||
# 提取发布时间
|
||||
time_info = item.find('p', class_='from')
|
||||
|
||||
# 提取互动数据
|
||||
actions = item.find_all('li', class_='action')
|
||||
|
||||
# 构建数据字典
|
||||
weibo_data = {
|
||||
'content': content.text.strip(),
|
||||
'user_name': user_info.text.strip(),
|
||||
'publish_time': time_info.text.strip() if time_info else '',
|
||||
'forward_count': self._extract_number(actions[0].text) if len(actions) > 0 else 0,
|
||||
'comment_count': self._extract_number(actions[1].text) if len(actions) > 1 else 0,
|
||||
'like_count': self._extract_number(actions[2].text) if len(actions) > 2 else 0,
|
||||
'crawl_time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
|
||||
}
|
||||
|
||||
# 保存到数据库
|
||||
self._save_to_database(weibo_data)
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"解析微博项时出错: {e}")
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"解析页面时出错: {e}")
|
||||
|
||||
def _extract_number(self, text):
|
||||
"""
|
||||
从文本中提取数字
|
||||
|
||||
:param text: 包含数字的文本
|
||||
:return: 提取的数字,如果没有找到则返回0
|
||||
"""
|
||||
try:
|
||||
return int(''.join(filter(str.isdigit, text)))
|
||||
except ValueError:
|
||||
return 0
|
||||
|
||||
def _save_to_database(self, data):
|
||||
"""
|
||||
将数据保存到数据库
|
||||
|
||||
:param data: 要保存的数据字典
|
||||
"""
|
||||
try:
|
||||
# TODO: 实现数据库保存逻辑
|
||||
logging.info(f"保存数据: {data}")
|
||||
except Exception as e:
|
||||
logging.error(f"保存数据时出错: {e}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
spiderData()
|
||||
@@ -0,0 +1,291 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>爬虫控制面板</title>
|
||||
<link href="https://cdn.bootcdn.net/ajax/libs/twitter-bootstrap/5.0.2/css/bootstrap.min.css" rel="stylesheet">
|
||||
<link href="https://cdn.bootcdn.net/ajax/libs/font-awesome/5.15.4/css/all.min.css" rel="stylesheet">
|
||||
<style>
|
||||
.topic-item {
|
||||
margin: 5px;
|
||||
padding: 8px 15px;
|
||||
border-radius: 20px;
|
||||
background-color: #f8f9fa;
|
||||
display: inline-block;
|
||||
cursor: pointer;
|
||||
}
|
||||
.topic-item.selected {
|
||||
background-color: #0d6efd;
|
||||
color: white;
|
||||
}
|
||||
.custom-topic-input {
|
||||
margin: 10px 0;
|
||||
}
|
||||
.parameter-section {
|
||||
margin: 20px 0;
|
||||
padding: 20px;
|
||||
border-radius: 10px;
|
||||
background-color: #f8f9fa;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container mt-5">
|
||||
<h2 class="mb-4">爬虫控制面板</h2>
|
||||
|
||||
<!-- 话题选择区域 -->
|
||||
<div class="card mb-4">
|
||||
<div class="card-header">
|
||||
<h5 class="mb-0">选择话题类型</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div id="predefinedTopics" class="mb-3">
|
||||
<!-- 预定义话题将通过JavaScript动态加载 -->
|
||||
</div>
|
||||
|
||||
<div class="custom-topic-input">
|
||||
<h6>添加自定义话题</h6>
|
||||
<div class="input-group">
|
||||
<input type="text" class="form-control" id="customTopic" placeholder="输入自定义话题">
|
||||
<button class="btn btn-primary" onclick="addCustomTopic()">
|
||||
<i class="fas fa-plus"></i> 添加
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="selectedTopics" class="mt-3">
|
||||
<h6>已选择的话题:</h6>
|
||||
<div id="selectedTopicsList" class="mt-2">
|
||||
<!-- 已选择的话题将在这里显示 -->
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 爬虫参数配置 -->
|
||||
<div class="card mb-4">
|
||||
<div class="card-header">
|
||||
<h5 class="mb-0">爬虫参数配置</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<div class="mb-3">
|
||||
<label for="crawlDepth" class="form-label">爬取深度</label>
|
||||
<input type="number" class="form-control" id="crawlDepth" value="3" min="1" max="10">
|
||||
<small class="text-muted">每个话题爬取的页数(1-10)</small>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="mb-3">
|
||||
<label for="interval" class="form-label">爬取间隔(秒)</label>
|
||||
<input type="number" class="form-control" id="interval" value="5" min="1">
|
||||
<small class="text-muted">每次请求之间的间隔时间</small>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="row">
|
||||
<div class="col-md-6">
|
||||
<div class="mb-3">
|
||||
<label for="maxRetries" class="form-label">最大重试次数</label>
|
||||
<input type="number" class="form-control" id="maxRetries" value="3" min="1">
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-6">
|
||||
<div class="mb-3">
|
||||
<label for="timeout" class="form-label">请求超时时间(秒)</label>
|
||||
<input type="number" class="form-control" id="timeout" value="30" min="1">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 操作按钮 -->
|
||||
<div class="d-flex justify-content-between mb-5">
|
||||
<button class="btn btn-primary" onclick="startCrawling()">
|
||||
<i class="fas fa-play"></i> 开始爬取
|
||||
</button>
|
||||
<button class="btn btn-secondary" onclick="saveConfig()">
|
||||
<i class="fas fa-save"></i> 保存配置
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<!-- 爬虫状态和日志 -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h5 class="mb-0">爬虫状态</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<div class="progress mb-3">
|
||||
<div id="crawlProgress" class="progress-bar" role="progressbar" style="width: 0%"></div>
|
||||
</div>
|
||||
<div class="border p-3 bg-light" style="height: 200px; overflow-y: auto;">
|
||||
<pre id="crawlLog" class="mb-0"></pre>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script src="https://cdn.bootcdn.net/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
|
||||
<script src="https://cdn.bootcdn.net/ajax/libs/twitter-bootstrap/5.0.2/js/bootstrap.bundle.min.js"></script>
|
||||
<script>
|
||||
// 预定义话题列表
|
||||
const predefinedTopics = [
|
||||
'热门', '社会', '科技', '娱乐', '体育', '财经',
|
||||
'教育', '健康', '军事', '文化', '汽车', '美食'
|
||||
];
|
||||
|
||||
// 已选择的话题
|
||||
let selectedTopics = new Set();
|
||||
|
||||
// 初始化页面
|
||||
window.onload = function() {
|
||||
loadPredefinedTopics();
|
||||
};
|
||||
|
||||
// 加载预定义话题
|
||||
function loadPredefinedTopics() {
|
||||
const topicsDiv = document.getElementById('predefinedTopics');
|
||||
predefinedTopics.forEach(topic => {
|
||||
const topicElement = document.createElement('span');
|
||||
topicElement.className = 'topic-item';
|
||||
topicElement.textContent = topic;
|
||||
topicElement.onclick = () => toggleTopic(topic, topicElement);
|
||||
topicsDiv.appendChild(topicElement);
|
||||
});
|
||||
}
|
||||
|
||||
// 切换话题选择状态
|
||||
function toggleTopic(topic, element) {
|
||||
if (selectedTopics.has(topic)) {
|
||||
selectedTopics.delete(topic);
|
||||
element.classList.remove('selected');
|
||||
} else {
|
||||
selectedTopics.add(topic);
|
||||
element.classList.add('selected');
|
||||
}
|
||||
updateSelectedTopicsList();
|
||||
}
|
||||
|
||||
// 添加自定义话题
|
||||
function addCustomTopic() {
|
||||
const input = document.getElementById('customTopic');
|
||||
const topic = input.value.trim();
|
||||
if (topic) {
|
||||
selectedTopics.add(topic);
|
||||
input.value = '';
|
||||
updateSelectedTopicsList();
|
||||
}
|
||||
}
|
||||
|
||||
// 更新已选择的话题列表
|
||||
function updateSelectedTopicsList() {
|
||||
const listDiv = document.getElementById('selectedTopicsList');
|
||||
listDiv.innerHTML = '';
|
||||
selectedTopics.forEach(topic => {
|
||||
const topicElement = document.createElement('span');
|
||||
topicElement.className = 'topic-item selected';
|
||||
topicElement.textContent = topic;
|
||||
topicElement.onclick = () => {
|
||||
selectedTopics.delete(topic);
|
||||
updateSelectedTopicsList();
|
||||
};
|
||||
listDiv.appendChild(topicElement);
|
||||
});
|
||||
}
|
||||
|
||||
// 开始爬取
|
||||
function startCrawling() {
|
||||
if (selectedTopics.size === 0) {
|
||||
alert('请至少选择一个话题!');
|
||||
return;
|
||||
}
|
||||
|
||||
const config = {
|
||||
topics: Array.from(selectedTopics),
|
||||
parameters: {
|
||||
crawlDepth: parseInt(document.getElementById('crawlDepth').value),
|
||||
interval: parseInt(document.getElementById('interval').value),
|
||||
maxRetries: parseInt(document.getElementById('maxRetries').value),
|
||||
timeout: parseInt(document.getElementById('timeout').value)
|
||||
}
|
||||
};
|
||||
|
||||
// 发送爬虫配置到后端
|
||||
fetch('/api/spider/start', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify(config)
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
if (data.success) {
|
||||
updateCrawlLog('爬虫任务已启动...');
|
||||
} else {
|
||||
updateCrawlLog('启动失败:' + data.message);
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
updateCrawlLog('错误:' + error.message);
|
||||
});
|
||||
}
|
||||
|
||||
// 保存配置
|
||||
function saveConfig() {
|
||||
const config = {
|
||||
topics: Array.from(selectedTopics),
|
||||
parameters: {
|
||||
crawlDepth: parseInt(document.getElementById('crawlDepth').value),
|
||||
interval: parseInt(document.getElementById('interval').value),
|
||||
maxRetries: parseInt(document.getElementById('maxRetries').value),
|
||||
timeout: parseInt(document.getElementById('timeout').value)
|
||||
}
|
||||
};
|
||||
|
||||
fetch('/api/spider/save-config', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
body: JSON.stringify(config)
|
||||
})
|
||||
.then(response => response.json())
|
||||
.then(data => {
|
||||
if (data.success) {
|
||||
alert('配置已保存!');
|
||||
} else {
|
||||
alert('保存失败:' + data.message);
|
||||
}
|
||||
})
|
||||
.catch(error => {
|
||||
alert('保存出错:' + error.message);
|
||||
});
|
||||
}
|
||||
|
||||
// 更新爬虫日志
|
||||
function updateCrawlLog(message) {
|
||||
const log = document.getElementById('crawlLog');
|
||||
const timestamp = new Date().toLocaleTimeString();
|
||||
log.innerHTML += `[${timestamp}] ${message}\n`;
|
||||
log.scrollTop = log.scrollHeight;
|
||||
}
|
||||
|
||||
// WebSocket连接用于实时更新爬虫状态
|
||||
const ws = new WebSocket(`ws://${window.location.host}/ws/spider-status`);
|
||||
|
||||
ws.onmessage = function(event) {
|
||||
const data = JSON.parse(event.data);
|
||||
if (data.type === 'progress') {
|
||||
document.getElementById('crawlProgress').style.width = data.value + '%';
|
||||
} else if (data.type === 'log') {
|
||||
updateCrawlLog(data.message);
|
||||
}
|
||||
};
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,213 @@
|
||||
from flask import Blueprint, jsonify, request, render_template
|
||||
import json
|
||||
import os
|
||||
from datetime import datetime
|
||||
import threading
|
||||
from queue import Queue
|
||||
import asyncio
|
||||
import websockets
|
||||
import logging
|
||||
from spider.spiderData import SpiderData
|
||||
|
||||
# 创建蓝图
|
||||
spider_bp = Blueprint('spider', __name__)
|
||||
|
||||
# 创建日志记录器
|
||||
logger = logging.getLogger('spider_control')
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# 存储WebSocket连接的集合
|
||||
websocket_connections = set()
|
||||
|
||||
# 创建消息队列
|
||||
message_queue = Queue()
|
||||
|
||||
# 默认配置
|
||||
DEFAULT_CONFIG = {
|
||||
'crawlDepth': 3,
|
||||
'interval': 5,
|
||||
'maxRetries': 3,
|
||||
'timeout': 30
|
||||
}
|
||||
|
||||
def load_config():
|
||||
"""加载爬虫配置"""
|
||||
config_path = os.path.join(os.path.dirname(__file__), '../spider/config.json')
|
||||
try:
|
||||
if os.path.exists(config_path):
|
||||
with open(config_path, 'r', encoding='utf-8') as f:
|
||||
return json.load(f)
|
||||
except Exception as e:
|
||||
logger.error(f"加载配置文件失败: {e}")
|
||||
return DEFAULT_CONFIG
|
||||
|
||||
def save_config(config):
|
||||
"""保存爬虫配置"""
|
||||
config_path = os.path.join(os.path.dirname(__file__), '../spider/config.json')
|
||||
try:
|
||||
with open(config_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(config, f, ensure_ascii=False, indent=4)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"保存配置文件失败: {e}")
|
||||
return False
|
||||
|
||||
async def broadcast_message(message):
|
||||
"""广播消息到所有WebSocket连接"""
|
||||
if not websocket_connections:
|
||||
return
|
||||
|
||||
for websocket in websocket_connections.copy():
|
||||
try:
|
||||
await websocket.send(json.dumps(message))
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
websocket_connections.remove(websocket)
|
||||
except Exception as e:
|
||||
logger.error(f"发送WebSocket消息失败: {e}")
|
||||
websocket_connections.remove(websocket)
|
||||
|
||||
def spider_worker(topics, parameters):
|
||||
"""爬虫工作线程"""
|
||||
total_topics = len(topics)
|
||||
completed_topics = 0
|
||||
|
||||
try:
|
||||
spider = SpiderData()
|
||||
|
||||
for topic in topics:
|
||||
try:
|
||||
# 更新进度
|
||||
progress = int((completed_topics / total_topics) * 100)
|
||||
asyncio.run(broadcast_message({
|
||||
'type': 'progress',
|
||||
'value': progress
|
||||
}))
|
||||
|
||||
# 发送开始爬取的日志
|
||||
asyncio.run(broadcast_message({
|
||||
'type': 'log',
|
||||
'message': f'开始爬取话题: {topic}'
|
||||
}))
|
||||
|
||||
# 执行爬取
|
||||
spider.crawl_topic(
|
||||
topic=topic,
|
||||
depth=parameters['crawlDepth'],
|
||||
interval=parameters['interval'],
|
||||
max_retries=parameters['maxRetries'],
|
||||
timeout=parameters['timeout']
|
||||
)
|
||||
|
||||
completed_topics += 1
|
||||
|
||||
# 发送完成爬取的日志
|
||||
asyncio.run(broadcast_message({
|
||||
'type': 'log',
|
||||
'message': f'话题 {topic} 爬取完成'
|
||||
}))
|
||||
|
||||
except Exception as e:
|
||||
# 发送错误日志
|
||||
asyncio.run(broadcast_message({
|
||||
'type': 'log',
|
||||
'message': f'爬取话题 {topic} 时出错: {str(e)}'
|
||||
}))
|
||||
|
||||
# 更新最终进度
|
||||
asyncio.run(broadcast_message({
|
||||
'type': 'progress',
|
||||
'value': 100
|
||||
}))
|
||||
|
||||
# 发送完成消息
|
||||
asyncio.run(broadcast_message({
|
||||
'type': 'log',
|
||||
'message': '所有话题爬取完成'
|
||||
}))
|
||||
|
||||
except Exception as e:
|
||||
# 发送错误日志
|
||||
asyncio.run(broadcast_message({
|
||||
'type': 'log',
|
||||
'message': f'爬虫任务执行出错: {str(e)}'
|
||||
}))
|
||||
|
||||
@spider_bp.route('/spider/control')
|
||||
def spider_control():
|
||||
"""渲染爬虫控制页面"""
|
||||
return render_template('spider_control.html')
|
||||
|
||||
@spider_bp.route('/api/spider/start', methods=['POST'])
|
||||
def start_spider():
|
||||
"""启动爬虫任务"""
|
||||
try:
|
||||
data = request.get_json()
|
||||
topics = data.get('topics', [])
|
||||
parameters = data.get('parameters', DEFAULT_CONFIG)
|
||||
|
||||
if not topics:
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'message': '请选择至少一个话题'
|
||||
})
|
||||
|
||||
# 启动爬虫线程
|
||||
thread = threading.Thread(
|
||||
target=spider_worker,
|
||||
args=(topics, parameters),
|
||||
daemon=True
|
||||
)
|
||||
thread.start()
|
||||
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'message': '爬虫任务已启动'
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"启动爬虫任务失败: {e}")
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'message': str(e)
|
||||
})
|
||||
|
||||
@spider_bp.route('/api/spider/save-config', methods=['POST'])
|
||||
def save_spider_config():
|
||||
"""保存爬虫配置"""
|
||||
try:
|
||||
config = request.get_json()
|
||||
if save_config(config):
|
||||
return jsonify({
|
||||
'success': True,
|
||||
'message': '配置保存成功'
|
||||
})
|
||||
else:
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'message': '配置保存失败'
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"保存配置失败: {e}")
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'message': str(e)
|
||||
})
|
||||
|
||||
@spider_bp.websocket('/ws/spider-status')
|
||||
async def spider_status_socket():
|
||||
"""WebSocket连接处理"""
|
||||
try:
|
||||
websocket = websockets.WebSocketServerProtocol()
|
||||
websocket_connections.add(websocket)
|
||||
|
||||
try:
|
||||
while True:
|
||||
# 保持连接活跃
|
||||
await websocket.ping()
|
||||
await asyncio.sleep(30)
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
pass
|
||||
finally:
|
||||
websocket_connections.remove(websocket)
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket连接处理失败: {e}")
|
||||
Reference in New Issue
Block a user