<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>爬虫控制面板</title>
    <link href="https://cdn.bootcdn.net/ajax/libs/twitter-bootstrap/5.0.2/css/bootstrap.min.css" rel="stylesheet">
    <link href="https://cdn.bootcdn.net/ajax/libs/font-awesome/5.15.4/css/all.min.css" rel="stylesheet">
    <style>
        /* Pill-shaped, clickable topic chip */
        .topic-item {
            margin: 5px;
            padding: 8px 15px;
            border-radius: 20px;
            background-color: #f8f9fa;
            display: inline-block;
            cursor: pointer;
        }
        /* Chip state while its topic is selected */
        .topic-item.selected {
            background-color: #0d6efd;
            color: white;
        }
        .custom-topic-input {
            margin: 10px 0;
        }
        .parameter-section {
            margin: 20px 0;
            padding: 20px;
            border-radius: 10px;
            background-color: #f8f9fa;
        }
    </style>
</head>
<body>
    <div class="container mt-5">
        <!-- Page title: the document's top-level heading, styled at h2 size -->
        <h1 class="h2 mb-4">爬虫控制面板</h1>

<!-- Topic selection -->
<div class="card mb-4">
    <div class="card-header">
        <h5 class="mb-0">选择话题类型</h5>
    </div>
    <div class="card-body">
        <div id="predefinedTopics" class="mb-3">
            <!-- Predefined topic chips are appended here by JavaScript -->
        </div>

        <div class="custom-topic-input">
            <h6>添加自定义话题</h6>
            <div class="input-group">
                <input type="text" class="form-control" id="customTopic" placeholder="输入自定义话题" aria-label="输入自定义话题">
                <button type="button" class="btn btn-primary" onclick="addCustomTopic()">
                    <i class="fas fa-plus" aria-hidden="true"></i> 添加
                </button>
            </div>
        </div>

        <div id="selectedTopics" class="mt-3">
            <h6>已选择的话题:</h6>
            <div id="selectedTopicsList" class="mt-2">
                <!-- Selected topics are rendered here -->
            </div>
        </div>
    </div>
</div>

<!-- Crawler parameters -->
<div class="card mb-4">
    <div class="card-header">
        <h5 class="mb-0">爬虫参数配置</h5>
    </div>
    <div class="card-body">
        <div class="row">
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="crawlDepth" class="form-label">爬取深度</label>
                    <input type="number" class="form-control" id="crawlDepth" value="3" min="1" max="10" aria-describedby="crawlDepthHelp">
                    <small class="text-muted" id="crawlDepthHelp">每个话题爬取的页数(1-10)</small>
                </div>
            </div>
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="interval" class="form-label">爬取间隔(秒)</label>
                    <input type="number" class="form-control" id="interval" value="5" min="1" aria-describedby="intervalHelp">
                    <small class="text-muted" id="intervalHelp">每次请求之间的间隔时间</small>
                </div>
            </div>
        </div>

        <div class="row">
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="maxRetries" class="form-label">最大重试次数</label>
                    <input type="number" class="form-control" id="maxRetries" value="3" min="1">
                </div>
            </div>
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="timeout" class="form-label">请求超时时间(秒)</label>
                    <input type="number" class="form-control" id="timeout" value="30" min="1">
                </div>
            </div>
        </div>
    </div>
</div>

<!-- Content filter configuration -->
<div class="card mb-4">
    <div class="card-header d-flex justify-content-between align-items-center">
        <h5 class="mb-0">内容筛选配置</h5>
        <button class="btn btn-sm btn-outline-primary" type="button" data-bs-toggle="collapse" data-bs-target="#filterHelp" aria-expanded="false" aria-controls="filterHelp">
            <i class="fas fa-question-circle" aria-hidden="true"></i> 帮助
        </button>
    </div>
    <div class="collapse" id="filterHelp">
        <div class="card-body bg-light">
            <h6>筛选条件说明:</h6>
            <ul>
                <li>数值条件:设置大于某个值进行筛选,如点赞数&gt;1000</li>
                <li>正则匹配:使用正则表达式匹配内容,如包含特定关键词</li>
                <li>多个条件之间是"与"的关系,即同时满足才会保留</li>
            </ul>
            <div class="alert alert-info">
                <i class="fas fa-info-circle" aria-hidden="true"></i> 提示:合理设置筛选条件可以提高数据质量
            </div>
        </div>
    </div>
    <div class="card-body">
        <!-- Interaction-count thresholds -->
        <h6 class="mb-3">互动数据筛选</h6>
        <div class="row">
            <div class="col-md-3">
                <div class="mb-3">
                    <label for="minLikes" class="form-label">点赞数大于</label>
                    <input type="number" class="form-control" id="minLikes" value="0" min="0">
                </div>
            </div>
            <div class="col-md-3">
                <div class="mb-3">
                    <label for="minComments" class="form-label">评论数大于</label>
                    <input type="number" class="form-control" id="minComments" value="0" min="0">
                </div>
            </div>
            <div class="col-md-3">
                <div class="mb-3">
                    <label for="minReposts" class="form-label">转发数大于</label>
                    <input type="number" class="form-control" id="minReposts" value="0" min="0">
                </div>
            </div>
            <div class="col-md-3">
                <div class="mb-3">
                    <label for="minReads" class="form-label">阅读数大于</label>
                    <input type="number" class="form-control" id="minReads" value="0" min="0">
                </div>
            </div>
        </div>

        <!-- Regex content filters -->
        <h6 class="mb-3 mt-4">内容正则筛选</h6>
        <div id="regexFilters">
            <!-- Regex filter rows are appended here by JavaScript -->
        </div>
        <button type="button" class="btn btn-outline-primary btn-sm mt-2" onclick="addRegexFilter()">
            <i class="fas fa-plus" aria-hidden="true"></i> 添加正则筛选
        </button>

        <!-- Advanced filter options -->
        <h6 class="mb-3 mt-4">高级选项</h6>
        <div class="form-check mb-2">
            <input class="form-check-input" type="checkbox" id="filterOriginal">
            <label class="form-check-label" for="filterOriginal">
                仅爬取原创内容
            </label>
        </div>
        <div class="form-check mb-2">
            <input class="form-check-input" type="checkbox" id="filterWithMedia">
            <label class="form-check-label" for="filterWithMedia">
                必须包含图片或视频
            </label>
        </div>
        <div class="form-check">
            <input class="form-check-input" type="checkbox" id="filterVerified">
            <label class="form-check-label" for="filterVerified">
                仅认证用户的内容
            </label>
        </div>
    </div>
</div>

<!-- Account configuration -->
<div class="card mb-4">
    <div class="card-header d-flex justify-content-between align-items-center">
        <h5 class="mb-0">账号配置</h5>
        <div>
            <button class="btn btn-sm btn-outline-primary me-2" type="button" data-bs-toggle="collapse" data-bs-target="#accountHelp" aria-expanded="false" aria-controls="accountHelp">
                <i class="fas fa-question-circle" aria-hidden="true"></i> 帮助
            </button>
            <button type="button" class="btn btn-sm btn-success" onclick="addAccount()">
                <i class="fas fa-plus" aria-hidden="true"></i> 添加账号
            </button>
        </div>
    </div>
    <div class="collapse" id="accountHelp">
        <div class="card-body bg-light">
            <h6>如何获取Cookie?</h6>
            <ol>
                <li>登录微博网页版</li>
                <li>按F12打开开发者工具</li>
                <li>切换到Network标签页</li>
                <li>刷新页面,找到请求头中的Cookie值</li>
            </ol>
            <div class="alert alert-warning">
                <i class="fas fa-exclamation-triangle" aria-hidden="true"></i> 注意:请勿泄露您的Cookie信息!
            </div>
            <div class="alert alert-info">
                <i class="fas fa-info-circle" aria-hidden="true"></i> 提示:添加多个账号可以提高爬取效率,系统会自动在账号间轮换。
            </div>
        </div>
    </div>
    <div class="card-body">
        <div id="accountsList">
            <!-- Account rows are appended here by JavaScript -->
        </div>
        <!-- Hidden initially; its display is toggled from JavaScript -->
        <div class="alert alert-warning mt-3" id="noAccountsWarning" style="display: none;">
            <i class="fas fa-exclamation-triangle" aria-hidden="true"></i> 请至少添加一个账号
        </div>
    </div>
</div>

<!-- Concurrency configuration -->
<div class="card mb-4">
    <div class="card-header">
        <h5 class="mb-0">并行配置</h5>
    </div>
    <div class="card-body">
        <div class="row">
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="maxConcurrent" class="form-label">最大并行数</label>
                    <input type="number" class="form-control" id="maxConcurrent" value="2" min="1" max="5" aria-describedby="maxConcurrentHelp">
                    <small class="text-muted" id="maxConcurrentHelp">同时进行爬取的最大话题数(1-5)</small>
                </div>
            </div>
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="requestsPerMinute" class="form-label">每分钟请求数限制</label>
                    <input type="number" class="form-control" id="requestsPerMinute" value="60" min="30" max="120" aria-describedby="requestsPerMinuteHelp">
                    <small class="text-muted" id="requestsPerMinuteHelp">避免请求过于频繁(30-120)</small>
                </div>
            </div>
        </div>
    </div>
</div>

<!-- Database configuration -->
<div class="card mb-4">
    <div class="card-header">
        <h5 class="mb-0">数据库配置</h5>
    </div>
    <div class="card-body">
        <div class="row">
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="dbType" class="form-label">数据库类型</label>
                    <select class="form-select" id="dbType">
                        <option value="mysql">MySQL</option>
                        <option value="postgresql">PostgreSQL</option>
                        <option value="mongodb">MongoDB</option>
                    </select>
                </div>
            </div>
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="dbHost" class="form-label">主机地址</label>
                    <input type="text" class="form-control" id="dbHost" value="localhost">
                </div>
            </div>
        </div>
        <div class="row">
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="dbPort" class="form-label">端口</label>
                    <input type="number" class="form-control" id="dbPort" value="3306">
                </div>
            </div>
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="dbName" class="form-label">数据库名</label>
                    <input type="text" class="form-control" id="dbName" value="weibo_data">
                </div>
            </div>
        </div>
        <div class="row">
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="dbUser" class="form-label">用户名</label>
                    <input type="text" class="form-control" id="dbUser">
                </div>
            </div>
            <div class="col-md-6">
                <div class="mb-3">
                    <label for="dbPassword" class="form-label">密码</label>
                    <input type="password" class="form-control" id="dbPassword">
                </div>
            </div>
        </div>
        <div class="d-flex justify-content-end">
            <button type="button" class="btn btn-primary" onclick="testDbConnection()">
                <i class="fas fa-database" aria-hidden="true"></i> 测试连接
            </button>
        </div>
    </div>
</div>

<!-- AI configuration assistant -->
<div class="card mb-4">
    <div class="card-header">
        <h5 class="mb-0">
            <i class="fas fa-robot" aria-hidden="true"></i> AI配置助手
        </h5>
    </div>
    <div class="card-body">
        <div class="mb-3">
            <label for="aiPrompt" class="form-label">用自然语言描述您的爬虫需求</label>
            <textarea class="form-control" id="aiPrompt" rows="3"
                placeholder="例如:我想爬取最近一周关于人工智能的热门微博,重点关注转发量超过1000的内容,每个话题爬取前5页内容。"></textarea>
        </div>
        <div class="d-flex justify-content-between align-items-center">
            <button type="button" class="btn btn-primary" onclick="generateConfig()">
                <i class="fas fa-magic" aria-hidden="true"></i> 生成配置
            </button>
            <div class="form-check">
                <input class="form-check-input" type="checkbox" id="autoApply" checked>
                <label class="form-check-label" for="autoApply">
                    自动应用生成的配置
                </label>
            </div>
        </div>
        <!-- Hidden until a response (or an error) is available -->
        <div id="aiResponse" class="mt-3" style="display: none;">
            <div class="alert alert-info">
                <h6 class="alert-heading">AI助手建议:</h6>
                <p id="aiSuggestion" class="mb-0"></p>
            </div>
        </div>
    </div>
</div>

<!-- Action buttons -->
<div class="d-flex justify-content-between mb-5">
    <button type="button" class="btn btn-primary" onclick="startCrawling()">
        <i class="fas fa-play" aria-hidden="true"></i> 开始爬取
    </button>
    <button type="button" class="btn btn-secondary" onclick="saveConfig()">
        <i class="fas fa-save" aria-hidden="true"></i> 保存配置
    </button>
</div>

<!-- Crawler status and log -->
<div class="card">
    <div class="card-header">
        <h5 class="mb-0">爬虫状态</h5>
    </div>
    <div class="card-body">
        <div class="progress mb-3">
            <div id="crawlProgress" class="progress-bar" role="progressbar" style="width: 0%" aria-valuemin="0" aria-valuemax="100"></div>
        </div>
        <!-- Fixed-height scroll container for the log -->
        <div class="border p-3 bg-light" style="height: 200px; overflow-y: auto;">
            <pre id="crawlLog" class="mb-0" role="log"></pre>
        </div>
    </div>
</div>
</div>

<script src="https://cdn.bootcdn.net/ajax/libs/jquery/3.6.0/jquery.min.js"></script>
<script src="https://cdn.bootcdn.net/ajax/libs/twitter-bootstrap/5.0.2/js/bootstrap.bundle.min.js"></script>
<script>
// Predefined topics offered as one-click chips.
const predefinedTopics = [
    '热门', '社会', '科技', '娱乐', '体育', '财经',
    '教育', '健康', '军事', '文化', '汽车', '美食'
];

// Topics currently selected (predefined and custom); a Set rules out duplicates.
let selectedTopics = new Set();

// Render the predefined chips once the page has loaded. addEventListener is
// used so that other load handlers are not overwritten.
window.addEventListener('load', function() {
    loadPredefinedTopics();
});

// Render one chip per entry of `predefinedTopics` into #predefinedTopics.
// A chip toggles its topic on click; because it is a <span>, it is also made
// focusable and reacts to Enter/Space so it can be operated from the keyboard.
function loadPredefinedTopics() {
    const topicsDiv = document.getElementById('predefinedTopics');
    predefinedTopics.forEach(topic => {
        const topicElement = document.createElement('span');
        topicElement.className = 'topic-item';
        topicElement.textContent = topic;
        topicElement.setAttribute('role', 'button');
        topicElement.tabIndex = 0;
        topicElement.onclick = () => toggleTopic(topic, topicElement);
        topicElement.onkeydown = event => {
            if (event.key === 'Enter' || event.key === ' ') {
                event.preventDefault(); // Space would otherwise scroll the page
                toggleTopic(topic, topicElement);
            }
        };
        topicsDiv.appendChild(topicElement);
    });
}

// Toggle the selection state of `topic`.
//   topic   - topic name to add to / remove from `selectedTopics`
//   element - the chip element whose `selected` class mirrors that state
// The selected-topics list is re-rendered afterwards.
function toggleTopic(topic, element) {
    const nowSelected = !selectedTopics.has(topic);
    if (nowSelected) {
        selectedTopics.add(topic);
    } else {
        selectedTopics.delete(topic);
    }
    element.classList.toggle('selected', nowSelected);
    updateSelectedTopicsList();
}

// Add the trimmed content of #customTopic to the selection, clear the input
// and re-render the selected-topics list. Blank input is ignored.
function addCustomTopic() {
    const input = document.getElementById('customTopic');
    const topic = input.value.trim();
    if (!topic) {
        return;
    }
    selectedTopics.add(topic);
    input.value = '';
    updateSelectedTopicsList();
}

// Re-render #selectedTopicsList from `selectedTopics`. Clicking a chip in that
// list removes its topic again.
// The predefined chips are re-synchronised as well: previously a topic removed
// here (or a selection replaced programmatically) left its predefined chip
// highlighted although the topic was no longer selected.
function updateSelectedTopicsList() {
    const listDiv = document.getElementById('selectedTopicsList');
    listDiv.innerHTML = '';
    selectedTopics.forEach(topic => {
        const topicElement = document.createElement('span');
        topicElement.className = 'topic-item selected';
        topicElement.textContent = topic;
        topicElement.onclick = () => {
            selectedTopics.delete(topic);
            updateSelectedTopicsList();
        };
        listDiv.appendChild(topicElement);
    });

    // A predefined chip is highlighted exactly when its text is a selected topic.
    document.querySelectorAll('#predefinedTopics .topic-item').forEach(chip => {
        chip.classList.toggle('selected', selectedTopics.has(chip.textContent));
    });
}

// Collect the complete crawler configuration from the form and POST it as
// JSON to /api/spider/start. Nothing is sent when no topic is selected or when
// validateConfig() rejects the form. The outcome is written to the crawl log.
function startCrawling() {
    if (selectedTopics.size === 0) {
        alert('请至少选择一个话题!');
        return;
    }

    // Validate the required configuration first
    if (!validateConfig()) {
        return;
    }

    const field = id => document.getElementById(id);
    // Explicit radix: the inputs are always meant as decimal numbers
    const intField = id => parseInt(field(id).value, 10);

    const config = {
        topics: Array.from(selectedTopics),
        parameters: {
            crawlDepth: intField('crawlDepth'),
            interval: intField('interval'),
            maxRetries: intField('maxRetries'),
            timeout: intField('timeout'),
            maxConcurrent: intField('maxConcurrent'),
            requestsPerMinute: intField('requestsPerMinute')
        },
        filters: {
            // Empty or non-numeric thresholds fall back to 0
            interaction: {
                minLikes: intField('minLikes') || 0,
                minComments: intField('minComments') || 0,
                minReposts: intField('minReposts') || 0,
                minReads: intField('minReads') || 0
            },
            regex: getRegexFilters(),
            options: {
                originalOnly: field('filterOriginal').checked,
                withMediaOnly: field('filterWithMedia').checked,
                verifiedOnly: field('filterVerified').checked
            }
        },
        accounts: getAccountsConfig(),
        database: {
            type: field('dbType').value,
            host: field('dbHost').value,
            port: intField('dbPort'),
            name: field('dbName').value,
            user: field('dbUser').value,
            password: field('dbPassword').value
        }
    };

    // Send the crawler configuration to the backend
    fetch('/api/spider/start', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify(config)
    })
    .then(response => response.json())
    .then(data => {
        if (data.success) {
            updateCrawlLog('爬虫任务已启动...');
        } else {
            updateCrawlLog('启动失败:' + data.message);
        }
    })
    .catch(error => {
        updateCrawlLog('错误:' + error.message);
    });
}

// Account management state.
// `accounts` holds one record per account row shown in #accountsList;
// `accountIdCounter` supplies the next id used as a row's data-id.
let accounts = [];
let accountIdCounter = 0;

// Build the form row for one account.
//   account - { id, username, password, cookie, saveCookie }
// Returns a detached <div class="account-item" data-id="..."> containing the
// inputs, a status box with a "validate" button, and a delete button.
// The markup template is static; account values are assigned through DOM
// properties afterwards so that quotes or angle brackets in a value can never
// break out of an attribute or be parsed as markup.
function createAccountElement(account) {
    const accountDiv = document.createElement('div');
    accountDiv.className = 'border rounded p-3 mb-3 position-relative account-item';
    accountDiv.dataset.id = account.id;

    accountDiv.innerHTML = `
        <div class="row">
            <div class="col-md-6">
                <div class="mb-3">
                    <label class="form-label">用户名</label>
                    <input type="text" class="form-control account-username" placeholder="微博用户名">
                </div>
            </div>
            <div class="col-md-6">
                <div class="mb-3">
                    <label class="form-label">密码</label>
                    <input type="password" class="form-control account-password" placeholder="微博密码">
                </div>
            </div>
        </div>
        <div class="mb-3">
            <label class="form-label">Cookie</label>
            <textarea class="form-control account-cookie" rows="2" placeholder="请输入微博Cookie"></textarea>
        </div>
        <div class="form-check mb-3">
            <input class="form-check-input account-save-cookie" type="checkbox">
            <label class="form-check-label">
                保存Cookie(加密存储)
            </label>
        </div>
        <div class="account-status alert alert-info">
            状态:待验证
            <button type="button" class="btn btn-sm btn-outline-primary ms-2 account-validate">
                <i class="fas fa-check-circle" aria-hidden="true"></i> 验证账号
            </button>
        </div>
    `;

    // Give a control a per-account id and point its sibling <label> at it.
    const link = (selector, suffix) => {
        const control = accountDiv.querySelector(selector);
        control.id = `account-${account.id}-${suffix}`;
        control.parentElement.querySelector('label').htmlFor = control.id;
        return control;
    };
    link('.account-username', 'username').value = account.username || '';
    link('.account-password', 'password').value = account.password || '';
    link('.account-cookie', 'cookie').value = account.cookie || '';
    link('.account-save-cookie', 'save-cookie').checked = Boolean(account.saveCookie);

    accountDiv.querySelector('.account-validate').onclick = () => validateAccount(account.id);

    const deleteButton = document.createElement('button');
    deleteButton.type = 'button';
    deleteButton.className = 'btn btn-sm btn-danger position-absolute top-0 end-0 m-2';
    deleteButton.innerHTML = '<i class="fas fa-times" aria-hidden="true"></i>';
    deleteButton.setAttribute('aria-label', '删除账号'); // icon-only button needs a name
    deleteButton.onclick = () => removeAccount(account.id);
    accountDiv.appendChild(deleteButton);

    return accountDiv;
}

// Append a new, empty account: register it in `accounts`, render its row into
// #accountsList and refresh the "no accounts" warning.
function addAccount() {
    const account = {
        id: accountIdCounter++,
        username: '',
        password: '',
        cookie: '',
        saveCookie: false,
        status: 'pending'
    };
    accounts.push(account);

    const accountsList = document.getElementById('accountsList');
    accountsList.appendChild(createAccountElement(account));
    updateAccountsWarning();
}

// Remove the account with the given id from `accounts` and from the page
// (if its row is present), then refresh the "no accounts" warning.
function removeAccount(id) {
    accounts = accounts.filter(account => account.id !== id);
    const accountElement = document.querySelector(`.account-item[data-id="${id}"]`);
    if (accountElement) {
        accountElement.remove();
    }
    updateAccountsWarning();
}

// Show #noAccountsWarning while `accounts` is empty, hide it otherwise.
function updateAccountsWarning() {
    const warning = document.getElementById('noAccountsWarning');
    warning.style.display = accounts.length === 0 ? 'block' : 'none';
}

// Read the current form values of every account row.
// Returns an array of { username, password, cookie, saveCookie }, in the
// order of `accounts`.
function getAccountsConfig() {
    return accounts.map(account => {
        const accountElement = document.querySelector(`.account-item[data-id="${account.id}"]`);
        return {
            username: accountElement.querySelector('.account-username').value,
            password: accountElement.querySelector('.account-password').value,
            cookie: accountElement.querySelector('.account-cookie').value,
            saveCookie: accountElement.querySelector('.account-save-cookie').checked
        };
    });
}

// Validate the cookie of the account row with the given id by POSTing it to
// /api/spider/validate-account, and show the result in the row's status box.
// Does nothing when the row is no longer on the page.
async function validateAccount(id) {
    const accountElement = document.querySelector(`.account-item[data-id="${id}"]`);
    if (!accountElement) {
        return;
    }
    const statusElement = accountElement.querySelector('.account-status');
    const cookie = accountElement.querySelector('.account-cookie').value.trim();

    // The "validate" button lives inside the status box. Keep a reference so
    // it can be put back after each status change; otherwise the first
    // validation would remove it and the account could not be re-validated.
    const validateButton = statusElement.querySelector('button');

    const setStatus = (level, text) => {
        statusElement.className = `account-status alert alert-${level}`;
        // textContent: server/error messages are shown as text, never parsed as HTML
        statusElement.textContent = text;
        if (validateButton) {
            statusElement.appendChild(validateButton);
        }
    };

    if (!cookie) {
        setStatus('danger', '状态:验证失败 - Cookie不能为空');
        return;
    }

    setStatus('warning', '状态:验证中...');

    try {
        const response = await fetch('/api/spider/validate-account', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({
                cookie: cookie
            })
        });

        const data = await response.json();
        if (data.success) {
            setStatus('success', '状态:验证成功');
        } else {
            setStatus('danger', `状态:验证失败 - ${data.message}`);
        }
    } catch (error) {
        setStatus('danger', `状态:验证失败 - ${error.message}`);
    }
}

// Regex filter management state.
// `regexFilters` holds one record per filter row shown in #regexFilters;
// `regexFilterIdCounter` supplies the next id used as a row's data-id.
let regexFilters = [];
let regexFilterIdCounter = 0;

// Build the form row for one regex filter.
//   filter - { id, pattern, target, inverse }
// Returns a detached <div class="regex-filter-item" data-id="...">.
// The markup template is static; the filter's values are assigned through DOM
// properties afterwards, so a pattern containing `"`, `<` or `&` (common in
// regular expressions) is shown verbatim instead of breaking the attribute.
function createRegexFilterElement(filter) {
    const filterDiv = document.createElement('div');
    filterDiv.className = 'border rounded p-3 mb-3 position-relative regex-filter-item';
    filterDiv.dataset.id = filter.id;

    filterDiv.innerHTML = `
        <div class="row">
            <div class="col-md-6">
                <div class="mb-3">
                    <label class="form-label">正则表达式</label>
                    <input type="text" class="form-control regex-pattern" placeholder="输入正则表达式">
                </div>
            </div>
            <div class="col-md-6">
                <div class="mb-3">
                    <label class="form-label">匹配目标</label>
                    <select class="form-select regex-target">
                        <option value="content">微博内容</option>
                        <option value="author">作者名</option>
                        <option value="location">发布位置</option>
                    </select>
                </div>
            </div>
        </div>
        <div class="form-check">
            <input class="form-check-input regex-inverse" type="checkbox">
            <label class="form-check-label">
                反向匹配(不包含匹配项)
            </label>
        </div>
    `;

    // Give a control a per-filter id and point its sibling <label> at it.
    const link = (selector, suffix) => {
        const control = filterDiv.querySelector(selector);
        control.id = `regex-filter-${filter.id}-${suffix}`;
        control.parentElement.querySelector('label').htmlFor = control.id;
        return control;
    };
    link('.regex-pattern', 'pattern').value = filter.pattern || '';
    link('.regex-inverse', 'inverse').checked = Boolean(filter.inverse);

    // Only select a target that exists; otherwise keep the first option,
    // which is what the browser picks when no <option> is marked selected.
    const targetSelect = link('.regex-target', 'target');
    if (Array.from(targetSelect.options).some(option => option.value === filter.target)) {
        targetSelect.value = filter.target;
    }

    const deleteButton = document.createElement('button');
    deleteButton.type = 'button';
    deleteButton.className = 'btn btn-sm btn-danger position-absolute top-0 end-0 m-2';
    deleteButton.innerHTML = '<i class="fas fa-times" aria-hidden="true"></i>';
    deleteButton.setAttribute('aria-label', '删除正则筛选'); // icon-only button needs a name
    deleteButton.onclick = () => removeRegexFilter(filter.id);
    filterDiv.appendChild(deleteButton);

    return filterDiv;
}

// Append a new, empty regex filter (target "content", not inverted):
// register it in `regexFilters` and render its row into #regexFilters.
function addRegexFilter() {
    const filter = {
        id: regexFilterIdCounter++,
        pattern: '',
        target: 'content',
        inverse: false
    };
    regexFilters.push(filter);

    const filtersList = document.getElementById('regexFilters');
    filtersList.appendChild(createRegexFilterElement(filter));
}

// Remove the regex filter with the given id from `regexFilters` and from the
// page (if its row is present).
function removeRegexFilter(id) {
    regexFilters = regexFilters.filter(filter => filter.id !== id);
    const filterElement = document.querySelector(`.regex-filter-item[data-id="${id}"]`);
    if (filterElement) {
        filterElement.remove();
    }
}

// Read the current form values of every regex filter row.
// Returns an array of { pattern, target, inverse }; rows whose pattern is
// empty or whitespace-only are left out.
function getRegexFilters() {
    return regexFilters.map(filter => {
        const filterElement = document.querySelector(`.regex-filter-item[data-id="${filter.id}"]`);
        return {
            pattern: filterElement.querySelector('.regex-pattern').value,
            target: filterElement.querySelector('.regex-target').value,
            inverse: filterElement.querySelector('.regex-inverse').checked
        };
    }).filter(filter => filter.pattern.trim() !== '');
}

// Validate the form before a crawl is started.
// Checks, in order: every non-empty regex compiles; at least one account
// exists; every account has a cookie; the concurrency settings are within
// range; the database settings are complete.
// Returns true when everything is valid; otherwise alerts the first problem
// found and returns false.
function validateConfig() {
    // Regular expressions
    const invalidRegex = regexFilters.some(filter => {
        const filterElement = document.querySelector(`.regex-filter-item[data-id="${filter.id}"]`);
        const pattern = filterElement.querySelector('.regex-pattern').value.trim();
        if (pattern === '') {
            return false; // empty rows are skipped, not rejected
        }
        try {
            new RegExp(pattern);
            return false;
        } catch (e) {
            alert(`正则表达式 "${pattern}" 格式无效!`);
            return true;
        }
    });

    if (invalidRegex) {
        return false;
    }

    // At least one account must be configured
    if (accounts.length === 0) {
        alert('请至少添加一个账号!');
        return false;
    }

    // Every account needs a cookie
    const invalidAccounts = accounts.filter(account => {
        const accountElement = document.querySelector(`.account-item[data-id="${account.id}"]`);
        return !accountElement.querySelector('.account-cookie').value.trim();
    });

    if (invalidAccounts.length > 0) {
        alert('存在未配置Cookie的账号,请检查!');
        return false;
    }

    // Concurrency settings. An empty or non-numeric field parses to NaN, and
    // every comparison with NaN is false, so NaN has to be rejected
    // explicitly or it would slip through both range checks.
    const outOfRange = (value, min, max) => !Number.isInteger(value) || value < min || value > max;
    const maxConcurrent = parseInt(document.getElementById('maxConcurrent').value, 10);
    const requestsPerMinute = parseInt(document.getElementById('requestsPerMinute').value, 10);
    if (outOfRange(maxConcurrent, 1, 5)) {
        alert('最大并行数必须在1-5之间!');
        return false;
    }
    if (outOfRange(requestsPerMinute, 30, 120)) {
        alert('每分钟请求数必须在30-120之间!');
        return false;
    }

    // Database settings: every field must be filled in
    const dbConfig = {
        host: document.getElementById('dbHost').value.trim(),
        port: document.getElementById('dbPort').value.trim(),
        name: document.getElementById('dbName').value.trim(),
        user: document.getElementById('dbUser').value.trim(),
        password: document.getElementById('dbPassword').value.trim()
    };

    if (!dbConfig.host || !dbConfig.port || !dbConfig.name || !dbConfig.user || !dbConfig.password) {
        alert('请完整填写数据库配置信息!');
        return false;
    }

    return true;
}

// POST the database settings currently in the form to /api/spider/test-db
// and report the outcome (success, failure message, or request error) in an
// alert.
async function testDbConnection() {
    const field = id => document.getElementById(id).value;
    const dbConfig = {
        type: field('dbType'),
        host: field('dbHost'),
        port: parseInt(field('dbPort'), 10), // explicit radix: always decimal
        name: field('dbName'),
        user: field('dbUser'),
        password: field('dbPassword')
    };

    try {
        const response = await fetch('/api/spider/test-db', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(dbConfig)
        });

        const data = await response.json();
        if (data.success) {
            alert('数据库连接测试成功!');
        } else {
            alert('数据库连接测试失败:' + data.message);
        }
    } catch (error) {
        alert('测试连接时发生错误:' + error.message);
    }
}

// When the database type changes, fill in the port this page uses as the
// default for that type. An unknown type leaves the port untouched.
document.getElementById('dbType').addEventListener('change', function() {
    const defaultPorts = {
        mysql: '3306',
        postgresql: '5432',
        mongodb: '27017'
    };
    const dbType = this.value;
    if (Object.prototype.hasOwnProperty.call(defaultPorts, dbType)) {
        document.getElementById('dbPort').value = defaultPorts[dbType];
    }
});

// POST the selected topics and the four basic crawl parameters (crawlDepth,
// interval, maxRetries, timeout) to /api/spider/save-config and report the
// outcome in an alert.
function saveConfig() {
    // Explicit radix: the inputs are always meant as decimal numbers
    const intField = id => parseInt(document.getElementById(id).value, 10);

    const config = {
        topics: Array.from(selectedTopics),
        parameters: {
            crawlDepth: intField('crawlDepth'),
            interval: intField('interval'),
            maxRetries: intField('maxRetries'),
            timeout: intField('timeout')
        }
    };

    fetch('/api/spider/save-config', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify(config)
    })
    .then(response => response.json())
    .then(data => {
        if (data.success) {
            alert('配置已保存!');
        } else {
            alert('保存失败:' + data.message);
        }
    })
    .catch(error => {
        alert('保存出错:' + error.message);
    });
}

// Append one timestamped line to the crawl log (#crawlLog) and scroll the log
// to the bottom.
//   message - text to log; it is inserted as a text node, so characters such
//             as `<` in a backend message are displayed literally and are
//             never parsed as HTML.
function updateCrawlLog(message) {
    const log = document.getElementById('crawlLog');
    const timestamp = new Date().toLocaleTimeString();
    log.appendChild(document.createTextNode(`[${timestamp}] ${message}\n`));

    // The element that actually scrolls is the fixed-height wrapper around
    // the <pre>, not the <pre> itself.
    const scroller = log.parentElement || log;
    scroller.scrollTop = scroller.scrollHeight;
}

// WebSocket connection for live crawler status updates.
// The scheme follows the page: a page served over https must use wss,
// because browsers block insecure ws:// connections from secure pages.
const ws = new WebSocket(
    `${window.location.protocol === 'https:' ? 'wss' : 'ws'}://${window.location.host}/ws/spider-status`
);

// Messages are JSON objects: { type: 'progress', value } updates the progress
// bar, { type: 'log', message } appends to the crawl log. Anything else,
// including frames that are not valid JSON, is ignored.
ws.onmessage = function(event) {
    let data;
    try {
        data = JSON.parse(event.data);
    } catch (error) {
        return; // one malformed frame must not break the handler
    }
    if (!data) {
        return;
    }

    if (data.type === 'progress') {
        // Clamp to 0-100 so a bad value cannot produce an invalid width
        const percent = Math.min(100, Math.max(0, Number(data.value) || 0));
        const bar = document.getElementById('crawlProgress');
        bar.style.width = percent + '%';
        bar.setAttribute('aria-valuenow', String(percent));
    } else if (data.type === 'log') {
        updateCrawlLog(data.message);
    }
};

// Send the natural-language description in #aiPrompt to /api/spider/ai-config
// and show the returned suggestion. When "auto apply" is checked, the returned
// topics and the four basic parameters replace the current form values.
// Any failure (request error, unsuccessful response, malformed config) is
// shown in the suggestion box.
async function generateConfig() {
    const prompt = document.getElementById('aiPrompt').value.trim();
    if (!prompt) {
        alert('请输入您的爬虫需求描述!');
        return;
    }

    const aiResponse = document.getElementById('aiResponse');
    const aiSuggestion = document.getElementById('aiSuggestion');

    try {
        const response = await fetch('/api/spider/ai-config', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({ prompt })
        });

        const data = await response.json();
        if (!data.success) {
            throw new Error(data.message);
        }

        // Show the AI suggestion
        aiSuggestion.textContent = data.suggestion;
        aiResponse.style.display = 'block';

        // Apply the generated configuration if requested
        if (document.getElementById('autoApply').checked) {
            // Read everything from the response BEFORE touching the form: if
            // the config is malformed this throws here, and the current
            // selection is left intact instead of being cleared halfway.
            const { topics, parameters } = data.config;
            const newTopics = Array.from(topics);
            const { crawlDepth, interval, maxRetries, timeout } = parameters;

            // Replace the current selection
            selectedTopics.clear();
            newTopics.forEach(topic => {
                selectedTopics.add(topic);
            });

            // Update the parameters
            document.getElementById('crawlDepth').value = crawlDepth;
            document.getElementById('interval').value = interval;
            document.getElementById('maxRetries').value = maxRetries;
            document.getElementById('timeout').value = timeout;

            // Refresh the UI and leave a note in the log
            updateSelectedTopicsList();
            updateCrawlLog('AI配置已自动应用');
        }
    } catch (error) {
        aiSuggestion.textContent = '生成配置时出错:' + error.message;
        aiResponse.style.display = 'block';
    }
}
</script>
</body>
</html>