Add AI-powered Spider Configuration Assistant.
This commit is contained in:
@@ -103,6 +103,39 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- AI configuration assistant: a card where the user describes crawl requirements in natural language -->
|
||||
<div class="card mb-4">
|
||||
<div class="card-header">
|
||||
<h5 class="mb-0">
|
||||
<i class="fas fa-robot"></i> AI配置助手
|
||||
</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<!-- Free-text requirement; generateConfig() reads and trims the value of #aiPrompt -->
<div class="mb-3">
|
||||
<label for="aiPrompt" class="form-label">用自然语言描述您的爬虫需求</label>
|
||||
<textarea class="form-control" id="aiPrompt" rows="3"
|
||||
placeholder="例如:我想爬取最近一周关于人工智能的热门微博,重点关注转发量超过1000的内容,每个话题爬取前5页内容。"></textarea>
|
||||
</div>
|
||||
<!-- Action row: the generate button plus the auto-apply toggle -->
<div class="d-flex justify-content-between align-items-center">
|
||||
<button class="btn btn-primary" onclick="generateConfig()">
|
||||
<i class="fas fa-magic"></i> 生成配置
|
||||
</button>
|
||||
<!-- Checked by default; generateConfig() applies the returned config only while #autoApply is checked -->
<div class="form-check">
|
||||
<input class="form-check-input" type="checkbox" id="autoApply" checked>
|
||||
<label class="form-check-label" for="autoApply">
|
||||
自动应用生成的配置
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Result area: hidden initially; generateConfig() writes the suggestion or an error message into #aiSuggestion and sets display to block -->
<div id="aiResponse" class="mt-3" style="display: none;">
|
||||
<div class="alert alert-info">
|
||||
<h6 class="alert-heading">AI助手建议:</h6>
|
||||
<p id="aiSuggestion" class="mb-0"></p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 操作按钮 -->
|
||||
<div class="d-flex justify-content-between mb-5">
|
||||
<button class="btn btn-primary" onclick="startCrawling()">
|
||||
@@ -286,6 +319,63 @@
|
||||
updateCrawlLog(data.message);
|
||||
}
|
||||
};
|
||||
|
||||
// AI配置生成
|
||||
/**
 * Ask the backend AI endpoint for a spider configuration and show the result.
 *
 * Reads the prompt from #aiPrompt, POSTs it as JSON to /api/spider/ai-config,
 * and writes the returned suggestion into #aiSuggestion. When #autoApply is
 * checked, the returned topics replace `selectedTopics` and the four crawl
 * parameter inputs are overwritten.
 *
 * Failures (network error, non-JSON response, `success: false`, incomplete
 * config) are never thrown to the caller; they are rendered in the
 * suggestion box instead.
 */
async function generateConfig() {
    const prompt = document.getElementById('aiPrompt').value.trim();
    if (!prompt) {
        alert('请输入您的爬虫需求描述!');
        return;
    }

    const aiResponse = document.getElementById('aiResponse');
    const aiSuggestion = document.getElementById('aiSuggestion');
    // Ids of the parameter inputs; they match the keys of config.parameters.
    const parameterIds = ['crawlDepth', 'interval', 'maxRetries', 'timeout'];

    try {
        const response = await fetch('/api/spider/ai-config', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify({ prompt })
        });

        // A proxy or server error page is not JSON; report the HTTP status
        // rather than a raw "Unexpected token <" parser message.
        let data;
        try {
            data = await response.json();
        } catch (parseError) {
            throw new Error(`服务器返回了无效的响应 (HTTP ${response.status})`);
        }

        if (!data.success) {
            throw new Error(data.message || `HTTP ${response.status}`);
        }

        // Show the AI suggestion.
        aiSuggestion.textContent = data.suggestion;
        aiResponse.style.display = 'block';

        if (!document.getElementById('autoApply').checked) {
            return;
        }

        // Validate the whole config BEFORE touching any state, so a malformed
        // reply cannot wipe the user's current topic selection or leave the
        // form half-updated.
        const config = data.config || {};
        const topics = config.topics;
        const parameters = config.parameters;
        const incomplete = !Array.isArray(topics) || !parameters ||
            parameterIds.some(id => parameters[id] === undefined || parameters[id] === null);
        if (incomplete) {
            throw new Error('AI返回的配置不完整');
        }

        // Replace the current selection with the generated topics.
        selectedTopics.clear();
        topics.forEach(topic => selectedTopics.add(topic));

        // Overwrite the crawl parameters.
        parameterIds.forEach(id => {
            document.getElementById(id).value = parameters[id];
        });

        // Refresh the UI and leave a note in the crawl log.
        updateSelectedTopicsList();
        updateCrawlLog('AI配置已自动应用');
    } catch (error) {
        aiSuggestion.textContent = '生成配置时出错:' + error.message;
        aiResponse.style.display = 'block';
    }
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
+131
-1
@@ -8,6 +8,8 @@ import asyncio
|
||||
import websockets
|
||||
import logging
|
||||
from spider.spiderData import SpiderData
|
||||
from openai import OpenAI
|
||||
from anthropic import Anthropic
|
||||
|
||||
# 创建蓝图
|
||||
spider_bp = Blueprint('spider', __name__)
|
||||
@@ -210,4 +212,132 @@ async def spider_status_socket():
|
||||
finally:
|
||||
websocket_connections.remove(websocket)
|
||||
except Exception as e:
|
||||
logger.error(f"WebSocket连接处理失败: {e}")
|
||||
logger.error(f"WebSocket连接处理失败: {e}")
|
||||
|
||||
def get_ai_client():
    """Return a descriptor for the first AI backend that has an API key set.

    Backends are tried in priority order: Anthropic first, then OpenAI.

    Returns:
        A dict with ``'type'`` (``'anthropic'`` or ``'openai'``) and
        ``'client'`` (the corresponding SDK client instance).

    Raises:
        ValueError: If neither ``ANTHROPIC_API_KEY`` nor ``OPENAI_API_KEY``
            is set to a non-empty value in the environment.
    """
    anthropic_key = os.getenv('ANTHROPIC_API_KEY')
    if anthropic_key:
        return {
            'type': 'anthropic',
            'client': Anthropic(api_key=anthropic_key),
        }

    openai_key = os.getenv('OPENAI_API_KEY')
    if openai_key:
        return {
            'type': 'openai',
            'client': OpenAI(api_key=openai_key),
        }

    raise ValueError("未找到可用的AI API密钥")
|
||||
|
||||
def parse_ai_response(response_text):
    """Split an AI reply into its JSON configuration and the preceding advice.

    The reply is expected to contain free-text advice followed by a JSON
    object. The first ``{`` that starts a decodable JSON object is taken as
    the configuration, so braces inside the advice text and any text after
    the JSON (for example a closing Markdown fence) do not break parsing.

    Args:
        response_text: Raw text returned by the AI service.

    Returns:
        A ``(config, suggestion)`` tuple. ``config`` is the decoded dict and
        ``suggestion`` is the text before the JSON object, stripped, with a
        dangling Markdown code-fence opener removed.

    Raises:
        ValueError: If no JSON object is found, ``topics`` is not a list,
            ``parameters`` is not an object holding ``crawlDepth``,
            ``interval``, ``maxRetries`` and ``timeout``, or any other error
            occurs while parsing. The original error is chained as the cause.
    """
    required_keys = ('crawlDepth', 'interval', 'maxRetries', 'timeout')
    try:
        config, start = _first_json_object(response_text)

        # Validate the configuration shape.
        if not isinstance(config.get('topics'), list):
            raise ValueError("配置必须包含话题列表")

        parameters = config.get('parameters', {})
        # The dict check matters: `key in "some string"` is a substring test
        # and would wrongly accept a string that mentions every key.
        if not isinstance(parameters, dict) or not all(
            key in parameters for key in required_keys
        ):
            raise ValueError("配置缺少必要的参数")

        # The advice is everything before the JSON object.
        suggestion = response_text[:start].strip()

        # Models often wrap the JSON in a Markdown fence. An odd number of
        # fence markers means the last one is an opener that belongs to the
        # JSON block, not to the advice, so drop it.
        fence = suggestion.rfind('```')
        if (
            fence != -1
            and suggestion.count('```') % 2 == 1
            and suggestion[fence + 3:].strip().lower() in ('', 'json')
        ):
            suggestion = suggestion[:fence].rstrip()

        return config, suggestion
    except Exception as e:
        # Callers rely on ValueError being the only exception type raised.
        raise ValueError(f"解析AI响应失败: {str(e)}") from e


def _first_json_object(text):
    """Return ``(obj, start)`` for the first ``{`` in *text* that begins a valid JSON object."""
    decoder = json.JSONDecoder()
    start = text.find('{')
    while start != -1:
        try:
            # raw_decode ignores whatever follows the decoded document.
            obj, _ = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            start = text.find('{', start + 1)
        else:
            return obj, start
    raise ValueError("未找到有效的JSON配置")
|
||||
|
||||
@spider_bp.route('/api/spider/ai-config', methods=['POST'])
def generate_ai_config():
    """Generate a spider configuration from a natural-language description.

    Expects a JSON body ``{"prompt": "<description>"}``. The prompt is sent,
    together with a fixed system prompt, to whichever AI backend
    ``get_ai_client()`` selects, and the reply is parsed with
    ``parse_ai_response()``.

    Returns:
        A JSON response. On success:
        ``{"success": True, "config": {...}, "suggestion": "..."}``.
        On any failure: ``{"success": False, "message": "..."}``. Errors are
        reported in the body rather than raised.
    """
    try:
        # silent=True yields None for a missing or invalid JSON body instead
        # of raising; a non-object body (e.g. a list) is treated as empty.
        payload = request.get_json(silent=True)
        prompt = payload.get('prompt', '') if isinstance(payload, dict) else ''
        if not isinstance(prompt, str) or not prompt.strip():
            return jsonify({
                'success': False,
                'message': '请提供爬虫需求描述'
            })

        # Build the system prompt that constrains the model's output format.
        system_prompt = """你是一个专业的爬虫配置助手。请根据用户的自然语言描述,生成合适的微博爬虫配置。
配置应包含以下内容:
1. 要爬取的话题列表
2. 爬虫参数(爬取深度、间隔时间、重试次数、超时时间)

请先用通俗易懂的语言解释你的配置建议,然后在最后提供一个JSON格式的具体配置。
注意:
- 爬取深度(crawlDepth)范围:1-10页
- 间隔时间(interval)范围:3-30秒
- 重试次数(maxRetries)范围:1-5次
- 超时时间(timeout)范围:10-60秒
- 所有参数都必须是整数

示例输出格式:
根据您的需求,我建议...

{
    "topics": ["话题1", "话题2"],
    "parameters": {
        "crawlDepth": 5,
        "interval": 5,
        "maxRetries": 3,
        "timeout": 30
    }
}"""

        # Pick the AI backend; raises ValueError when no API key is set,
        # which the outer handler reports.
        ai = get_ai_client()

        try:
            if ai['type'] == 'anthropic':
                # The Anthropic Messages API takes the system prompt as the
                # top-level `system` argument; `messages` may only contain
                # user/assistant turns.
                response = ai['client'].messages.create(
                    model="claude-3-sonnet-20240229",
                    max_tokens=1000,
                    system=system_prompt,
                    messages=[
                        {"role": "user", "content": prompt}
                    ]
                )
                response_text = response.content[0].text
            else:  # OpenAI
                response = ai['client'].chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": system_prompt},
                        {"role": "user", "content": prompt}
                    ]
                )
                response_text = response.choices[0].message.content

            # Extract the config and the human-readable advice.
            config, suggestion = parse_ai_response(response_text)

            return jsonify({
                'success': True,
                'config': config,
                'suggestion': suggestion
            })

        except Exception as e:
            # Boundary handler: log with traceback, report failure in the body.
            logger.exception(f"AI服务调用失败: {e}")
            return jsonify({
                'success': False,
                'message': f"AI配置生成失败: {str(e)}"
            })

    except Exception as e:
        logger.exception(f"生成配置失败: {e}")
        return jsonify({
            'success': False,
            'message': str(e)
        })
|
||||
Reference in New Issue
Block a user