From b4f14ae3e7e8ee3e44c3e8713ab9a4d9c110e148 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=98=BF=E5=BD=ADBaileys?=
<142196225+apeng0406@users.noreply.github.com>
Date: Sun, 9 Mar 2025 19:09:00 +0800
Subject: [PATCH] Bilingual support, with full system support for Chinese and
English switching.
---
app.py | 18 +-
static/js/i18n.js | 239 +++
templates/404.html | 85 +-
templates/error.html | 89 +-
templates/spider_control.html | 760 +++----
views/page/__pycache__/page.cpython-38.pyc | Bin 0 -> 5041 bytes
views/page/page.py | 1096 +++++-----
views/page/templates/articleChar.html | 688 +++---
views/page/templates/articleContentCloud.html | 286 +--
views/page/templates/base_page.html | 981 ++++-----
views/page/templates/commentChar.html | 632 +++---
views/page/templates/hotWord.html | 1004 ++++-----
views/page/templates/index.html | 57 +-
views/page/templates/ipChar.html | 548 ++---
views/page/templates/tableData.html | 451 ++--
views/page/templates/yuqingChar.html | 800 +++----
views/page/templates/yuqingpredict.html | 1848 ++++++++---------
views/spider_control.py | 740 +++----
views/user/__pycache__/user.cpython-38.pyc | Bin 0 -> 2249 bytes
views/user/templates/login_and_register.html | 23 +
views/user/user.py | 744 +++----
21 files changed, 5725 insertions(+), 5364 deletions(-)
create mode 100644 static/js/i18n.js
create mode 100644 views/page/__pycache__/page.cpython-38.pyc
create mode 100644 views/user/__pycache__/user.cpython-38.pyc
diff --git a/app.py b/app.py
index 7b18720..8bf6bc3 100644
--- a/app.py
+++ b/app.py
@@ -142,15 +142,27 @@ def not_found_error(error):
@app.errorhandler(500)
def internal_error(error):
- return render_template('500.html'), 500
+ return render_template('error.html',
+ error_code=500,
+ error_title='服务器错误',
+ error_message='服务器遇到了一个问题,请稍后再试。',
+ error_i18n_key='serverError'), 500
@app.errorhandler(403)
def forbidden_error(error):
- return render_template('403.html'), 403
+ return render_template('error.html',
+ error_code=403,
+ error_title='禁止访问',
+ error_message='您没有权限访问此页面。',
+ error_i18n_key='forbidden'), 403
@app.errorhandler(400)
def bad_request_error(error):
- return render_template('400.html'), 400
+ return render_template('error.html',
+ error_code=400,
+ error_title='错误请求',
+ error_message='服务器无法理解您的请求。',
+ error_i18n_key='badRequest'), 400
# 数据库配置
DB_CONFIG = {
diff --git a/static/js/i18n.js b/static/js/i18n.js
new file mode 100644
index 0000000..db91b8c
--- /dev/null
+++ b/static/js/i18n.js
@@ -0,0 +1,239 @@
+// Multi-language support file
+const translations = {
+ 'zh': {
+ // Navigation menu
+ 'home': '首页',
+ 'hotWord': '热词统计',
+ 'tableData': '微博舆情统计',
+ 'articleChar': '文章分析',
+ 'ipChar': 'IP分析',
+ 'commentChar': '评论分析',
+ 'yuqingChar': '舆情分析',
+ 'yuqingpredict': '舆情预测',
+ 'articleCloud': '文章内容词云图',
+ 'dataVisualization': '数据可视化',
+ 'weiboSystem': '微博舆情分析系统',
+ 'wordCloud': '词云图',
+
+ // Home page
+ 'articleCount': '文章个数',
+ 'articleCrawlRule': '文章爬取规则',
+ 'nextCrawlTime': '下次爬取时间',
+ 'articlePublishTimeCount': '文章发布时间个数',
+ 'commentLikeCountTopFore': '评论点赞量 Top Fore', // NOTE(review): "Fore" looks like a typo for "Four" (the 'en' entry reads 'Top Four') - confirm before changing the label
+ 'viewAll': '查看全部',
+ 'articleTypeRatio': '文章类型占比',
+ 'commentUserWordCloud': '评论用户名词云图',
+ 'commentUserTimeRatio': '评论用户时间占比',
+
+ // Hot word page
+ 'hotWordStatistics': '热词统计页',
+ 'hotWordCloud': '热词词云图',
+ 'hotWordRanking': '热词查询表格',
+ 'wordFrequency': '词频',
+ 'hotWordSelection': '热词选择',
+ 'hotWordName': '热词名称',
+ 'occurrenceCount': '出现次数',
+ 'hotWordSentiment': '热词情感',
+ 'hotWordYearTrend': '热词年份变化趋势',
+ 'queryCommentsByHotWord': '根据选择的热词从而查询出评论数据',
+ 'hotWordTimeDistribution': '热词出现时间分布个数',
+
+ // Public opinion analysis page
+ 'hotWordSentimentTrendBar': '热词情感趋势柱状图',
+ 'hotWordSentimentTrendTree': '热词情感趋势树形图',
+ 'articleCommentSentimentTrendPie': '文章内容与评论内容舆情趋势饼状图',
+
+ // Opinion prediction page
+ 'topicStatisticsPage': '话题统计页',
+
+ // Article analysis page
+ 'articleCharPage': '文章分析页',
+ 'typeSelection': '类型选择',
+ 'articleLikeAnalysis': '文章点赞量分析 👍',
+ 'articleCommentAnalysis': '文章评论量分析 🔥',
+ 'articleForwardAnalysis': '文章转发量分析 🥇',
+ 'likeRangeStatistics': '点赞区间统计',
+ 'rangeCount': '区间个数',
+
+ // Comment analysis page
+ 'commentLikeRangeChart': '评论点赞次数区间图',
+ 'commentUserGenderRatio': '评论用户性别占比',
+ 'userCommentWordCloud': '用户评论词云图',
+
+ // IP analysis page
+ 'articleIpLocationAnalysis': '文章IP位置分析图',
+ 'commentIpLocationAnalysis': '评论IP位置分析图',
+
+ // Comment related
+ 'commentUser': '评论用户',
+ 'commentGender': '评论性别',
+ 'commentAddress': '评论地址',
+ 'commentContent': '评论内容',
+ 'likeCount': '点赞量',
+
+ // Weibo public opinion stats page
+ 'weiboArticleStatTable': '微博文章统计表格 - 舆情 情感分类',
+ 'sentimentClassification': '情感分类',
+ 'articleId': '文章ID',
+ 'articleIp': '文章IP',
+ 'articleTitle': '文章标题',
+ 'articleLike': '点赞量',
+ 'articleForward': '转发量',
+ 'articleComment': '评论量',
+ 'articleType': '类型',
+ 'articleContent': '内容',
+ 'articleTime': '发布时间',
+
+ // Common
+ 'switchToEnglish': '切换到英文',
+ 'switchToChinese': '切换到中文',
+ 'semester': '网安小学期',
+
+ // Error pages
+ 'pageNotFound': '页面未找到',
+ 'backToHome': '返回首页',
+ 'serverError': '服务器错误',
+ 'forbidden': '禁止访问',
+ 'badRequest': '错误请求'
+ },
+ 'en': {
+ // Navigation menu
+ 'home': 'Home',
+ 'hotWord': 'Hot Words',
+ 'tableData': 'Weibo Public Opinion Stats',
+ 'articleChar': 'Article Analysis',
+ 'ipChar': 'IP Analysis',
+ 'commentChar': 'Comment Analysis',
+ 'yuqingChar': 'Public Opinion Analysis',
+ 'yuqingpredict': 'Opinion Prediction',
+ 'articleCloud': 'Article Content Word Cloud',
+ 'dataVisualization': 'Data Visualization',
+ 'weiboSystem': 'Weibo Public Opinion Analysis System',
+ 'wordCloud': 'Word Cloud',
+
+ // Home page
+ 'articleCount': 'Article Count',
+ 'articleCrawlRule': 'Article Crawl Rule',
+ 'nextCrawlTime': 'Next Crawl Time',
+ 'articlePublishTimeCount': 'Article Publish Time Count',
+ 'commentLikeCountTopFore': 'Comment Like Count Top Four',
+ 'viewAll': 'View All',
+ 'articleTypeRatio': 'Article Type Ratio',
+ 'commentUserWordCloud': 'Comment User Word Cloud',
+ 'commentUserTimeRatio': 'Comment User Time Ratio',
+
+ // Hot word page
+ 'hotWordStatistics': 'Hot Word Statistics',
+ 'hotWordCloud': 'Hot Word Cloud',
+ 'hotWordRanking': 'Hot Word Ranking',
+ 'wordFrequency': 'Word Frequency',
+ 'hotWordSelection': 'Hot Word Selection',
+ 'hotWordName': 'Hot Word Name',
+ 'occurrenceCount': 'Occurrence Count',
+ 'hotWordSentiment': 'Hot Word Sentiment',
+ 'hotWordYearTrend': 'Hot Word Year Trend',
+ 'queryCommentsByHotWord': 'Query comments based on selected hot word',
+ 'hotWordTimeDistribution': 'Hot Word Time Distribution Count',
+
+ // Public opinion analysis page
+ 'hotWordSentimentTrendBar': 'Hot Word Sentiment Trend Bar Chart',
+ 'hotWordSentimentTrendTree': 'Hot Word Sentiment Trend Tree Chart',
+ 'articleCommentSentimentTrendPie': 'Article and Comment Sentiment Trend Pie Chart',
+
+ // Opinion prediction page
+ 'topicStatisticsPage': 'Topic Statistics Page',
+
+ // Article analysis page
+ 'articleCharPage': 'Article Analysis Page',
+ 'typeSelection': 'Type Selection',
+ 'articleLikeAnalysis': 'Article Like Analysis 👍',
+ 'articleCommentAnalysis': 'Article Comment Analysis 🔥',
+ 'articleForwardAnalysis': 'Article Forward Analysis 🥇',
+ 'likeRangeStatistics': 'Like Range Statistics',
+ 'rangeCount': 'Range Count',
+
+ // Comment analysis page
+ 'commentLikeRangeChart': 'Comment Like Range Chart',
+ 'commentUserGenderRatio': 'Comment User Gender Ratio',
+ 'userCommentWordCloud': 'User Comment Word Cloud',
+
+ // IP analysis page
+ 'articleIpLocationAnalysis': 'Article IP Location Analysis',
+ 'commentIpLocationAnalysis': 'Comment IP Location Analysis',
+
+ // Comment related
+ 'commentUser': 'Comment User',
+ 'commentGender': 'Gender',
+ 'commentAddress': 'Address',
+ 'commentContent': 'Content',
+ 'likeCount': 'Likes',
+
+ // Weibo public opinion stats page
+ 'weiboArticleStatTable': 'Weibo Article Statistics Table - Sentiment Classification',
+ 'sentimentClassification': 'Sentiment Classification',
+ 'articleId': 'Article ID',
+ 'articleIp': 'Article IP',
+ 'articleTitle': 'Article Title',
+ 'articleLike': 'Likes',
+ 'articleForward': 'Forwards',
+ 'articleComment': 'Comments',
+ 'articleType': 'Type',
+ 'articleContent': 'Content',
+ 'articleTime': 'Publish Time',
+
+ // Common
+ 'switchToEnglish': 'Switch to English',
+ 'switchToChinese': 'Switch to Chinese',
+ 'semester': 'Network Security Semester',
+
+ // Error pages
+ 'pageNotFound': 'Page Not Found',
+ 'backToHome': 'Back to Home',
+ 'serverError': 'Server Error',
+ 'forbidden': 'Forbidden',
+ 'badRequest': 'Bad Request'
+ }
+};
+
+// Return the language code saved in localStorage under 'language', defaulting to 'zh'
+function getCurrentLanguage() {
+ return localStorage.getItem('language') || 'zh';
+}
+
+// Save the given language code in localStorage, then reload the page
+function setLanguage(lang) {
+ localStorage.setItem('language', lang);
+ location.reload();
+}
+
+// 翻译函数
+function t(key) {
+ const lang = getCurrentLanguage();
+ return translations[lang][key] || key;
+}
+
+// Apply translations once the DOM has loaded
+document.addEventListener('DOMContentLoaded', function() {
+ // Apply the current language
+ applyTranslations();
+
+ // Attach the click handler to the language switcher button, if the page has one
+ const langSwitcher = document.getElementById('language-switcher');
+ if (langSwitcher) {
+ langSwitcher.addEventListener('click', function() {
+ const currentLang = getCurrentLanguage();
+ const newLang = currentLang === 'zh' ? 'en' : 'zh';
+ setLanguage(newLang);
+ });
+ }
+});
+
+// Set the textContent of every element carrying a data-i18n attribute to the translation of that attribute's value
+function applyTranslations() {
+ const elements = document.querySelectorAll('[data-i18n]');
+ elements.forEach(el => {
+ const key = el.getAttribute('data-i18n');
+ el.textContent = t(key);
+ });
+}
\ No newline at end of file
diff --git a/templates/404.html b/templates/404.html
index 4fb4dd5..5357503 100644
--- a/templates/404.html
+++ b/templates/404.html
@@ -1,29 +1,58 @@
-
-
-
-
-
- 404页面
-
-
-
-
-
-
-
-
-
-
-

-

-
噢!该页面没有找到..
-
本次请求没有任何反应.
-
回到登录页
-
-
-
-
-
- /
-
+
+
+
+
+
+ 404 - 页面未找到
+
+
+
+
+
+
+
+
404
+
页面未找到
+
您请求的页面不存在或已被移除。
+
返回首页
+
+
+
+
+
\ No newline at end of file
diff --git a/templates/error.html b/templates/error.html
index 7275342..79f366d 100644
--- a/templates/error.html
+++ b/templates/error.html
@@ -1,33 +1,58 @@
-
-
-
-
-
- 错误页面
-
-
-
-
-
-
-
-
-
-
-
-
-

-
{{ errorMsg }}
-
请回去再次检查问题并且修改问题.
-
回到登录页
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+ {{ error_code }} - {{ error_title }}
+
+
+
+
+
+
+
+
{{ error_code }}
+
{{ error_title }}
+
{{ error_message }}
+
返回首页
+
+
+
+
+
\ No newline at end of file
diff --git a/templates/spider_control.html b/templates/spider_control.html
index af29ce7..37aeefe 100644
--- a/templates/spider_control.html
+++ b/templates/spider_control.html
@@ -1,381 +1,381 @@
-
-
-
-
-
- 爬虫控制面板
-
-
-
-
-
-
-
爬虫控制面板
-
-
-
-
-
-
-
-
-
-
-
-
-
- 每个话题爬取的页数(1-10)
-
-
-
-
-
-
- 每次请求之间的间隔时间
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+ 爬虫控制面板
+
+
+
+
+
+
+
爬虫控制面板
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 每个话题爬取的页数(1-10)
+
+
+
+
+
+
+ 每次请求之间的间隔时间
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/views/page/__pycache__/page.cpython-38.pyc b/views/page/__pycache__/page.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eae39e81bbd6e0d56dfa87cb627217d83a0ecc61
GIT binary patch
literal 5041
zcma)A-ESOM6`z@%nVtRc_eX5UPSWpgfjCJ^3njF1oSGDjYZ_OjBMWQ2_u3wN_M>-Z
z8}Ditkb?S9DX+W%2}nR6`o;tA{1^8DA%P1015m4qNSt%#&euAXy4IYz_uS9p@
zT)#6mR@UIR^Y;%Ae!Hw`|HMl6XG7%%Ji!fJ)0oC|Uu){F-ZWgJS#S$Y(>0rxYc*}x
z*0J62i%rLMs9f+%JE$x8W%aJm+w`l=F?Xyv?vAT|6RLeuy{G7HMg4S*Tg>LutjHWz
zx^KEOeCEL{D?ij(g;js0-P7GU(u{#-oK28so-~u7nPStVIY*iq(9E(q(kzf>9yI6J
z0%;aWvk00ccAhj#q*(^d3cEm>^Q5^5noH~n(kzqaNzgpSo+ix-X`TViv+Oz2Twu@N
z(Q38N5Zjt9tqs2(oXBFpgTQOIcY#>HO9n;pL%a_I=pJi!~GEZy|Gyd%6;D2sx>
z-{nDA(`E5at9^HCa~q2K3pGQIAM$X`_iwkuciVymvKrNQ+8wVE)O3MXmS(4Z$otkR
zzGNw0D}dqUuJHFdf**LNaw2ZnJ81i`DHw3nZt@0hJ^q!TP=#k5o?r_^s6Eh`{!kxk
zp>a>!Ee!RcF)W0Id)m+pP0$!aYiNfS)Qh1F(g};q7-$21UJ;
zWo}8Q8*tI8H+jG80XCNeFxZSc0x}zI!KHP2s~*;+)uUG_>~;A15zH>f8iIo*)ufn7ccHEJfR1}>t-9X}yx>kR05a@;;PJb`F0zKit%Rs!_&!kU21-OfxxQg_RPb+C)`{mG75;rj;$CqX6Hi$)fr;sr?(O@cYN>|3&xtufKls
z=`a68#ZN!^!)L$wJr)1+m)}A0%c>eg?Ect^*j-V|O)m)LIO7NPt`CGGBL;GRf)4=5u3;XSmZ}U5xbdnaTvH`L^PTi&XOWBRky+1Ii4cZ
zg@Z%OxU5I#$(@A>6zw-WKjebCWjXpBF8@4=8j2+p=TW=>LRufx!M}-bpaOXsi4`ne
z1c8Htmq8%KY~2Bx;UB(DpqoQF)SL8-5bq0`qhCVWkt@vc4!k@Xb3`FPZ|)y%JX*YizmrINQ?T$O%Q86jyMC
zt0=Ca$Q$@^mX%Iyk2hdI@GOYPys>f1x@n?G&zSSu(6e7&4eR?pN22BIiQkL|ZXF)t
zOlnVNfEO{23(A5Qz$56PiCzUQNH#LS+V*`DPPMYkhBOcS`r%H0JXtP^L50{h@h)UV
z*#@HeVa?8fk0*_ACm+8P4?u%P5Jq!}?_j@IP>h@b)cp*e09`wRWa?Ed!}Uirs$X4I
z0TDw)4wPJ^?!X)=C>W`SdPH)7AtAN6l9pqF1GNhf$;r8@SROmrBE)isnF-C)SZ+})
zw=+bSCI;C4WM;Ryxb9#x(LIKA2NWs2rG0u8?Jn(}dY@iXb$Vx+?-JG3eCY{rXMoiz
z4ky`wJC`bxAPxs4)Aw!OX+!=1Cob#UUIj$O_fVv?zmC=Kqj(L)>nQSOm6Z$t$IzZR
z1K`@U=p3Mn8#w{G@wvDG{rkmLuXD!MxdVWwYSh-0k3r~+GcU-tknm5;s8yyjOcUg9RE1PaiphZ$crD1#lRTKIW%|6pujx5V10dOlGE!h-yX=
zKMl7KK{0jT_Q)aSkeEvSCj$3Yi@PJLoxAObPHNheM7Zx!Oe&e(d<5+QCSV8*hQ@>|
z{}Z=B+pnzly6=0f!!yX_!X9=VK_-*CM*&Uc?>2-pD`rlbvjbw73dJ0nNLP&jQ$ZpH
zthn!ShG*DOg{w*Fnn=!0){PRS-s}EEeNBudrXG1M!0FDP)gB4XBbe1@QkXFSi47D#
zK#?pz3Vk{KtY1UN0M9cbUj^*ZDUY1fFT`zV?~f}Rz|D$zjqr?PPO^^&&^vF2*fFbY
zH+9Zmk$sG0yX2gYog~|3xolVDvfZCkwp&u!Zux6vyRH@G0heIo;t3RZ4-ikGcp5~1
zIs#06IZ;8NOR7~et^zMoaBHQh(5k7I7B50yN=Kcug10Fg!W=7_iEyMw&jTeA$9Vd+ZwoVSJR!Cuvoa_%_(ot4d<5_U{
z-*9Q%1Ad_R5xlo*7o`O`P;ep)s75nl4`HFOQJ{}?E8WoZgUiic%kR(?!JUgVU|+l_
zV|t19au+h{{~;vg{HjMB0jSi+D)&XrELK${Xy8CWUK= 100: # 每分钟100次请求限制
- return jsonify({'error': 'Too many requests'}), 429
- pipe = redis_client.pipeline()
- pipe.incr(key)
- pipe.expire(key, 60) # 60秒后重置
- pipe.execute()
- return f(*args, **kwargs)
- return decorated_function
-
-# 设置设备
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-# 设置模型路径
-model_save_path = 'model_pro/final_model.pt'
-bert_model_path = 'model_pro/bert_model'
-ctm_tokenizer_path = 'model_pro/sentence_bert_model'
-
-# 初始化模型
-try:
- model_manager.load_models(model_save_path, bert_model_path, ctm_tokenizer_path)
-except Exception as e:
- logging.error(f"模型加载失败: {e}")
-
-# 数据库配置
-DATABASE_URL = os.getenv('DATABASE_URL', "sqlite:///ai_analysis.db")
-engine = create_engine(DATABASE_URL)
-AIAnalysis.metadata.create_all(engine)
-
-def predict_sentiment(text):
- """使用改进版模型预测单个文本的情感"""
- try:
- if not text or len(text.strip()) == 0:
- return None, None
-
- # 清理输入
- cleaned_text = sanitize_input(text)
- if not cleaned_text:
- return None, None
-
- predictions, probabilities = model_manager.predict_batch([cleaned_text])
- if predictions is not None and len(predictions) > 0:
- return predictions[0], probabilities[0][predictions[0]]
- return None, None
- except Exception as e:
- logging.error(f"预测过程中出现错误: {e}")
- return None, None
-
-@pb.route('/home')
-@login_required
-def home():
- try:
- username = session.get('username')
- articleLenMax, likeCountMaxAuthorName, cityMax = getHomeTagsData()
- commentsLikeCountTopFore = getHomeCommentsLikeCountTopFore()
- X, Y = getHomeArticleCreatedAtChart()
- typeChart = getHomeTypeChart()
- createAtChart = getHomeCommentCreatedChart()
-
- return render_template('index.html',
- username=username,
- articleLenMax=articleLenMax,
- likeCountMaxAuthorName=likeCountMaxAuthorName,
- cityMax=cityMax,
- commentsLikeCountTopFore=commentsLikeCountTopFore,
- xData=X,
- yData=Y,
- typeChart=typeChart,
- createAtChart=createAtChart)
- except Exception as e:
- logging.error(f"加载首页时发生错误: {e}")
- return render_template('error.html', error_message="加载首页失败")
-
-@pb.route('/hotWord')
-@login_required
-def hotWord():
- try:
- username = session.get('username')
- hotWordList = getAllHotWords()
- if not hotWordList:
- return render_template('error.html', error_message="无法获取热词列表")
-
- defaultHotWord = sanitize_input(request.args.get('hotWord', hotWordList[0][0]))
-
- # 验证热词是否在列表中
- if not any(defaultHotWord in word for word in hotWordList):
- return abort(400, "无效的热词")
-
- hotWordLen = getHotWordLen(defaultHotWord)
- X, Y = getHotWordPageCreatedAtCharData(defaultHotWord)
-
- value = SnowNLP(defaultHotWord).sentiments
- if value == 0.5:
- sentences = '中性'
- elif value > 0.5:
- sentences = '正面'
- elif value < 0.5:
- sentences = '负面'
-
- comments = getCommentFilterData(defaultHotWord)
-
- return render_template('hotWord.html',
- username=username,
- hotWordList=hotWordList,
- defaultHotWord=defaultHotWord,
- hotWordLen=hotWordLen,
- sentences=sentences,
- xData=X,
- yData=Y,
- comments=comments)
- except Exception as e:
- logging.error(f"加载热词页面时发生错误: {e}")
- return render_template('error.html', error_message="加载热词页面失败")
-
-@pb.route('/hotTopic')
-def hotTopic():
- username = session.get('username')
- topicList = getAllTopics()
- defaultTopic = topicList[0][0]
- if request.args.get('topic'):
- defaultTopic = request.args.get('topic')
- topicLen = getTopicLen(defaultTopic)
- X, Y = getTopicPageCreatedAtCharData()
- sentences = ''
-
- # ... 这里要嵌入 topic 相关内容(热度?)来填充 sentences
-
- comments = getCommentFilterDataTopic(defaultTopic)
- return render_template('hotWord.html',
- username=username,
- topicList=topicList,
- defaultTopic=defaultTopic,
- topicLen=topicLen,
- sentences=sentences,
- xData=X,
- yData=Y,
- comments=comments)
-
-@pb.route('/tableData')
-@login_required
-def tableData():
- try:
- username = session.get('username')
- defaultFlag = bool(request.args.get('flag', False))
- tableData = getTableDataList(defaultFlag)
-
- return render_template('tableData.html',
- username=username,
- tableData=tableData,
- defaultFlag=defaultFlag)
- except Exception as e:
- logging.error(f"加载表格数据时发生错误: {e}")
- return render_template('error.html', error_message="加载表格数据失败")
-
-@pb.route('/articleChar')
-def articleChar():
- username = session.get('username')
- typeList = getTypeList()
- defaultType = typeList[0]
- if request.args.get('type'): defaultType = request.args.get('type')
- X, Y = getArticleLikeCount(defaultType)
- x1Data, y1Data = getArticleCommentsLen(defaultType)
- x2Data, y2Data = getArticleRepotsLen(defaultType)
- return render_template('articleChar.html',
- username=username,
- typeList=typeList,
- defaultType=defaultType,
- xData=X,
- yData=Y,
- x1Data=x1Data,
- y1Data=y1Data,
- x2Data=x2Data,
- y2Data=y2Data)
-
-@pb.route('/ipChar')
-@login_required
-def ipChar():
- try:
- username = session.get('username')
- articleRegionData = getIPByArticleRegion()
- commentRegionData = getIPByCommentsRegion()
-
- return render_template('ipChar.html',
- username=username,
- articleRegionData=articleRegionData,
- commentRegionData=commentRegionData)
- except Exception as e:
- logging.error(f"加载IP统计时发生错误: {e}")
- return render_template('error.html', error_message="加载IP统计失败")
-
-@pb.route('/commentChar')
-@login_required
-def commentChar():
- try:
- username = session.get('username')
- X, Y = getCommentDataOne()
- genderPieData = getCommentDataTwo()
-
- return render_template('commentChar.html',
- username=username,
- xData=X,
- yData=Y,
- genderPieData=genderPieData)
- except Exception as e:
- logging.error(f"加载评论统计时发生错误: {e}")
- return render_template('error.html', error_message="加载评论统计失败")
-
-@pb.route('/yuqingChar')
-@login_required
-def yuqingChar():
- try:
- username = session.get('username')
- model_type = sanitize_input(request.args.get('model', 'pro'))
-
- # 验证模型类型
- if model_type not in ['pro', 'basic']:
- return abort(400, "无效的模型类型")
-
- X, Y, biedata = getYuQingCharDataOne()
- biedata1, biedata2 = getYuQingCharDataTwo(model_type)
- x1Data, y1Data = getYuQingCharDataThree()
-
- return render_template('yuqingChar.html',
- username=username,
- xData=X,
- yData=Y,
- biedata=biedata,
- biedata1=biedata1,
- biedata2=biedata2,
- x1Data=x1Data,
- y1Data=y1Data,
- model_type=model_type)
- except Exception as e:
- logging.error(f"加载舆情统计时发生错误: {e}")
- return render_template('error.html', error_message="加载舆情统计失败")
-
-@pb.route('/yuqingpredict')
-@login_required
-def yuqingpredict():
- try:
- username = session.get('username')
- TopicList = getAllTopicData()
- if not TopicList:
- return render_template('error.html', error_message="无法获取话题列表")
-
- defaultTopic = sanitize_input(request.args.get('Topic', TopicList[0][0]))
-
- # 验证话题是否在列表中
- if not any(defaultTopic in topic for topic in TopicList):
- return abort(400, "无效的话题")
-
- TopicLen = getTopicLen(defaultTopic)
- X, Y = getTopicCreatedAtandpredictData(defaultTopic)
-
- model_type = sanitize_input(request.args.get('model', 'pro'))
- if model_type not in ['pro', 'basic']:
- return abort(400, "无效的模型类型")
-
- # 尝试从缓存获取预测结果
- cache_key = f"{defaultTopic}_{model_type}"
- cached_result = prediction_cache.get(cache_key)
-
- if cached_result is not None:
- sentences = cached_result
- else:
- if model_type == 'basic':
- value = SnowNLP(defaultTopic).sentiments
- if value == 0.5:
- sentences = '中性'
- elif value > 0.5:
- sentences = '正面'
- elif value < 0.5:
- sentences = '负面'
- else:
- predicted_label, confidence = predict_sentiment(defaultTopic)
- if predicted_label is not None:
- sentences = '良好' if predicted_label == 0 else '不良'
- sentences = f"{sentences} (置信度: {confidence:.2%})"
- else:
- sentences = '预测失败,请稍后重试'
- logging.error(f"预测失败,话题: {defaultTopic}")
-
- # 将结果存入缓存
- prediction_cache.set(cache_key, sentences)
-
- comments = getCommentFilterDataTopic(defaultTopic)
-
- return render_template('yuqingpredict.html',
- username=username,
- TopicList=TopicList,
- defaultTopic=defaultTopic,
- TopicLen=TopicLen,
- sentences=sentences,
- xData=X,
- yData=Y,
- comments=comments,
- model_type=model_type)
- except Exception as e:
- logging.error(f"加载舆情预测时发生错误: {e}")
- return render_template('error.html', error_message="加载舆情预测失败")
-
-@pb.route('/articleCloud')
-@login_required
-def articleCloud():
- try:
- username = session.get('username')
- return render_template('articleContentCloud.html', username=username)
- except Exception as e:
- logging.error(f"加载文章云图时发生错误: {e}")
- return render_template('error.html', error_message="加载文章云图失败")
-
-@pb.route('/page/index')
-def index():
- """首页路由"""
- try:
- hotWordList = getAllHotWords()
- logging.info("成功获取热词列表")
- return render_template('index.html', hotWordList=hotWordList)
- except Exception as e:
- logging.error(f"渲染首页时发生错误: {e}")
- return render_template('error.html', error_message="加载首页失败")
-
-@pb.route('/page/article/')
-def article(type):
- """文章列表页路由"""
- try:
- articleList = getArticleByType(type)
- logging.info(f"成功获取类型为 {type} 的文章列表")
- return render_template('article.html', articleList=articleList)
- except Exception as e:
- logging.error(f"获取文章列表时发生错误: {e}")
- return render_template('error.html', error_message="加载文章列表失败")
-
-@pb.route('/page/articleChar/')
-def articleChar(id):
- """文章详情页路由"""
- try:
- article = getArticleById(id)
- if not article:
- logging.warning(f"未找到ID为 {id} 的文章")
- return render_template('error.html', error_message="文章不存在")
- logging.info(f"成功获取ID为 {id} 的文章详情")
- return render_template('articleChar.html', article=article)
- except Exception as e:
- logging.error(f"获取文章详情时发生错误: {e}")
- return render_template('error.html', error_message="加载文章详情失败")
-
-@pb.route('/api/analyze_messages', methods=['POST'])
-@api_login_required
-@rate_limit
-async def analyze_messages():
- try:
- if not validate_csrf_token():
- return jsonify({'error': 'Invalid CSRF token'}), 403
-
- data = request.get_json()
- if not data:
- return jsonify({'error': 'No data provided'}), 400
-
- batch_size = min(int(data.get('batch_size', 50)), 100) # 限制批量大小
- model_type = sanitize_input(data.get('model_type', 'gpt-3.5-turbo'))
- analysis_depth = sanitize_input(data.get('analysis_depth', 'standard'))
-
- # 验证参数
- if model_type not in ['gpt-3.5-turbo', 'gpt-4']:
- return jsonify({'error': 'Invalid model type'}), 400
-
- if analysis_depth not in ['basic', 'standard', 'deep']:
- return jsonify({'error': 'Invalid analysis depth'}), 400
-
- messages = getRecentMessages(batch_size)
- if not messages:
- return jsonify({
- 'success': False,
- 'error': '没有找到需要分析的消息'
- }), 404
-
- analysis_results = await ai_analyzer.analyze_messages(
- messages=messages,
- batch_size=batch_size,
- model_type=model_type,
- analysis_depth=analysis_depth
- )
-
- if not analysis_results:
- return jsonify({
- 'success': False,
- 'error': '分析过程中出现错误'
- }), 500
-
- try:
- with Session(engine) as session:
- for result in analysis_results:
- analysis = AIAnalysis(
- message_id=result['message_id'],
- sentiment=result['sentiment'],
- sentiment_score=float(result['sentiment_score']),
- keywords=result['keywords'],
- key_points=result['key_points'],
- influence_analysis=result['influence_analysis'],
- risk_level=result['risk_level']
- )
- session.add(analysis)
- session.commit()
- except Exception as e:
- logging.error(f"保存分析结果时出错: {e}")
- return jsonify({
- 'success': False,
- 'error': '保存分析结果失败'
- }), 500
-
- display_results = [
- ai_analyzer.format_analysis_for_display(result)
- for result in analysis_results
- ]
-
- return jsonify({
- 'success': True,
- 'data': display_results
- })
-
- except Exception as e:
- logging.error(f"分析消息时发生错误: {e}")
- return jsonify({
- 'success': False,
- 'error': str(e)
- }), 500
-
-@pb.route('/api/get_analysis/')
-@api_login_required
-@rate_limit
-def get_message_analysis(message_id):
- """获取特定消息的分析结果"""
- try:
- if not message_id or message_id < 1:
- return jsonify({'error': 'Invalid message ID'}), 400
-
- with Session(engine) as session:
- analysis = session.query(AIAnalysis)\
- .filter(AIAnalysis.message_id == message_id)\
- .order_by(AIAnalysis.created_at.desc())\
- .first()
-
- if analysis:
- return jsonify({
- 'success': True,
- 'data': analysis.to_dict()
- })
- else:
- return jsonify({
- 'success': False,
- 'error': '未找到分析结果'
- }), 404
-
- except Exception as e:
- logging.error(f"获取分析结果时出错: {e}")
- return jsonify({
- 'success': False,
- 'error': str(e)
- }), 500
-
-def getRecentMessages(limit=50):
- """获取最近的消息"""
- # 这里需要根据你的数据库结构实现具体的查询逻辑
- messages = []
- try:
- # 示例查询逻辑
- with Session(engine) as session:
- results = session.execute(
- """
- SELECT id, content
- FROM comments
- ORDER BY created_at DESC
- LIMIT :limit
- """,
- {'limit': limit}
- ).fetchall()
-
- messages = [
- {'id': row[0], 'content': row[1]}
- for row in results
- ]
- except Exception as e:
- logging.error(f"获取最近消息时出错: {e}")
-
- return messages
+from flask import Flask, session, render_template, redirect, Blueprint, request, jsonify, abort
+from utils.mynlp import SnowNLP
+from utils.getHomePageData import *
+from utils.getHotWordPageData import *
+from utils.getTableData import *
+from utils.getPublicData import getAllHotWords, getAllTopics, getArticleByType, getArticleById
+from utils.getEchartsData import *
+from utils.getTopicPageData import *
+from utils.yuqingpredict import *
+from utils.logger import app_logger as logging
+from utils.cache_manager import prediction_cache
+from utils.ai_analyzer import ai_analyzer
+from utils.ai_analysis import AIAnalysis
+from sqlalchemy.orm import Session
+from sqlalchemy import create_engine
+import asyncio
+import torch
+from BCAT_front.predict import model_manager
+from functools import wraps
+import bleach
+import re
+from datetime import datetime, timedelta
+
+pb = Blueprint('page',  # Blueprint the page views below are registered on (URL prefix /page, templates from ./templates)
+ __name__,
+ url_prefix='/page',
+ template_folder='templates')
+
+def sanitize_input(text):
+ """Clean user input with bleach.clean(strip=True) to guard against XSS; None is returned unchanged."""
+ if text is None:
+ return None
+ return bleach.clean(str(text), strip=True)
+
+def validate_csrf_token():
+ """Validate the CSRF token: True only when the 'csrf_token' form field and the session's 'csrf_token' are both present and equal."""
+ token = request.form.get('csrf_token')  # NOTE(review): only form data is read; a JSON-bodied request carries no form fields - confirm callers send the token as form data
+ stored_token = session.get('csrf_token')
+ if not token or not stored_token or token != stored_token:
+ return False
+ return True
+
+def login_required(f):  # Decorator: redirect to /user/login unless the session contains 'username'
+ @wraps(f)
+ def decorated_function(*args, **kwargs):
+ if 'username' not in session:
+ return redirect('/user/login')
+ return f(*args, **kwargs)
+ return decorated_function
+
+def api_login_required(f):  # Decorator: answer with JSON {'error': 'Unauthorized'} and status 401 unless the session contains 'username'
+ @wraps(f)
+ def decorated_function(*args, **kwargs):
+ if 'username' not in session:
+ return jsonify({'error': 'Unauthorized'}), 401
+ return f(*args, **kwargs)
+ return decorated_function
+
+def rate_limit(f):  # Decorator: counts requests per remote address and per wrapped function name, rejecting with 429 at the limit
+ @wraps(f)
+ def decorated_function(*args, **kwargs):
+ key = f"rate_limit:{request.remote_addr}:{f.__name__}"
+ current = int(redis_client.get(key) or 0)  # NOTE(review): redis_client is not among the explicitly imported names in this file - presumably supplied by a star import; verify
+ if current >= 100: # Limit of 100 requests per minute
+ return jsonify({'error': 'Too many requests'}), 429
+ pipe = redis_client.pipeline()
+ pipe.incr(key)
+ pipe.expire(key, 60) # Key expires 60 seconds after the most recent counted request
+ pipe.execute()
+ return f(*args, **kwargs)
+ return decorated_function
+
+# Select the torch device: CUDA when available, otherwise CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Model paths
+model_save_path = 'model_pro/final_model.pt'
+bert_model_path = 'model_pro/bert_model'
+ctm_tokenizer_path = 'model_pro/sentence_bert_model'
+
+# Load the models at import time; a failure is logged and the module keeps loading
+try:
+ model_manager.load_models(model_save_path, bert_model_path, ctm_tokenizer_path)
+except Exception as e:
+ logging.error(f"模型加载失败: {e}")
+
+# Database configuration
+DATABASE_URL = os.getenv('DATABASE_URL', "sqlite:///ai_analysis.db")  # NOTE(review): os is not imported explicitly in this file - presumably supplied by a star import; verify
+engine = create_engine(DATABASE_URL)
+AIAnalysis.metadata.create_all(engine)
+
+def predict_sentiment(text):
+ """Predict the sentiment of a single text via model_manager.predict_batch; returns (label, probability of that label), or (None, None) for empty input or on any error."""
+ try:
+ if not text or len(text.strip()) == 0:
+ return None, None
+
+ # Clean the input
+ cleaned_text = sanitize_input(text)
+ if not cleaned_text:
+ return None, None
+
+ predictions, probabilities = model_manager.predict_batch([cleaned_text])
+ if predictions is not None and len(predictions) > 0:
+ return predictions[0], probabilities[0][predictions[0]]
+ return None, None
+ except Exception as e:
+ logging.error(f"预测过程中出现错误: {e}")
+ return None, None
+
+@pb.route('/home')
+@login_required
+def home():  # Home page view: gathers the tag, top-comment and chart data and renders index.html
+ try:
+ username = session.get('username')
+ articleLenMax, likeCountMaxAuthorName, cityMax = getHomeTagsData()
+ commentsLikeCountTopFore = getHomeCommentsLikeCountTopFore()
+ X, Y = getHomeArticleCreatedAtChart()
+ typeChart = getHomeTypeChart()
+ createAtChart = getHomeCommentCreatedChart()
+
+ return render_template('index.html',
+ username=username,
+ articleLenMax=articleLenMax,
+ likeCountMaxAuthorName=likeCountMaxAuthorName,
+ cityMax=cityMax,
+ commentsLikeCountTopFore=commentsLikeCountTopFore,
+ xData=X,
+ yData=Y,
+ typeChart=typeChart,
+ createAtChart=createAtChart)
+ except Exception as e:
+ logging.error(f"加载首页时发生错误: {e}")
+ return render_template('error.html', error_message="加载首页失败")  # NOTE(review): only error_message is passed and no status code is set; the error.html added in this patch also interpolates error_code and error_title - confirm
+
+@pb.route('/hotWord')
+@login_required
+def hotWord():  # Hot word page view: picks the hot word from ?hotWord= (default: first list entry) and renders hotWord.html
+ try:
+ username = session.get('username')
+ hotWordList = getAllHotWords()
+ if not hotWordList:
+ return render_template('error.html', error_message="无法获取热词列表")
+
+ defaultHotWord = sanitize_input(request.args.get('hotWord', hotWordList[0][0]))
+
+ # Check that the hot word appears in the list
+ if not any(defaultHotWord in word for word in hotWordList):
+ return abort(400, "无效的热词")  # NOTE(review): abort() raises an HTTPException, which the "except Exception" below catches - the client likely gets the generic error page instead of a 400; confirm intent
+
+ hotWordLen = getHotWordLen(defaultHotWord)
+ X, Y = getHotWordPageCreatedAtCharData(defaultHotWord)
+
+ value = SnowNLP(defaultHotWord).sentiments
+ if value == 0.5:
+ sentences = '中性'
+ elif value > 0.5:
+ sentences = '正面'
+ elif value < 0.5:
+ sentences = '负面'
+
+ comments = getCommentFilterData(defaultHotWord)
+
+ return render_template('hotWord.html',
+ username=username,
+ hotWordList=hotWordList,
+ defaultHotWord=defaultHotWord,
+ hotWordLen=hotWordLen,
+ sentences=sentences,
+ xData=X,
+ yData=Y,
+ comments=comments)
+ except Exception as e:
+ logging.error(f"加载热词页面时发生错误: {e}")
+ return render_template('error.html', error_message="加载热词页面失败")
+
+@pb.route('/hotTopic')
+def hotTopic():  # Topic page view, rendered with hotWord.html. NOTE(review): unlike the neighbouring page views this one has no @login_required and no try/except - confirm intended
+ username = session.get('username')
+ topicList = getAllTopics()
+ defaultTopic = topicList[0][0]  # NOTE(review): raises IndexError when topicList is empty
+ if request.args.get('topic'):
+ defaultTopic = request.args.get('topic')
+ topicLen = getTopicLen(defaultTopic)
+ X, Y = getTopicPageCreatedAtCharData()
+ sentences = ''
+
+ # ... TODO: embed topic-related content (popularity?) here to fill in sentences
+
+ comments = getCommentFilterDataTopic(defaultTopic)
+ return render_template('hotWord.html',
+ username=username,
+ topicList=topicList,
+ defaultTopic=defaultTopic,
+ topicLen=topicLen,
+ sentences=sentences,
+ xData=X,
+ yData=Y,
+ comments=comments)
+
+@pb.route('/tableData')
+@login_required
+def tableData():
+ try:
+ username = session.get('username')
+ defaultFlag = bool(request.args.get('flag', False))
+ tableData = getTableDataList(defaultFlag)
+
+ return render_template('tableData.html',
+ username=username,
+ tableData=tableData,
+ defaultFlag=defaultFlag)
+ except Exception as e:
+ logging.error(f"加载表格数据时发生错误: {e}")
+ return render_template('error.html', error_message="加载表格数据失败")
+
+@pb.route('/articleChar')
+def articleChar():  # Article analysis page view. NOTE(review): unlike the neighbouring page views this one has no @login_required and no try/except - confirm intended
+ username = session.get('username')
+ typeList = getTypeList()
+ defaultType = typeList[0]  # NOTE(review): raises IndexError when typeList is empty
+ if request.args.get('type'): defaultType = request.args.get('type')
+ X, Y = getArticleLikeCount(defaultType)
+ x1Data, y1Data = getArticleCommentsLen(defaultType)
+ x2Data, y2Data = getArticleRepotsLen(defaultType)
+ return render_template('articleChar.html',
+ username=username,
+ typeList=typeList,
+ defaultType=defaultType,
+ xData=X,
+ yData=Y,
+ x1Data=x1Data,
+ y1Data=y1Data,
+ x2Data=x2Data,
+ y2Data=y2Data)
+
+@pb.route('/ipChar')
+@login_required
+def ipChar():
+ try:
+ username = session.get('username')
+ articleRegionData = getIPByArticleRegion()
+ commentRegionData = getIPByCommentsRegion()
+
+ return render_template('ipChar.html',
+ username=username,
+ articleRegionData=articleRegionData,
+ commentRegionData=commentRegionData)
+ except Exception as e:
+ logging.error(f"加载IP统计时发生错误: {e}")
+ return render_template('error.html', error_message="加载IP统计失败")
+
+@pb.route('/commentChar')
+@login_required
+def commentChar():
+ try:
+ username = session.get('username')
+ X, Y = getCommentDataOne()
+ genderPieData = getCommentDataTwo()
+
+ return render_template('commentChar.html',
+ username=username,
+ xData=X,
+ yData=Y,
+ genderPieData=genderPieData)
+ except Exception as e:
+ logging.error(f"加载评论统计时发生错误: {e}")
+ return render_template('error.html', error_message="加载评论统计失败")
+
+@pb.route('/yuqingChar')
+@login_required
+def yuqingChar():
+ try:
+ username = session.get('username')
+ model_type = sanitize_input(request.args.get('model', 'pro'))
+
+ # 验证模型类型
+ if model_type not in ['pro', 'basic']:
+ return abort(400, "无效的模型类型")
+
+ X, Y, biedata = getYuQingCharDataOne()
+ biedata1, biedata2 = getYuQingCharDataTwo(model_type)
+ x1Data, y1Data = getYuQingCharDataThree()
+
+ return render_template('yuqingChar.html',
+ username=username,
+ xData=X,
+ yData=Y,
+ biedata=biedata,
+ biedata1=biedata1,
+ biedata2=biedata2,
+ x1Data=x1Data,
+ y1Data=y1Data,
+ model_type=model_type)
+ except Exception as e:
+ logging.error(f"加载舆情统计时发生错误: {e}")
+ return render_template('error.html', error_message="加载舆情统计失败")
+
+@pb.route('/yuqingpredict')
+@login_required
+def yuqingpredict():
+ try:
+ username = session.get('username')
+ TopicList = getAllTopicData()
+ if not TopicList:
+ return render_template('error.html', error_message="无法获取话题列表")
+
+ defaultTopic = sanitize_input(request.args.get('Topic', TopicList[0][0]))
+
+ # 验证话题是否在列表中
+ if not any(defaultTopic in topic for topic in TopicList):
+ return abort(400, "无效的话题")
+
+ TopicLen = getTopicLen(defaultTopic)
+ X, Y = getTopicCreatedAtandpredictData(defaultTopic)
+
+ model_type = sanitize_input(request.args.get('model', 'pro'))
+ if model_type not in ['pro', 'basic']:
+ return abort(400, "无效的模型类型")
+
+ # 尝试从缓存获取预测结果
+ cache_key = f"{defaultTopic}_{model_type}"
+ cached_result = prediction_cache.get(cache_key)
+
+ if cached_result is not None:
+ sentences = cached_result
+ else:
+ if model_type == 'basic':
+ value = SnowNLP(defaultTopic).sentiments
+ if value == 0.5:
+ sentences = '中性'
+ elif value > 0.5:
+ sentences = '正面'
+ elif value < 0.5:
+ sentences = '负面'
+ else:
+ predicted_label, confidence = predict_sentiment(defaultTopic)
+ if predicted_label is not None:
+ sentences = '良好' if predicted_label == 0 else '不良'
+ sentences = f"{sentences} (置信度: {confidence:.2%})"
+ else:
+ sentences = '预测失败,请稍后重试'
+ logging.error(f"预测失败,话题: {defaultTopic}")
+
+ # 将结果存入缓存
+ prediction_cache.set(cache_key, sentences)
+
+ comments = getCommentFilterDataTopic(defaultTopic)
+
+ return render_template('yuqingpredict.html',
+ username=username,
+ TopicList=TopicList,
+ defaultTopic=defaultTopic,
+ TopicLen=TopicLen,
+ sentences=sentences,
+ xData=X,
+ yData=Y,
+ comments=comments,
+ model_type=model_type)
+ except Exception as e:
+ logging.error(f"加载舆情预测时发生错误: {e}")
+ return render_template('error.html', error_message="加载舆情预测失败")
+
+@pb.route('/articleCloud')
+@login_required
+def articleCloud():
+ try:
+ username = session.get('username')
+ return render_template('articleContentCloud.html', username=username)
+ except Exception as e:
+ logging.error(f"加载文章云图时发生错误: {e}")
+ return render_template('error.html', error_message="加载文章云图失败")
+
+@pb.route('/page/index')
+def index():
+ """首页路由"""
+ try:
+ hotWordList = getAllHotWords()
+ logging.info("成功获取热词列表")
+ return render_template('index.html', hotWordList=hotWordList)
+ except Exception as e:
+ logging.error(f"渲染首页时发生错误: {e}")
+ return render_template('error.html', error_message="加载首页失败")
+
+@pb.route('/page/article/')
+def article(type):
+ """文章列表页路由"""
+ try:
+ articleList = getArticleByType(type)
+ logging.info(f"成功获取类型为 {type} 的文章列表")
+ return render_template('article.html', articleList=articleList)
+ except Exception as e:
+ logging.error(f"获取文章列表时发生错误: {e}")
+ return render_template('error.html', error_message="加载文章列表失败")
+
+@pb.route('/page/articleChar/')
+def articleChar(id):
+ """文章详情页路由"""
+ try:
+ article = getArticleById(id)
+ if not article:
+ logging.warning(f"未找到ID为 {id} 的文章")
+ return render_template('error.html', error_message="文章不存在")
+ logging.info(f"成功获取ID为 {id} 的文章详情")
+ return render_template('articleChar.html', article=article)
+ except Exception as e:
+ logging.error(f"获取文章详情时发生错误: {e}")
+ return render_template('error.html', error_message="加载文章详情失败")
+
+@pb.route('/api/analyze_messages', methods=['POST'])
+@api_login_required
+@rate_limit
+async def analyze_messages():
+ try:
+ if not validate_csrf_token():
+ return jsonify({'error': 'Invalid CSRF token'}), 403
+
+ data = request.get_json()
+ if not data:
+ return jsonify({'error': 'No data provided'}), 400
+
+ batch_size = min(int(data.get('batch_size', 50)), 100) # 限制批量大小
+ model_type = sanitize_input(data.get('model_type', 'gpt-3.5-turbo'))
+ analysis_depth = sanitize_input(data.get('analysis_depth', 'standard'))
+
+ # 验证参数
+ if model_type not in ['gpt-3.5-turbo', 'gpt-4']:
+ return jsonify({'error': 'Invalid model type'}), 400
+
+ if analysis_depth not in ['basic', 'standard', 'deep']:
+ return jsonify({'error': 'Invalid analysis depth'}), 400
+
+ messages = getRecentMessages(batch_size)
+ if not messages:
+ return jsonify({
+ 'success': False,
+ 'error': '没有找到需要分析的消息'
+ }), 404
+
+ analysis_results = await ai_analyzer.analyze_messages(
+ messages=messages,
+ batch_size=batch_size,
+ model_type=model_type,
+ analysis_depth=analysis_depth
+ )
+
+ if not analysis_results:
+ return jsonify({
+ 'success': False,
+ 'error': '分析过程中出现错误'
+ }), 500
+
+ try:
+ with Session(engine) as session:
+ for result in analysis_results:
+ analysis = AIAnalysis(
+ message_id=result['message_id'],
+ sentiment=result['sentiment'],
+ sentiment_score=float(result['sentiment_score']),
+ keywords=result['keywords'],
+ key_points=result['key_points'],
+ influence_analysis=result['influence_analysis'],
+ risk_level=result['risk_level']
+ )
+ session.add(analysis)
+ session.commit()
+ except Exception as e:
+ logging.error(f"保存分析结果时出错: {e}")
+ return jsonify({
+ 'success': False,
+ 'error': '保存分析结果失败'
+ }), 500
+
+ display_results = [
+ ai_analyzer.format_analysis_for_display(result)
+ for result in analysis_results
+ ]
+
+ return jsonify({
+ 'success': True,
+ 'data': display_results
+ })
+
+ except Exception as e:
+ logging.error(f"分析消息时发生错误: {e}")
+ return jsonify({
+ 'success': False,
+ 'error': str(e)
+ }), 500
+
+@pb.route('/api/get_analysis/')
+@api_login_required
+@rate_limit
+def get_message_analysis(message_id):
+ """获取特定消息的分析结果"""
+ try:
+ if not message_id or message_id < 1:
+ return jsonify({'error': 'Invalid message ID'}), 400
+
+ with Session(engine) as session:
+ analysis = session.query(AIAnalysis)\
+ .filter(AIAnalysis.message_id == message_id)\
+ .order_by(AIAnalysis.created_at.desc())\
+ .first()
+
+ if analysis:
+ return jsonify({
+ 'success': True,
+ 'data': analysis.to_dict()
+ })
+ else:
+ return jsonify({
+ 'success': False,
+ 'error': '未找到分析结果'
+ }), 404
+
+ except Exception as e:
+ logging.error(f"获取分析结果时出错: {e}")
+ return jsonify({
+ 'success': False,
+ 'error': str(e)
+ }), 500
+
+def getRecentMessages(limit=50):
+ """获取最近的消息"""
+ # 这里需要根据你的数据库结构实现具体的查询逻辑
+ messages = []
+ try:
+ # 示例查询逻辑
+ with Session(engine) as session:
+ results = session.execute(
+ """
+ SELECT id, content
+ FROM comments
+ ORDER BY created_at DESC
+ LIMIT :limit
+ """,
+ {'limit': limit}
+ ).fetchall()
+
+ messages = [
+ {'id': row[0], 'content': row[1]}
+ for row in results
+ ]
+ except Exception as e:
+ logging.error(f"获取最近消息时出错: {e}")
+
+ return messages
diff --git a/views/page/templates/articleChar.html b/views/page/templates/articleChar.html
index bbc0243..e10542c 100644
--- a/views/page/templates/articleChar.html
+++ b/views/page/templates/articleChar.html
@@ -1,345 +1,345 @@
-{% extends 'base_page.html' %}
-{% block title %}
- 文章分析
-{% endblock %}
-
-{% block nav %}
-
-{% endblock %}
-
-{% block content %}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-{% endblock %}
-
-{% block echarts %}
-
-
-
+{% extends 'base_page.html' %}
+{% block title %}
+ 文章分析
+{% endblock %}
+
+{% block nav %}
+
+{% endblock %}
+
+{% block content %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{% endblock %}
+
+{% block echarts %}
+
+
+
{% endblock %}
\ No newline at end of file
diff --git a/views/page/templates/articleContentCloud.html b/views/page/templates/articleContentCloud.html
index c252ed3..4644c31 100644
--- a/views/page/templates/articleContentCloud.html
+++ b/views/page/templates/articleContentCloud.html
@@ -1,144 +1,144 @@
-{% extends 'base_page.html' %}
-{% block title %}
- 文章内容词云图
-{% endblock %}
-
-{% block nav %}
-
-{% endblock %}
-
-{% block content %}
-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-{% endblock %}
-
-{% block echarts %}
-
+{% extends 'base_page.html' %}
+{% block title %}
+ 文章内容词云图
+{% endblock %}
+
+{% block nav %}
+
+{% endblock %}
+
+{% block content %}
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+{% endblock %}
+
+{% block echarts %}
+
{% endblock %}
\ No newline at end of file
diff --git a/views/page/templates/base_page.html b/views/page/templates/base_page.html
index 44e0b85..9f0873e 100644
--- a/views/page/templates/base_page.html
+++ b/views/page/templates/base_page.html
@@ -1,478 +1,505 @@
-
-
-
-
-
-
- {% block title %}首页{% endblock %}
-
-
-
-
-
-
-
-
-
-
-
-
-
- {% block content %}
-
-
-
-
-
-
-
-
-
-
-
文章个数
-
-
{{ articleLenMax }}个
-
-
-
-
-
-
-
-
-
-
-
-
文章爬取规则
-
-
每 5 小时更新一次爬取内容
-
-
-
-
-
-
-
-
-
-
-
-
下次爬取时间
-
-
7-5-18:00
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- {% for i in commentsLikeCountTopFore %}
- -
-
-
- 🧑 {{ i[5] }}
-
-
- {{ i[4] }}
-
-
-
-
👍 {{ i[2] }}
-
-
- {% endfor %}
-
-
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
- {% endblock %}
-
-
-
-
-
-
- {% block echarts %}
-
-
-
- {% endblock %}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
diff --git a/views/page/templates/commentChar.html b/views/page/templates/commentChar.html
index 814630d..0ec50c1 100644
--- a/views/page/templates/commentChar.html
+++ b/views/page/templates/commentChar.html
@@ -1,317 +1,317 @@
-{% extends 'base_page.html' %}
-
-{% block title %}
- 评论分析
-{% endblock %}
-{% block nav %}
-
-{% endblock %}
-{% block content %}
-
-
-
-
-
-
-
-
-
-
-

-
-
-
-
-
-
-
-{% endblock %}
-
-{% block echarts %}
-
-
+{% extends 'base_page.html' %}
+
+{% block title %}
+ 评论分析
+{% endblock %}
+{% block nav %}
+
+{% endblock %}
+{% block content %}
+
+
+
+
+
+
+
+
+
+
+

+
+
+
+
+
+
+
+{% endblock %}
+
+{% block echarts %}
+
+
{% endblock %}
\ No newline at end of file
diff --git a/views/page/templates/hotWord.html b/views/page/templates/hotWord.html
index 4600cf1..2c3b269 100644
--- a/views/page/templates/hotWord.html
+++ b/views/page/templates/hotWord.html
@@ -1,503 +1,503 @@
-{% extends 'base_page.html' %}
-{% block title %}
- 热词统计
-{% endblock %}
-{% block nav %}
-
-{% endblock %}
-{% block content %}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
{{ defaultHotWord }}
-
-
- - 热词名称:{{ defaultHotWord }}
- - 出现次数:{{ hotWordLen }}次
- - 热词情感:{{ sentences }}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<table id="datatable" class="table data-table table-striped table-bordered" >
- <thead>
- <tr>
- <th>Name</th>
- <th>Position</th>
- <th>Office</th>
- <th>Age</th>
- <th>Start date</th>
- <th>Salary</th>
- </tr>
- </thead>
- <tbody>
- <tr>
- <td>Tiger Nixon</td>
- <td>System Architect</td>
- <td>Edinburgh</td>
- <td>61</td>
- <td>2011/04/25</td>
- <td>$320,800</td>
- </tr>
- <tr>
- <td>Garrett Winters</td>
- <td>Accountant</td>
- <td>Tokyo</td>
- <td>63</td>
- <td>2011/07/25</td>
- <td>$170,750</td>
- </tr>
- <tr>
- <td>Ashton Cox</td>
- <td>Junior Technical Author</td>
- <td>San Francisco</td>
- <td>66</td>
- <td>2009/01/12</td>
- <td>$86,000</td>
- </tr>
- <tr>
- <td>Cedric Kelly</td>
- <td>Senior Javascript Developer</td>
- <td>Edinburgh</td>
- <td>22</td>
- <td>2012/03/29</td>
- <td>$433,060</td>
- </tr>
- <tr>
- <td>Airi Satou</td>
- <td>Accountant</td>
- <td>Tokyo</td>
- <td>33</td>
- <td>2008/11/28</td>
- <td>$162,700</td>
- </tr>
- <tr>
- <td>Brielle Williamson</td>
- <td>Integration Specialist</td>
- <td>New York</td>
- <td>61</td>
- <td>2012/12/02</td>
- <td>$372,000</td>
- </tr>
- <tr>
- <td>Herrod Chandler</td>
- <td>Sales Assistant</td>
- <td>San Francisco</td>
- <td>59</td>
- <td>2012/08/06</td>
- <td>$137,500</td>
- </tr>
- <tr>
- <td>Rhona Davidson</td>
- <td>Integration Specialist</td>
- <td>Tokyo</td>
- <td>55</td>
- <td>2010/10/14</td>
- <td>$327,900</td>
- </tr>
- <tr>
- <td>Colleen Hurst</td>
- <td>Javascript Developer</td>
- <td>San Francisco</td>
- <td>39</td>
- <td>2009/09/15</td>
- <td>$205,500</td>
- </tr>
- <tr>
- <td>Sonya Frost</td>
- <td>Software Engineer</td>
- <td>Edinburgh</td>
- <td>23</td>
- <td>2008/12/13</td>
- <td>$103,600</td>
- </tr>
- <tr>
- <td>Jena Gaines</td>
- <td>Office Manager</td>
- <td>London</td>
- <td>30</td>
- <td>2008/12/19</td>
- <td>$90,560</td>
- </tr>
- <tr>
- <td>Quinn Flynn</td>
- <td>Support Lead</td>
- <td>Edinburgh</td>
- <td>22</td>
- <td>2013/03/03</td>
- <td>$342,000</td>
- </tr>
- <tr>
- <td>Charde Marshall</td>
- <td>Regional Director</td>
- <td>San Francisco</td>
- <td>36</td>
- <td>2008/10/16</td>
- <td>$470,600</td>
- </tr>
- <tr>
- <td>Haley Kennedy</td>
- <td>Senior Marketing Designer</td>
- <td>London</td>
- <td>43</td>
- <td>2012/12/18</td>
- <td>$313,500</td>
- </tr>
- <tr>
- <td>Tatyana Fitzpatrick</td>
- <td>Regional Director</td>
- <td>London</td>
- <td>19</td>
- <td>2010/03/17</td>
- <td>$385,750</td>
- </tr>
- <tr>
- <td>Michael Silva</td>
- <td>Marketing Designer</td>
- <td>London</td>
- <td>66</td>
- <td>2012/11/27</td>
- <td>$198,500</td>
- </tr>
- <tr>
- <td>Paul Byrd</td>
- <td>Chief Financial Officer (CFO)</td>
- <td>New York</td>
- <td>64</td>
- <td>2010/06/09</td>
- <td>$725,000</td>
- </tr>
- <tr>
- <td>Gloria Little</td>
- <td>Systems Administrator</td>
- <td>New York</td>
- <td>59</td>
- <td>2009/04/10</td>
- <td>$237,500</td>
- </tr>
- <tr>
- <td>Bradley Greer</td>
- <td>Software Engineer</td>
- <td>London</td>
- <td>41</td>
- <td>2012/10/13</td>
- <td>$132,000</td>
- </tr>
- <tr>
- <td>Dai Rios</td>
- <td>Personnel Lead</td>
- <td>Edinburgh</td>
- <td>35</td>
- <td>2012/09/26</td>
- <td>$217,500</td>
- </tr>
-</table>
-
-
-
根据选择的热词从而查询出评论数据
-
-
-
-
-
- | 文章ID |
- 评论用户 |
- 评论性别 |
- 评论地址 |
- 评论内容 |
- 点赞量 |
-
-
- {% for i in comments %}
-
- | {{ i[0] }} |
- {{ i[5] }} |
-
- {% if i[6] =='f' %}
- 女生
- {% else %}
- 男生
- {% endif %}
- |
- {{ i[3] }} |
- {{ i[4] }} |
- 👍{{ i[2] }} |
-
- {% endfor %}
-
-
-
-
-
-
-
-
-
-{% endblock %}
-
-{% block echarts %}
-
+{% extends 'base_page.html' %}
+{% block title %}
+
热词统计
+{% endblock %}
+{% block nav %}
+
+{% endblock %}
+{% block content %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
{{ defaultHotWord }}
+
+
+ - 热词名称:{{ defaultHotWord }}
+ - 出现次数:{{ hotWordLen }}次
+ - 热词情感:{{ sentences }}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<table id="datatable" class="table data-table table-striped table-bordered" >
+ <thead>
+ <tr>
+ <th>Name</th>
+ <th>Position</th>
+ <th>Office</th>
+ <th>Age</th>
+ <th>Start date</th>
+ <th>Salary</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>Tiger Nixon</td>
+ <td>System Architect</td>
+ <td>Edinburgh</td>
+ <td>61</td>
+ <td>2011/04/25</td>
+ <td>$320,800</td>
+ </tr>
+ <tr>
+ <td>Garrett Winters</td>
+ <td>Accountant</td>
+ <td>Tokyo</td>
+ <td>63</td>
+ <td>2011/07/25</td>
+ <td>$170,750</td>
+ </tr>
+ <tr>
+ <td>Ashton Cox</td>
+ <td>Junior Technical Author</td>
+ <td>San Francisco</td>
+ <td>66</td>
+ <td>2009/01/12</td>
+ <td>$86,000</td>
+ </tr>
+ <tr>
+ <td>Cedric Kelly</td>
+ <td>Senior Javascript Developer</td>
+ <td>Edinburgh</td>
+ <td>22</td>
+ <td>2012/03/29</td>
+ <td>$433,060</td>
+ </tr>
+ <tr>
+ <td>Airi Satou</td>
+ <td>Accountant</td>
+ <td>Tokyo</td>
+ <td>33</td>
+ <td>2008/11/28</td>
+ <td>$162,700</td>
+ </tr>
+ <tr>
+ <td>Brielle Williamson</td>
+ <td>Integration Specialist</td>
+ <td>New York</td>
+ <td>61</td>
+ <td>2012/12/02</td>
+ <td>$372,000</td>
+ </tr>
+ <tr>
+ <td>Herrod Chandler</td>
+ <td>Sales Assistant</td>
+ <td>San Francisco</td>
+ <td>59</td>
+ <td>2012/08/06</td>
+ <td>$137,500</td>
+ </tr>
+ <tr>
+ <td>Rhona Davidson</td>
+ <td>Integration Specialist</td>
+ <td>Tokyo</td>
+ <td>55</td>
+ <td>2010/10/14</td>
+ <td>$327,900</td>
+ </tr>
+ <tr>
+ <td>Colleen Hurst</td>
+ <td>Javascript Developer</td>
+ <td>San Francisco</td>
+ <td>39</td>
+ <td>2009/09/15</td>
+ <td>$205,500</td>
+ </tr>
+ <tr>
+ <td>Sonya Frost</td>
+ <td>Software Engineer</td>
+ <td>Edinburgh</td>
+ <td>23</td>
+ <td>2008/12/13</td>
+ <td>$103,600</td>
+ </tr>
+ <tr>
+ <td>Jena Gaines</td>
+ <td>Office Manager</td>
+ <td>London</td>
+ <td>30</td>
+ <td>2008/12/19</td>
+ <td>$90,560</td>
+ </tr>
+ <tr>
+ <td>Quinn Flynn</td>
+ <td>Support Lead</td>
+ <td>Edinburgh</td>
+ <td>22</td>
+ <td>2013/03/03</td>
+ <td>$342,000</td>
+ </tr>
+ <tr>
+ <td>Charde Marshall</td>
+ <td>Regional Director</td>
+ <td>San Francisco</td>
+ <td>36</td>
+ <td>2008/10/16</td>
+ <td>$470,600</td>
+ </tr>
+ <tr>
+ <td>Haley Kennedy</td>
+ <td>Senior Marketing Designer</td>
+ <td>London</td>
+ <td>43</td>
+ <td>2012/12/18</td>
+ <td>$313,500</td>
+ </tr>
+ <tr>
+ <td>Tatyana Fitzpatrick</td>
+ <td>Regional Director</td>
+ <td>London</td>
+ <td>19</td>
+ <td>2010/03/17</td>
+ <td>$385,750</td>
+ </tr>
+ <tr>
+ <td>Michael Silva</td>
+ <td>Marketing Designer</td>
+ <td>London</td>
+ <td>66</td>
+ <td>2012/11/27</td>
+ <td>$198,500</td>
+ </tr>
+ <tr>
+ <td>Paul Byrd</td>
+ <td>Chief Financial Officer (CFO)</td>
+ <td>New York</td>
+ <td>64</td>
+ <td>2010/06/09</td>
+ <td>$725,000</td>
+ </tr>
+ <tr>
+ <td>Gloria Little</td>
+ <td>Systems Administrator</td>
+ <td>New York</td>
+ <td>59</td>
+ <td>2009/04/10</td>
+ <td>$237,500</td>
+ </tr>
+ <tr>
+ <td>Bradley Greer</td>
+ <td>Software Engineer</td>
+ <td>London</td>
+ <td>41</td>
+ <td>2012/10/13</td>
+ <td>$132,000</td>
+ </tr>
+ <tr>
+ <td>Dai Rios</td>
+ <td>Personnel Lead</td>
+ <td>Edinburgh</td>
+ <td>35</td>
+ <td>2012/09/26</td>
+ <td>$217,500</td>
+ </tr>
+</table>
+
+
+
根据选择的热词从而查询出评论数据
+
+
+
+
+
+ | 文章ID |
+ 评论用户 |
+ 评论性别 |
+ 评论地址 |
+ 评论内容 |
+ 点赞量 |
+
+
+ {% for i in comments %}
+
+ | {{ i[0] }} |
+ {{ i[5] }} |
+
+ {% if i[6] =='f' %}
+ 女生
+ {% else %}
+ 男生
+ {% endif %}
+ |
+ {{ i[3] }} |
+ {{ i[4] }} |
+ 👍{{ i[2] }} |
+
+ {% endfor %}
+
+
+
+
+
+
+
+
+
+{% endblock %}
+
+{% block echarts %}
+
{% endblock %}
\ No newline at end of file
diff --git a/views/page/templates/index.html b/views/page/templates/index.html
index 3f4a51d..d3c6cc8 100644
--- a/views/page/templates/index.html
+++ b/views/page/templates/index.html
@@ -1,27 +1,30 @@
-
-{% extends 'base_page.html' %}
\ No newline at end of file
+
+{% extends 'base_page.html' %}
+{% block title %}
+
首页
+{% endblock %}
\ No newline at end of file
diff --git a/views/page/templates/ipChar.html b/views/page/templates/ipChar.html
index 718084e..c0c5afb 100644
--- a/views/page/templates/ipChar.html
+++ b/views/page/templates/ipChar.html
@@ -1,275 +1,275 @@
-{% extends 'base_page.html' %}
-
-{% block title %}
- IP分析
-{% endblock %}
-
-{% block nav %}
-
-{% endblock %}
-{% block content %}
-
-
-{% endblock %}
-
-{% block echarts %}
-
-
+{% extends 'base_page.html' %}
+
+{% block title %}
+
IP分析
+{% endblock %}
+
+{% block nav %}
+
+{% endblock %}
+{% block content %}
+
+
+{% endblock %}
+
+{% block echarts %}
+
+
{% endblock %}
\ No newline at end of file
diff --git a/views/page/templates/tableData.html b/views/page/templates/tableData.html
index c86a6cb..d465ec5 100644
--- a/views/page/templates/tableData.html
+++ b/views/page/templates/tableData.html
@@ -1,225 +1,228 @@
-{% extends 'base_page.html' %}
-
-{% block title %}
-微博舆情统计页
-{% endblock %}
-{% block nav %}
-
-{% endblock %}
-{% block content %}
-
-
-
-
-
-
-
-
-
情感分类
-
-
-
-
-
-
- |
- 文章ID
- |
-
- 文章IP
- |
-
- 点赞量
- |
-
- 转发量
- |
-
- 评论量
- |
-
- 类型
- |
-
- 内容
- |
-
- 发布时间
- |
- {% if defaultFlag %}
-
- 情感分类
- |
- {% endif %}
-
-
-
- {% for article in tableData %}
-
- |
-
- {{ article[0] }}
-
- |
- {{ article[4] }} |
- 👍{{ article[1] }} |
- 🥇{{ article[2] }} |
- 🔥{{ article[3] }} |
- {{ article[8] }} |
- {{ article[5] }} |
- {{ article[7] }} |
- {% if defaultFlag %}
-
- {% if article[-1] == '正面' %}
-
- {{ article[-1] }}
-
-
- {% else %}
-
- {{ article[-1] }}
-
-
- {% endif %}
- |
- {% endif %}
-
- {% endfor %}
-
-
-
-
-
-
-
-
-
-
-
-{% endblock %}
-
-
-{% block echarts %}
-
+{% extends 'base_page.html' %}
+
+{% block title %}
+
微博舆情统计页
+{% endblock %}
+{% block nav %}
+
+{% endblock %}
+{% block content %}
+
+
+
+
+
+
+
+
+
情感分类
+
+
+
+
+
+
+ |
+ 文章ID
+ |
+
+ 文章IP
+ |
+
+ 文章标题
+ |
+
+ 点赞量
+ |
+
+ 转发量
+ |
+
+ 评论量
+ |
+
+ 类型
+ |
+
+ 内容
+ |
+
+ 发布时间
+ |
+ {% if defaultFlag %}
+
+ 情感分类
+ |
+ {% endif %}
+
+
+
+ {% for article in tableData %}
+
+ |
+
+ {{ article[0] }}
+
+ |
+ {{ article[4] }} |
+ {{ article[5] }} |
+ 👍{{ article[1] }} |
+ 🥇{{ article[2] }} |
+ 🔥{{ article[3] }} |
+ {{ article[8] }} |
+ {{ article[5] }} |
+ {{ article[7] }} |
+ {% if defaultFlag %}
+
+ {% if article[-1] == '正面' %}
+
+ {{ article[-1] }}
+
+
+ {% else %}
+
+ {{ article[-1] }}
+
+
+ {% endif %}
+ |
+ {% endif %}
+
+ {% endfor %}
+
+
+
+
+
+
+
+
+
+
+
+{% endblock %}
+
+
+{% block echarts %}
+
{% endblock %}
\ No newline at end of file
diff --git a/views/page/templates/yuqingChar.html b/views/page/templates/yuqingChar.html
index 0972902..25d92d4 100644
--- a/views/page/templates/yuqingChar.html
+++ b/views/page/templates/yuqingChar.html
@@ -1,401 +1,401 @@
-{% extends 'base_page.html' %}
-{% block title %}
- 舆情分析
-{% endblock %}
-{% block nav %}
-
-{% endblock %}
-
-{% block content %}
-
-
-
-
-
-
-
-
-
-
-
-{% endblock %}
-
-{% block echarts %}
-
-
-
-
-
+{% extends 'base_page.html' %}
+{% block title %}
+
舆情分析
+{% endblock %}
+{% block nav %}
+
+{% endblock %}
+
+{% block content %}
+
+
+
+
+
+
+
+
+
+
+
+{% endblock %}
+
+{% block echarts %}
+
+
+
+
+
{% endblock %}
\ No newline at end of file
diff --git a/views/page/templates/yuqingpredict.html b/views/page/templates/yuqingpredict.html
index 50334b4..0653c29 100644
--- a/views/page/templates/yuqingpredict.html
+++ b/views/page/templates/yuqingpredict.html
@@ -1,925 +1,925 @@
-{% extends 'base_page.html' %}
-{% block title %}
- 舆情预测
-{% endblock %}
-{% block nav %}
-
-{% endblock %}
-{% block content %}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
{{ defaultHotWord }}
-
-
- - 话题名称:{{ defaultHotWord }}
- - 出现次数:{{ hotWordLen }}次
- - 话题情感:{{ sentences }}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-<table id="datatable" class="table data-table table-striped table-bordered" >
- <thead>
- <tr>
- <th>Name</th>
- <th>Position</th>
- <th>Office</th>
- <th>Age</th>
- <th>Start date</th>
- <th>Salary</th>
- </tr>
- </thead>
- <tbody>
- <tr>
- <td>Tiger Nixon</td>
- <td>System Architect</td>
- <td>Edinburgh</td>
- <td>61</td>
- <td>2011/04/25</td>
- <td>$320,800</td>
- </tr>
- <tr>
- <td>Garrett Winters</td>
- <td>Accountant</td>
- <td>Tokyo</td>
- <td>63</td>
- <td>2011/07/25</td>
- <td>$170,750</td>
- </tr>
- <tr>
- <td>Ashton Cox</td>
- <td>Junior Technical Author</td>
- <td>San Francisco</td>
- <td>66</td>
- <td>2009/01/12</td>
- <td>$86,000</td>
- </tr>
- <tr>
- <td>Cedric Kelly</td>
- <td>Senior Javascript Developer</td>
- <td>Edinburgh</td>
- <td>22</td>
- <td>2012/03/29</td>
- <td>$433,060</td>
- </tr>
- <tr>
- <td>Airi Satou</td>
- <td>Accountant</td>
- <td>Tokyo</td>
- <td>33</td>
- <td>2008/11/28</td>
- <td>$162,700</td>
- </tr>
- <tr>
- <td>Brielle Williamson</td>
- <td>Integration Specialist</td>
- <td>New York</td>
- <td>61</td>
- <td>2012/12/02</td>
- <td>$372,000</td>
- </tr>
- <tr>
- <td>Herrod Chandler</td>
- <td>Sales Assistant</td>
- <td>San Francisco</td>
- <td>59</td>
- <td>2012/08/06</td>
- <td>$137,500</td>
- </tr>
- <tr>
- <td>Rhona Davidson</td>
- <td>Integration Specialist</td>
- <td>Tokyo</td>
- <td>55</td>
- <td>2010/10/14</td>
- <td>$327,900</td>
- </tr>
- <tr>
- <td>Colleen Hurst</td>
- <td>Javascript Developer</td>
- <td>San Francisco</td>
- <td>39</td>
- <td>2009/09/15</td>
- <td>$205,500</td>
- </tr>
- <tr>
- <td>Sonya Frost</td>
- <td>Software Engineer</td>
- <td>Edinburgh</td>
- <td>23</td>
- <td>2008/12/13</td>
- <td>$103,600</td>
- </tr>
- <tr>
- <td>Jena Gaines</td>
- <td>Office Manager</td>
- <td>London</td>
- <td>30</td>
- <td>2008/12/19</td>
- <td>$90,560</td>
- </tr>
- <tr>
- <td>Quinn Flynn</td>
- <td>Support Lead</td>
- <td>Edinburgh</td>
- <td>22</td>
- <td>2013/03/03</td>
- <td>$342,000</td>
- </tr>
- <tr>
- <td>Charde Marshall</td>
- <td>Regional Director</td>
- <td>San Francisco</td>
- <td>36</td>
- <td>2008/10/16</td>
- <td>$470,600</td>
- </tr>
- <tr>
- <td>Haley Kennedy</td>
- <td>Senior Marketing Designer</td>
- <td>London</td>
- <td>43</td>
- <td>2012/12/18</td>
- <td>$313,500</td>
- </tr>
- <tr>
- <td>Tatyana Fitzpatrick</td>
- <td>Regional Director</td>
- <td>London</td>
- <td>19</td>
- <td>2010/03/17</td>
- <td>$385,750</td>
- </tr>
- <tr>
- <td>Michael Silva</td>
- <td>Marketing Designer</td>
- <td>London</td>
- <td>66</td>
- <td>2012/11/27</td>
- <td>$198,500</td>
- </tr>
- <tr>
- <td>Paul Byrd</td>
- <td>Chief Financial Officer (CFO)</td>
- <td>New York</td>
- <td>64</td>
- <td>2010/06/09</td>
- <td>$725,000</td>
- </tr>
- <tr>
- <td>Gloria Little</td>
- <td>Systems Administrator</td>
- <td>New York</td>
- <td>59</td>
- <td>2009/04/10</td>
- <td>$237,500</td>
- </tr>
- <tr>
- <td>Bradley Greer</td>
- <td>Software Engineer</td>
- <td>London</td>
- <td>41</td>
- <td>2012/10/13</td>
- <td>$132,000</td>
- </tr>
- <tr>
- <td>Dai Rios</td>
- <td>Personnel Lead</td>
- <td>Edinburgh</td>
- <td>35</td>
- <td>2012/09/26</td>
- <td>$217,500</td>
- </tr>
-</table>
-
-
-
根据选择的话题从而查询出评论数据
-
-
-
-
-
- | 文章ID |
- 评论用户 |
- 评论性别 |
- 评论话题 |
- 评论内容 |
- 点赞量 |
-
-
- {% for i in comments %}
-
- | {{ i[0] }} |
- {{ i[5] }} |
-
- {% if i[6] =='f' %}
- 女生
- {% else %}
- 男生
- {% endif %}
- |
- {{ i[9] }} |
- {{ i[4] }} |
- 👍{{ i[2] }} |
-
- {% endfor %}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-{% endblock %}
-
-{% block echarts %}
-
-
+{% extends 'base_page.html' %}
+{% block title %}
+
舆情预测
+{% endblock %}
+{% block nav %}
+
+{% endblock %}
+{% block content %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
{{ defaultHotWord }}
+
+
+ - 话题名称:{{ defaultHotWord }}
+ - 出现次数:{{ hotWordLen }}次
+ - 话题情感:{{ sentences }}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+<table id="datatable" class="table data-table table-striped table-bordered" >
+ <thead>
+ <tr>
+ <th>Name</th>
+ <th>Position</th>
+ <th>Office</th>
+ <th>Age</th>
+ <th>Start date</th>
+ <th>Salary</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td>Tiger Nixon</td>
+ <td>System Architect</td>
+ <td>Edinburgh</td>
+ <td>61</td>
+ <td>2011/04/25</td>
+ <td>$320,800</td>
+ </tr>
+ <tr>
+ <td>Garrett Winters</td>
+ <td>Accountant</td>
+ <td>Tokyo</td>
+ <td>63</td>
+ <td>2011/07/25</td>
+ <td>$170,750</td>
+ </tr>
+ <tr>
+ <td>Ashton Cox</td>
+ <td>Junior Technical Author</td>
+ <td>San Francisco</td>
+ <td>66</td>
+ <td>2009/01/12</td>
+ <td>$86,000</td>
+ </tr>
+ <tr>
+ <td>Cedric Kelly</td>
+ <td>Senior Javascript Developer</td>
+ <td>Edinburgh</td>
+ <td>22</td>
+ <td>2012/03/29</td>
+ <td>$433,060</td>
+ </tr>
+ <tr>
+ <td>Airi Satou</td>
+ <td>Accountant</td>
+ <td>Tokyo</td>
+ <td>33</td>
+ <td>2008/11/28</td>
+ <td>$162,700</td>
+ </tr>
+ <tr>
+ <td>Brielle Williamson</td>
+ <td>Integration Specialist</td>
+ <td>New York</td>
+ <td>61</td>
+ <td>2012/12/02</td>
+ <td>$372,000</td>
+ </tr>
+ <tr>
+ <td>Herrod Chandler</td>
+ <td>Sales Assistant</td>
+ <td>San Francisco</td>
+ <td>59</td>
+ <td>2012/08/06</td>
+ <td>$137,500</td>
+ </tr>
+ <tr>
+ <td>Rhona Davidson</td>
+ <td>Integration Specialist</td>
+ <td>Tokyo</td>
+ <td>55</td>
+ <td>2010/10/14</td>
+ <td>$327,900</td>
+ </tr>
+ <tr>
+ <td>Colleen Hurst</td>
+ <td>Javascript Developer</td>
+ <td>San Francisco</td>
+ <td>39</td>
+ <td>2009/09/15</td>
+ <td>$205,500</td>
+ </tr>
+ <tr>
+ <td>Sonya Frost</td>
+ <td>Software Engineer</td>
+ <td>Edinburgh</td>
+ <td>23</td>
+ <td>2008/12/13</td>
+ <td>$103,600</td>
+ </tr>
+ <tr>
+ <td>Jena Gaines</td>
+ <td>Office Manager</td>
+ <td>London</td>
+ <td>30</td>
+ <td>2008/12/19</td>
+ <td>$90,560</td>
+ </tr>
+ <tr>
+ <td>Quinn Flynn</td>
+ <td>Support Lead</td>
+ <td>Edinburgh</td>
+ <td>22</td>
+ <td>2013/03/03</td>
+ <td>$342,000</td>
+ </tr>
+ <tr>
+ <td>Charde Marshall</td>
+ <td>Regional Director</td>
+ <td>San Francisco</td>
+ <td>36</td>
+ <td>2008/10/16</td>
+ <td>$470,600</td>
+ </tr>
+ <tr>
+ <td>Haley Kennedy</td>
+ <td>Senior Marketing Designer</td>
+ <td>London</td>
+ <td>43</td>
+ <td>2012/12/18</td>
+ <td>$313,500</td>
+ </tr>
+ <tr>
+ <td>Tatyana Fitzpatrick</td>
+ <td>Regional Director</td>
+ <td>London</td>
+ <td>19</td>
+ <td>2010/03/17</td>
+ <td>$385,750</td>
+ </tr>
+ <tr>
+ <td>Michael Silva</td>
+ <td>Marketing Designer</td>
+ <td>London</td>
+ <td>66</td>
+ <td>2012/11/27</td>
+ <td>$198,500</td>
+ </tr>
+ <tr>
+ <td>Paul Byrd</td>
+ <td>Chief Financial Officer (CFO)</td>
+ <td>New York</td>
+ <td>64</td>
+ <td>2010/06/09</td>
+ <td>$725,000</td>
+ </tr>
+ <tr>
+ <td>Gloria Little</td>
+ <td>Systems Administrator</td>
+ <td>New York</td>
+ <td>59</td>
+ <td>2009/04/10</td>
+ <td>$237,500</td>
+ </tr>
+ <tr>
+ <td>Bradley Greer</td>
+ <td>Software Engineer</td>
+ <td>London</td>
+ <td>41</td>
+ <td>2012/10/13</td>
+ <td>$132,000</td>
+ </tr>
+ <tr>
+ <td>Dai Rios</td>
+ <td>Personnel Lead</td>
+ <td>Edinburgh</td>
+ <td>35</td>
+ <td>2012/09/26</td>
+ <td>$217,500</td>
+ </tr>
+</table>
+
+
+
根据选择的话题从而查询出评论数据
+
+
+
+
+
+ | 文章ID |
+ 评论用户 |
+ 评论性别 |
+ 评论话题 |
+ 评论内容 |
+ 点赞量 |
+
+
+ {% for i in comments %}
+
+ | {{ i[0] }} |
+ {{ i[5] }} |
+
+ {% if i[6] =='f' %}
+ 女生
+ {% else %}
+ 男生
+ {% endif %}
+ |
+ {{ i[9] }} |
+ {{ i[4] }} |
+ 👍{{ i[2] }} |
+
+ {% endfor %}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{% endblock %}
+
+{% block echarts %}
+
+
{% endblock %}
\ No newline at end of file
diff --git a/views/spider_control.py b/views/spider_control.py
index bd062a7..67a445f 100644
--- a/views/spider_control.py
+++ b/views/spider_control.py
@@ -1,371 +1,371 @@
-from flask import Blueprint, jsonify, request, render_template
-import json
-import os
-from datetime import datetime
-import threading
-from queue import Queue
-import asyncio
-import websockets
-import logging
-from spider.spiderData import SpiderData
-from openai import OpenAI
-from anthropic import Anthropic
-import aiohttp
-from concurrent.futures import ThreadPoolExecutor
-from ratelimit import limits, sleep_and_retry
-from tenacity import retry, stop_after_attempt, wait_exponential
-
-# 创建蓝图
-spider_bp = Blueprint('spider', __name__)
-
-# 创建日志记录器
-logger = logging.getLogger('spider_control')
-logger.setLevel(logging.INFO)
-
-# 存储WebSocket连接的集合
-websocket_connections = set()
-
-# 创建消息队列
-message_queue = Queue()
-
-# 创建线程池
-thread_pool = ThreadPoolExecutor(max_workers=3)
-
-# 创建异步事件循环
-loop = asyncio.new_event_loop()
-asyncio.set_event_loop(loop)
-
-# 默认配置
-DEFAULT_CONFIG = {
- 'crawlDepth': 3,
- 'interval': 5,
- 'maxRetries': 3,
- 'timeout': 30,
- 'maxConcurrent': 2
-}
-
-# 限流装饰器
-@sleep_and_retry
-@limits(calls=100, period=60) # 每分钟最多100个请求
-def rate_limited_request():
- pass
-
-class SpiderWorker:
- def __init__(self, topics, parameters):
- self.topics = topics
- self.parameters = parameters
- self.total_topics = len(topics)
- self.completed_topics = 0
- self.spider = SpiderData()
- self.message_buffer = []
- self.message_buffer_size = 10
- self.semaphore = asyncio.Semaphore(parameters.get('maxConcurrent', DEFAULT_CONFIG['maxConcurrent']))
-
- async def send_message(self, message):
- """异步发送消息,使用缓冲区优化"""
- self.message_buffer.append(message)
- if len(self.message_buffer) >= self.message_buffer_size:
- await self.flush_messages()
-
- async def flush_messages(self):
- """刷新消息缓冲区"""
- if not self.message_buffer:
- return
-
- try:
- await broadcast_message(self.message_buffer)
- self.message_buffer.clear()
- except Exception as e:
- logger.error(f"发送消息失败: {e}")
-
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
- async def crawl_single_topic(self, topic):
- """爬取单个话题"""
- try:
- rate_limited_request()
-
- await self.send_message({
- 'type': 'log',
- 'message': f'开始爬取话题: {topic}'
- })
-
- async with self.semaphore:
- await asyncio.get_event_loop().run_in_executor(
- thread_pool,
- self.spider.crawl_topic,
- topic,
- self.parameters['crawlDepth'],
- self.parameters['interval'],
- self.parameters['maxRetries'],
- self.parameters['timeout']
- )
-
- self.completed_topics += 1
- progress = int((self.completed_topics / self.total_topics) * 100)
-
- await self.send_message({
- 'type': 'progress',
- 'value': progress
- })
-
- await self.send_message({
- 'type': 'log',
- 'message': f'话题 {topic} 爬取完成'
- })
-
- except Exception as e:
- logger.error(f"爬取话题 {topic} 失败: {e}")
- await self.send_message({
- 'type': 'log',
- 'message': f'爬取话题 {topic} 时出错: {str(e)}'
- })
- raise
-
- async def run(self):
- """运行爬虫任务"""
- try:
- tasks = [self.crawl_single_topic(topic) for topic in self.topics]
- await asyncio.gather(*tasks)
- await self.flush_messages()
-
- await self.send_message({
- 'type': 'log',
- 'message': '所有话题爬取完成'
- })
-
- except Exception as e:
- logger.error(f"爬虫任务执行出错: {e}")
- await self.send_message({
- 'type': 'log',
- 'message': f'爬虫任务执行出错: {str(e)}'
- })
- finally:
- await self.flush_messages()
-
-async def broadcast_message(messages):
- """广播消息到所有WebSocket连接"""
- if not websocket_connections:
- return
-
- for websocket in websocket_connections.copy():
- try:
- if isinstance(messages, list):
- for message in messages:
- await websocket.send(json.dumps(message))
- else:
- await websocket.send(json.dumps(messages))
- except websockets.exceptions.ConnectionClosed:
- websocket_connections.remove(websocket)
- except Exception as e:
- logger.error(f"发送WebSocket消息失败: {e}")
- websocket_connections.remove(websocket)
-
-@spider_bp.route('/spider/control')
-def spider_control():
- """渲染爬虫控制页面"""
- return render_template('spider_control.html')
-
-@spider_bp.route('/api/spider/start', methods=['POST'])
-async def start_spider():
- """启动爬虫任务"""
- try:
- data = request.get_json()
- topics = data.get('topics', [])
- parameters = {**DEFAULT_CONFIG, **data.get('parameters', {})}
-
- if not topics:
- return jsonify({
- 'success': False,
- 'message': '请选择至少一个话题'
- })
-
- # 创建爬虫工作器
- worker = SpiderWorker(topics, parameters)
-
- # 在事件循环中运行爬虫任务
- asyncio.create_task(worker.run())
-
- return jsonify({
- 'success': True,
- 'message': '爬虫任务已启动'
- })
-
- except Exception as e:
- logger.error(f"启动爬虫任务失败: {e}")
- return jsonify({
- 'success': False,
- 'message': str(e)
- })
-
-@spider_bp.route('/api/spider/save-config', methods=['POST'])
-def save_spider_config():
- """保存爬虫配置"""
- try:
- config = request.get_json()
- if save_config(config):
- return jsonify({
- 'success': True,
- 'message': '配置保存成功'
- })
- else:
- return jsonify({
- 'success': False,
- 'message': '配置保存失败'
- })
- except Exception as e:
- logger.error(f"保存配置失败: {e}")
- return jsonify({
- 'success': False,
- 'message': str(e)
- })
-
-@spider_bp.websocket('/ws/spider-status')
-async def spider_status_socket(websocket):
- """WebSocket连接处理"""
- try:
- websocket_connections.add(websocket)
- logging.info("新的WebSocket连接已建立")
-
- try:
- while True:
- # 等待消息,保持连接活跃
- message = await websocket.receive()
- if message is None:
- break
- except websockets.exceptions.ConnectionClosed:
- logging.info("WebSocket连接已关闭")
- finally:
- websocket_connections.remove(websocket)
- logging.info("WebSocket连接已移除")
- except Exception as e:
- logger.error(f"WebSocket连接处理失败: {e}")
- if websocket in websocket_connections:
- websocket_connections.remove(websocket)
-
-def get_ai_client():
- """获取可用的AI客户端"""
- # 按优先级尝试不同的AI服务
- if os.getenv('ANTHROPIC_API_KEY'):
- return {
- 'type': 'anthropic',
- 'client': Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
- }
- elif os.getenv('OPENAI_API_KEY'):
- return {
- 'type': 'openai',
- 'client': OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
- }
- else:
- raise ValueError("未找到可用的AI API密钥")
-
-def parse_ai_response(response_text):
- """解析AI响应中的JSON配置"""
- try:
- # 查找JSON内容
- start = response_text.find('{')
- end = response_text.rfind('}') + 1
- if start == -1 or end == 0:
- raise ValueError("未找到有效的JSON配置")
-
- json_str = response_text[start:end]
- config = json.loads(json_str)
-
- # 验证配置格式
- if not isinstance(config.get('topics'), list):
- raise ValueError("配置必须包含话题列表")
-
- parameters = config.get('parameters', {})
- if not all(key in parameters for key in ['crawlDepth', 'interval', 'maxRetries', 'timeout']):
- raise ValueError("配置缺少必要的参数")
-
- # 提取建议文本(JSON之前的部分)
- suggestion = response_text[:start].strip()
-
- return config, suggestion
- except Exception as e:
- raise ValueError(f"解析AI响应失败: {str(e)}")
-
-@spider_bp.route('/api/spider/ai-config', methods=['POST'])
-def generate_ai_config():
- """使用AI生成爬虫配置"""
- try:
- prompt = request.json.get('prompt', '')
- if not prompt:
- return jsonify({
- 'success': False,
- 'message': '请提供爬虫需求描述'
- })
-
- # 构建AI提示
- system_prompt = """你是一个专业的爬虫配置助手。请根据用户的自然语言描述,生成合适的微博爬虫配置。
-配置应包含以下内容:
-1. 要爬取的话题列表
-2. 爬虫参数(爬取深度、间隔时间、重试次数、超时时间)
-
-请先用通俗易懂的语言解释你的配置建议,然后在最后提供一个JSON格式的具体配置。
-注意:
-- 爬取深度(crawlDepth)范围:1-10页
-- 间隔时间(interval)范围:3-30秒
-- 重试次数(maxRetries)范围:1-5次
-- 超时时间(timeout)范围:10-60秒
-- 所有参数都必须是整数
-
-示例输出格式:
-根据您的需求,我建议...
-
-{
- "topics": ["话题1", "话题2"],
- "parameters": {
- "crawlDepth": 5,
- "interval": 5,
- "maxRetries": 3,
- "timeout": 30
- }
-}"""
-
- # 获取AI客户端
- ai = get_ai_client()
-
- try:
- if ai['type'] == 'anthropic':
- response = ai['client'].messages.create(
- model="claude-3-sonnet-20240229",
- max_tokens=1000,
- messages=[
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": prompt}
- ]
- )
- response_text = response.content[0].text
- else: # OpenAI
- response = ai['client'].chat.completions.create(
- model="gpt-3.5-turbo",
- messages=[
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": prompt}
- ]
- )
- response_text = response.choices[0].message.content
-
- # 解析AI响应
- config, suggestion = parse_ai_response(response_text)
-
- return jsonify({
- 'success': True,
- 'config': config,
- 'suggestion': suggestion
- })
-
- except Exception as e:
- logger.error(f"AI服务调用失败: {e}")
- return jsonify({
- 'success': False,
- 'message': f"AI配置生成失败: {str(e)}"
- })
-
- except Exception as e:
- logger.error(f"生成配置失败: {e}")
- return jsonify({
- 'success': False,
- 'message': str(e)
+from flask import Blueprint, jsonify, request, render_template
+import json
+import os
+from datetime import datetime
+import threading
+from queue import Queue
+import asyncio
+import websockets
+import logging
+from spider.spiderData import SpiderData
+from openai import OpenAI
+from anthropic import Anthropic
+import aiohttp
+from concurrent.futures import ThreadPoolExecutor
+from ratelimit import limits, sleep_and_retry
+from tenacity import retry, stop_after_attempt, wait_exponential
+
+# Create the blueprint
+spider_bp = Blueprint('spider', __name__)
+
+# Create the module logger
+logger = logging.getLogger('spider_control')
+logger.setLevel(logging.INFO)
+
+# Set holding the currently registered WebSocket connections
+websocket_connections = set()
+
+# Create the message queue
+message_queue = Queue()
+
+# Create the thread pool
+thread_pool = ThreadPoolExecutor(max_workers=3)
+
+# Create an asyncio event loop and install it as the current loop
+loop = asyncio.new_event_loop()
+asyncio.set_event_loop(loop)
+
+# Default configuration
+DEFAULT_CONFIG = {
+    'crawlDepth': 3,
+    'interval': 5,
+    'maxRetries': 3,
+    'timeout': 30,
+    'maxConcurrent': 2
+}
+
+# Rate-limiting decorators
+@sleep_and_retry
+@limits(calls=100, period=60)  # at most 100 calls per 60-second period
+def rate_limited_request():
+    pass
+
+class SpiderWorker:
+    def __init__(self, topics, parameters):
+        self.topics = topics
+        self.parameters = parameters
+        self.total_topics = len(topics)
+        self.completed_topics = 0
+        self.spider = SpiderData()
+        self.message_buffer = []
+        self.message_buffer_size = 10
+        self.semaphore = asyncio.Semaphore(parameters.get('maxConcurrent', DEFAULT_CONFIG['maxConcurrent']))
+
+    async def send_message(self, message):
+        """Buffer a message and flush once the buffer holds message_buffer_size entries."""
+        self.message_buffer.append(message)
+        if len(self.message_buffer) >= self.message_buffer_size:
+            await self.flush_messages()
+
+    async def flush_messages(self):
+        """Broadcast the buffered messages; the buffer is swapped out first so overlapping flushes never resend or drop entries."""
+        if not self.message_buffer:
+            return
+
+        try:
+            pending, self.message_buffer = self.message_buffer, []
+            await broadcast_message(pending)
+        except Exception as e:
+            logger.error(f"发送消息失败: {e}")
+
+    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
+    async def crawl_single_topic(self, topic):
+        """Crawl one topic in the thread pool and report log/progress messages; errors are logged and re-raised."""
+        try:
+            rate_limited_request()
+
+            await self.send_message({
+                'type': 'log',
+                'message': f'开始爬取话题: {topic}'
+            })
+
+            async with self.semaphore:
+                await asyncio.get_event_loop().run_in_executor(
+                    thread_pool,
+                    self.spider.crawl_topic,
+                    topic,
+                    self.parameters['crawlDepth'],
+                    self.parameters['interval'],
+                    self.parameters['maxRetries'],
+                    self.parameters['timeout']
+                )
+
+            self.completed_topics += 1
+            progress = int((self.completed_topics / self.total_topics) * 100)
+
+            await self.send_message({
+                'type': 'progress',
+                'value': progress
+            })
+
+            await self.send_message({
+                'type': 'log',
+                'message': f'话题 {topic} 爬取完成'
+            })
+
+        except Exception as e:
+            logger.error(f"爬取话题 {topic} 失败: {e}")
+            await self.send_message({
+                'type': 'log',
+                'message': f'爬取话题 {topic} 时出错: {str(e)}'
+            })
+            raise
+
+    async def run(self):
+        """Crawl all topics concurrently, then flush whatever messages remain buffered."""
+        try:
+            tasks = [self.crawl_single_topic(topic) for topic in self.topics]
+            await asyncio.gather(*tasks)
+            await self.flush_messages()
+
+            await self.send_message({
+                'type': 'log',
+                'message': '所有话题爬取完成'
+            })
+
+        except Exception as e:
+            logger.error(f"爬虫任务执行出错: {e}")
+            await self.send_message({
+                'type': 'log',
+                'message': f'爬虫任务执行出错: {str(e)}'
+            })
+        finally:
+            await self.flush_messages()
+
+async def broadcast_message(messages):
+    """Send one message (or each message of a list) as JSON to every registered WebSocket; sockets that fail are dropped."""
+    if not websocket_connections:
+        return
+
+    for websocket in websocket_connections.copy():
+        try:
+            if isinstance(messages, list):
+                for message in messages:
+                    await websocket.send(json.dumps(message))
+            else:
+                await websocket.send(json.dumps(messages))
+        except websockets.exceptions.ConnectionClosed:
+            websocket_connections.discard(websocket)  # may already be gone: the socket handler also removes it
+        except Exception as e:
+            logger.error(f"发送WebSocket消息失败: {e}")
+            websocket_connections.discard(websocket)
+
+@spider_bp.route('/spider/control')
+def spider_control():
+    """Render the spider control page."""
+    return render_template('spider_control.html')
+
+@spider_bp.route('/api/spider/start', methods=['POST'])
+async def start_spider():
+    """Start a crawl for the topics listed in the JSON request body."""
+    try:
+        data = request.get_json()
+        topics = data.get('topics', [])
+        parameters = {**DEFAULT_CONFIG, **data.get('parameters', {})}
+
+        if not topics:
+            return jsonify({
+                'success': False,
+                'message': '请选择至少一个话题'
+            })
+
+        # Create the spider worker
+        worker = SpiderWorker(topics, parameters)
+
+        # Schedule the crawl on the running loop. NOTE(review): the task is neither awaited nor stored; confirm it outlives this request
+        asyncio.create_task(worker.run())
+
+        return jsonify({
+            'success': True,
+            'message': '爬虫任务已启动'
+        })
+
+    except Exception as e:
+        logger.error(f"启动爬虫任务失败: {e}")
+        return jsonify({
+            'success': False,
+            'message': str(e)
+        })
+
+@spider_bp.route('/api/spider/save-config', methods=['POST'])
+def save_spider_config():
+    """Save the spider configuration sent as JSON. NOTE(review): save_config is not defined or imported in the visible module; confirm its source."""
+    try:
+        config = request.get_json()
+        if save_config(config):
+            return jsonify({
+                'success': True,
+                'message': '配置保存成功'
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'message': '配置保存失败'
+            })
+    except Exception as e:
+        logger.error(f"保存配置失败: {e}")
+        return jsonify({
+            'success': False,
+            'message': str(e)
+        })
+
+@spider_bp.websocket('/ws/spider-status')
+async def spider_status_socket(websocket):
+    """Track a WebSocket client until it disconnects. NOTE(review): flask.Blueprint has no .websocket decorator (Quart's does); confirm the framework."""
+    try:
+        websocket_connections.add(websocket)
+        logging.info("新的WebSocket连接已建立")
+
+        try:
+            while True:
+                # Wait for incoming messages to keep the connection alive
+                message = await websocket.receive()
+                if message is None:
+                    break
+        except websockets.exceptions.ConnectionClosed:
+            logging.info("WebSocket连接已关闭")
+        finally:
+            websocket_connections.discard(websocket)  # broadcast_message may already have dropped this socket
+            logging.info("WebSocket连接已移除")
+    except Exception as e:
+        logger.error(f"WebSocket连接处理失败: {e}")
+        if websocket in websocket_connections:
+            websocket_connections.remove(websocket)
+
+def get_ai_client():
+    """Return {'type', 'client'} for the first provider whose API key is set; raise ValueError if none is."""
+    # Try the AI services in priority order
+    if os.getenv('ANTHROPIC_API_KEY'):
+        return {
+            'type': 'anthropic',
+            'client': Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
+        }
+    elif os.getenv('OPENAI_API_KEY'):
+        return {
+            'type': 'openai',
+            'client': OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
+        }
+    else:
+        raise ValueError("未找到可用的AI API密钥")
+
+def parse_ai_response(response_text):
+    """Extract the JSON config and the suggestion text preceding it from an AI response; raise ValueError on any failure."""
+    try:
+        # Locate the JSON content (first '{' through last '}')
+        start = response_text.find('{')
+        end = response_text.rfind('}') + 1
+        if start == -1 or end == 0:
+            raise ValueError("未找到有效的JSON配置")
+
+        json_str = response_text[start:end]
+        config = json.loads(json_str)
+
+        # Validate the config format
+        if not isinstance(config.get('topics'), list):
+            raise ValueError("配置必须包含话题列表")
+
+        parameters = config.get('parameters', {})
+        if not all(key in parameters for key in ['crawlDepth', 'interval', 'maxRetries', 'timeout']):
+            raise ValueError("配置缺少必要的参数")
+
+        # Extract the suggestion text (the part before the JSON)
+        suggestion = response_text[:start].strip()
+
+        return config, suggestion
+    except Exception as e:
+        raise ValueError(f"解析AI响应失败: {str(e)}")
+
+@spider_bp.route('/api/spider/ai-config', methods=['POST'])
+def generate_ai_config():
+    """Generate a spider configuration from the user's natural-language prompt using the available AI provider."""
+    try:
+        prompt = request.json.get('prompt', '')
+        if not prompt:
+            return jsonify({
+                'success': False,
+                'message': '请提供爬虫需求描述'
+            })
+
+        # Build the AI prompt
+        system_prompt = """你是一个专业的爬虫配置助手。请根据用户的自然语言描述,生成合适的微博爬虫配置。
+配置应包含以下内容:
+1. 要爬取的话题列表
+2. 爬虫参数(爬取深度、间隔时间、重试次数、超时时间)
+
+请先用通俗易懂的语言解释你的配置建议,然后在最后提供一个JSON格式的具体配置。
+注意:
+- 爬取深度(crawlDepth)范围:1-10页
+- 间隔时间(interval)范围:3-30秒
+- 重试次数(maxRetries)范围:1-5次
+- 超时时间(timeout)范围:10-60秒
+- 所有参数都必须是整数
+
+示例输出格式:
+根据您的需求,我建议...
+
+{
+    "topics": ["话题1", "话题2"],
+    "parameters": {
+        "crawlDepth": 5,
+        "interval": 5,
+        "maxRetries": 3,
+        "timeout": 30
+    }
+}"""
+
+        # Get the AI client
+        ai = get_ai_client()
+
+        try:
+            if ai['type'] == 'anthropic':
+                response = ai['client'].messages.create(
+                    model="claude-3-sonnet-20240229",
+                    max_tokens=1000,
+                    system=system_prompt,  # the Messages API takes the system prompt as a top-level parameter, not a message role
+                    messages=[
+                        {"role": "user", "content": prompt}
+                    ]
+                )
+                response_text = response.content[0].text
+            else:  # OpenAI
+                response = ai['client'].chat.completions.create(
+                    model="gpt-3.5-turbo",
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {"role": "user", "content": prompt}
+                    ]
+                )
+                response_text = response.choices[0].message.content
+
+            # Parse the AI response
+            config, suggestion = parse_ai_response(response_text)
+
+            return jsonify({
+                'success': True,
+                'config': config,
+                'suggestion': suggestion
+            })
+
+        except Exception as e:
+            logger.error(f"AI服务调用失败: {e}")
+            return jsonify({
+                'success': False,
+                'message': f"AI配置生成失败: {str(e)}"
+            })
+
+    except Exception as e:
+        logger.error(f"生成配置失败: {e}")
+        return jsonify({
+            'success': False,
+            'message': str(e)
     })
\ No newline at end of file
diff --git a/views/user/__pycache__/user.cpython-38.pyc b/views/user/__pycache__/user.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..696375f953091c955323d386bbc8ad72b33ee8fb
GIT binary patch
literal 2249
zcmbVO-EUMy6rZ_Yd-v`~zkpH^Tt84&OG~QaM+ia1j|WrGLL{5&X1g=nz4YF_>rB
zP5WSqga?Vn5EI|FBpQqlP1J<{z&~PMO`(18A811GoVhHtgePx1XFkr{Gv}Q7{m$L<
zrBXoP`SFu)TmSlm{DYI7j|Y=e(Bv%;j4&FJTGrDVg}D`3v0bxcr{=_N&5ga97w2lZ
z*su8%_t{ZC4r+lJJ5eDn){2yHo4LHiJeFhr4X0M-<%
&jMDMw^;Fp#Y(Jvm8{a*
zpwSHM&D6VsYf1PrvwEI!ikPA8|xs+kjsZ#BNLM+0d#Dz%2=UjG^j^x@&B^OKX
zBz8LAwsJD^`j$HC4qhvnpX#3+2&uQoEnVEB7Iu~=*PMB<>
z*Zs$afdlOWX!0u%id>|OE?8n%S-m1Mqm80BcvP@TD$BL!A
zW$5K;m18-418n%rA0sNy{1Ze4EN|=($XPHg%5ci|sBy5~=&-ur^PyB+Ote)TH7RKO
zj|Br4+6**#3WOp(aIOcAtg^R@RF0T$6GUFINqJs
zr3+yrxsdYD!l`3ZZ*04%Y8`!B``w0=mlMIXkGbwNVqVSZT)QFLQ8=eP*=|g|d0czE
z(@YrGUfN}Fr2@UvPAd_y&bRp$7S6-{RxN=**JXUWemPX_x@<)1HzE$g^7g0AcW2MT
z8FKdcTgTpcefBh$3o7Z(e!;`Jq<%J?i^AraZrA~2)ZgngqD2|XnMKG@JbNkRm*p%v
zXv9Q!QRiD>1dh~OojqV&4uc?cknW=nwH{c|92z`W+HcZ5aUx2ZjYyuHc=Xctu;dY-
zweo7EC6c&efaYau>0Yq4`PZ)--`}}={ilufYxjEJY~B2MbA4T)W+|BLHs<+cJAqsk
zvQ&5(#sor20LLLY5nQhJvIDgp@s7x&@);C46bQkpEuMo;yLi5=l`?e5wAloTn6zc$
zklAFk2%{`qE1gq)-J^3m7!+_@qfeM|07;JM!et4E5GNj
z5rCzwf?k37y&}sSm^#Z&uLP%*SpYD7xC$_}S;4Fp0jf@KV2!B3Wt)}8pc=9AXf_+b
z*(iB5AI#?S_NoOIhS<;}W({Y|5U^OeF?>36o_H%C^%B*4XJP_
z_MvJN1;#AKP#B;efRVPP5*gZElS=V2tkwz-xoCJ2pqaW#yAc=9;@em;c0eD#E?Kce
z?%J3+W3YEAhcTW>)h_0
literal 0
HcmV?d00001
diff --git a/views/user/templates/login_and_register.html b/views/user/templates/login_and_register.html
index 0102aa0..644f712 100644
--- a/views/user/templates/login_and_register.html
+++ b/views/user/templates/login_and_register.html
@@ -5,6 +5,7 @@
微博舆情分析系统 | 登录
+
+
+ 切换语言
+
+