From b4f14ae3e7e8ee3e44c3e8713ab9a4d9c110e148 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E5=BD=ADBaileys?= <142196225+apeng0406@users.noreply.github.com> Date: Sun, 9 Mar 2025 19:09:00 +0800 Subject: [PATCH] Bilingual support, with full system support for Chinese and English switching. --- app.py | 18 +- static/js/i18n.js | 239 +++ templates/404.html | 85 +- templates/error.html | 89 +- templates/spider_control.html | 760 +++---- views/page/__pycache__/page.cpython-38.pyc | Bin 0 -> 5041 bytes views/page/page.py | 1096 +++++----- views/page/templates/articleChar.html | 688 +++--- views/page/templates/articleContentCloud.html | 286 +-- views/page/templates/base_page.html | 981 ++++----- views/page/templates/commentChar.html | 632 +++--- views/page/templates/hotWord.html | 1004 ++++----- views/page/templates/index.html | 57 +- views/page/templates/ipChar.html | 548 ++--- views/page/templates/tableData.html | 451 ++-- views/page/templates/yuqingChar.html | 800 +++---- views/page/templates/yuqingpredict.html | 1848 ++++++++--------- views/spider_control.py | 740 +++---- views/user/__pycache__/user.cpython-38.pyc | Bin 0 -> 2249 bytes views/user/templates/login_and_register.html | 23 + views/user/user.py | 744 +++---- 21 files changed, 5725 insertions(+), 5364 deletions(-) create mode 100644 static/js/i18n.js create mode 100644 views/page/__pycache__/page.cpython-38.pyc create mode 100644 views/user/__pycache__/user.cpython-38.pyc diff --git a/app.py b/app.py index 7b18720..8bf6bc3 100644 --- a/app.py +++ b/app.py @@ -142,15 +142,27 @@ def not_found_error(error): @app.errorhandler(500) def internal_error(error): - return render_template('500.html'), 500 + return render_template('error.html', + error_code=500, + error_title='服务器错误', + error_message='服务器遇到了一个问题,请稍后再试。', + error_i18n_key='serverError'), 500 @app.errorhandler(403) def forbidden_error(error): - return render_template('403.html'), 403 + return render_template('error.html', + error_code=403, + error_title='禁止访问', + error_message='您没有权限访问此页面。', + error_i18n_key='forbidden'), 403 @app.errorhandler(400) def bad_request_error(error): - return render_template('400.html'), 400 + return render_template('error.html', + error_code=400, + error_title='错误请求', + error_message='服务器无法理解您的请求。', + error_i18n_key='badRequest'), 400 # 数据库配置 DB_CONFIG = { diff --git a/static/js/i18n.js b/static/js/i18n.js new file mode 100644 index 0000000..db91b8c --- /dev/null +++ b/static/js/i18n.js @@ -0,0 +1,239 @@ +// 多语言支持文件 +const translations = { + 'zh': { + // 导航菜单 + 'home': '首页', + 'hotWord': '热词统计', + 'tableData': '微博舆情统计', + 'articleChar': '文章分析', + 'ipChar': 'IP分析', + 'commentChar': '评论分析', + 'yuqingChar': '舆情分析', + 'yuqingpredict': '舆情预测', + 'articleCloud': '文章内容词云图', + 'dataVisualization': '数据可视化', + 'weiboSystem': '微博舆情分析系统', + 'wordCloud': '词云图', + + // 首页 + 'articleCount': '文章个数', + 'articleCrawlRule': '文章爬取规则', + 'nextCrawlTime': '下次爬取时间', + 'articlePublishTimeCount': '文章发布时间个数', + 'commentLikeCountTopFore': '评论点赞量 Top Fore', + 'viewAll': '查看全部', + 'articleTypeRatio': '文章类型占比', + 'commentUserWordCloud': '评论用户名词云图', + 'commentUserTimeRatio': '评论用户时间占比', + + // 热词页面 + 'hotWordStatistics': '热词统计页', + 'hotWordCloud': '热词词云图', + 'hotWordRanking': '热词查询表格', + 'wordFrequency': '词频', + 'hotWordSelection': '热词选择', + 'hotWordName': '热词名称', + 'occurrenceCount': '出现次数', + 'hotWordSentiment': '热词情感', + 'hotWordYearTrend': '热词年份变化趋势', + 'queryCommentsByHotWord': '根据选择的热词从而查询出评论数据', + 'hotWordTimeDistribution': '热词出现时间分布个数', + + // 舆情分析页面 + 'hotWordSentimentTrendBar': '热词情感趋势柱状图', + 'hotWordSentimentTrendTree': '热词情感趋势树形图', + 'articleCommentSentimentTrendPie': '文章内容与评论内容舆情趋势饼状图', + + // 舆情预测页面 + 'topicStatisticsPage': '话题统计页', + + // 文章分析页面 + 'articleCharPage': '文章分析页', + 'typeSelection': '类型选择', + 'articleLikeAnalysis': '文章点赞量分析 👍', + 'articleCommentAnalysis': '文章评论量分析 🔥', + 'articleForwardAnalysis': '文章转发量分析 🥇', + 'likeRangeStatistics': '点赞区间统计', + 'rangeCount': '区间个数', + + // 评论分析页面 + 'commentLikeRangeChart': '评论点赞次数区间图', + 'commentUserGenderRatio': '评论用户性别占比', + 'userCommentWordCloud': '用户评论词云图', + + // IP分析页面 + 'articleIpLocationAnalysis': '文章IP位置分析图', + 'commentIpLocationAnalysis': '评论IP位置分析图', + + // 评论相关 + 'commentUser': '评论用户', + 'commentGender': '评论性别', + 'commentAddress': '评论地址', + 'commentContent': '评论内容', + 'likeCount': '点赞量', + + // 微博舆情统计页面 + 'weiboArticleStatTable': '微博文章统计表格 - 舆情 情感分类', + 'sentimentClassification': '情感分类', + 'articleId': '文章ID', + 'articleIp': '文章IP', + 'articleTitle': '文章标题', + 'articleLike': '点赞量', + 'articleForward': '转发量', + 'articleComment': '评论量', + 'articleType': '类型', + 'articleContent': '内容', + 'articleTime': '发布时间', + + // 通用 + 'switchToEnglish': '切换到英文', + 'switchToChinese': '切换到中文', + 'semester': '网安小学期', + + // 错误页面 + 'pageNotFound': '页面未找到', + 'backToHome': '返回首页', + 'serverError': '服务器错误', + 'forbidden': '禁止访问', + 'badRequest': '错误请求' + }, + 'en': { + // Navigation menu + 'home': 'Home', + 'hotWord': 'Hot Words', + 'tableData': 'Weibo Public Opinion Stats', + 'articleChar': 'Article Analysis', + 'ipChar': 'IP Analysis', + 'commentChar': 'Comment Analysis', + 'yuqingChar': 'Public Opinion Analysis', + 'yuqingpredict': 'Opinion Prediction', + 'articleCloud': 'Article Content Word Cloud', + 'dataVisualization': 'Data Visualization', + 'weiboSystem': 'Weibo Public Opinion Analysis System', + 'wordCloud': 'Word Cloud', + + // Home page + 'articleCount': 'Article Count', + 'articleCrawlRule': 'Article Crawl Rule', + 'nextCrawlTime': 'Next Crawl Time', + 'articlePublishTimeCount': 'Article Publish Time Count', + 'commentLikeCountTopFore': 'Comment Like Count Top Four', + 'viewAll': 'View All', + 'articleTypeRatio': 'Article Type Ratio', + 'commentUserWordCloud': 'Comment User Word Cloud', + 'commentUserTimeRatio': 'Comment User Time Ratio', + + // Hot word page + 'hotWordStatistics': 'Hot Word Statistics', + 'hotWordCloud': 'Hot Word Cloud', + 'hotWordRanking': 'Hot Word Ranking', + 'wordFrequency': 'Word Frequency', + 'hotWordSelection': 'Hot Word Selection', + 'hotWordName': 'Hot Word Name', + 'occurrenceCount': 'Occurrence Count', + 'hotWordSentiment': 'Hot Word Sentiment', + 'hotWordYearTrend': 'Hot Word Year Trend', + 'queryCommentsByHotWord': 'Query comments based on selected hot word', + 'hotWordTimeDistribution': 'Hot Word Time Distribution Count', + + // Public opinion analysis page + 'hotWordSentimentTrendBar': 'Hot Word Sentiment Trend Bar Chart', + 'hotWordSentimentTrendTree': 'Hot Word Sentiment Trend Tree Chart', + 'articleCommentSentimentTrendPie': 'Article and Comment Sentiment Trend Pie Chart', + + // Opinion prediction page + 'topicStatisticsPage': 'Topic Statistics Page', + + // Article analysis page + 'articleCharPage': 'Article Analysis Page', + 'typeSelection': 'Type Selection', + 'articleLikeAnalysis': 'Article Like Analysis 👍', + 'articleCommentAnalysis': 'Article Comment Analysis 🔥', + 'articleForwardAnalysis': 'Article Forward Analysis 🥇', + 'likeRangeStatistics': 'Like Range Statistics', + 'rangeCount': 'Range Count', + + // Comment analysis page + 'commentLikeRangeChart': 'Comment Like Range Chart', + 'commentUserGenderRatio': 'Comment User Gender Ratio', + 'userCommentWordCloud': 'User Comment Word Cloud', + + // IP analysis page + 'articleIpLocationAnalysis': 'Article IP Location Analysis', + 'commentIpLocationAnalysis': 'Comment IP Location Analysis', + + // Comment related + 'commentUser': 'Comment User', + 'commentGender': 'Gender', + 'commentAddress': 'Address', + 'commentContent': 'Content', + 'likeCount': 'Likes', + + // Weibo public opinion stats page + 'weiboArticleStatTable': 'Weibo Article Statistics Table - Sentiment Classification', + 'sentimentClassification': 'Sentiment Classification', + 'articleId': 'Article ID', + 'articleIp': 'Article IP', + 'articleTitle': 'Article Title', + 'articleLike': 'Likes', + 'articleForward': 'Forwards', + 'articleComment': 'Comments', + 'articleType': 'Type', + 'articleContent': 'Content', + 'articleTime': 'Publish Time', + + // Common + 'switchToEnglish': 'Switch to English', + 'switchToChinese': 'Switch to Chinese', + 'semester': 'Network Security Semester', + + // Error pages + 'pageNotFound': 'Page Not Found', + 'backToHome': 'Back to Home', + 'serverError': 'Server Error', + 'forbidden': 'Forbidden', + 'badRequest': 'Bad Request' + } +}; + +// 获取当前语言 +function getCurrentLanguage() { + return localStorage.getItem('language') || 'zh'; +} + +// 设置语言 +function setLanguage(lang) { + localStorage.setItem('language', lang); + location.reload(); +} + +// 翻译函数 +function t(key) { + const lang = getCurrentLanguage(); + return translations[lang][key] || key; +} + +// 页面加载时应用翻译 +document.addEventListener('DOMContentLoaded', function() { + // 应用当前语言 + applyTranslations(); + + // 添加语言切换按钮事件 + const langSwitcher = document.getElementById('language-switcher'); + if (langSwitcher) { + langSwitcher.addEventListener('click', function() { + const currentLang = getCurrentLanguage(); + const newLang = currentLang === 'zh' ? 'en' : 'zh'; + setLanguage(newLang); + }); + } +}); + +// 应用翻译到页面元素 +function applyTranslations() { + const elements = document.querySelectorAll('[data-i18n]'); + elements.forEach(el => { + const key = el.getAttribute('data-i18n'); + el.textContent = t(key); + }); +} \ No newline at end of file diff --git a/templates/404.html b/templates/404.html index 4fb4dd5..5357503 100644 --- a/templates/404.html +++ b/templates/404.html @@ -1,29 +1,58 @@ - - - - - - 404页面 - - - - - -
-
-
-
-
- - -

噢!该页面没有找到..

-

本次请求没有任何反应.

- 回到登录页 -
-
-
-
-
- / - + + + + + + 404 - 页面未找到 + + + + + + +
+
404
+
页面未找到
+

您请求的页面不存在或已被移除。

+ 返回首页 +
+ + + + \ No newline at end of file diff --git a/templates/error.html b/templates/error.html index 7275342..79f366d 100644 --- a/templates/error.html +++ b/templates/error.html @@ -1,33 +1,58 @@ - - - - - - 错误页面 - - - - - - -
-
-
-
-
-
- -

{{ errorMsg }}

-

请回去再次检查问题并且修改问题.

-

回到登录页

-
-
-
-
-
-
- - - - + + + + + + {{ error_code }} - {{ error_title }} + + + + + + +
+
{{ error_code }}
+
{{ error_title }}
+

{{ error_message }}

+ 返回首页 +
+ + + + \ No newline at end of file diff --git a/templates/spider_control.html b/templates/spider_control.html index af29ce7..37aeefe 100644 --- a/templates/spider_control.html +++ b/templates/spider_control.html @@ -1,381 +1,381 @@ - - - - - - 爬虫控制面板 - - - - - -
-

爬虫控制面板

- - -
-
-
选择话题类型
-
-
-
- -
- -
-
添加自定义话题
-
- - -
-
- -
-
已选择的话题:
-
- -
-
-
-
- - -
-
-
爬虫参数配置
-
-
-
-
-
- - - 每个话题爬取的页数(1-10) -
-
-
-
- - - 每次请求之间的间隔时间 -
-
-
- -
-
-
- - -
-
-
-
- - -
-
-
-
-
- - -
-
-
- AI配置助手 -
-
-
-
- - -
-
- -
- - -
-
- -
-
- - -
- - -
- - -
-
-
爬虫状态
-
-
-
-
-
-
-

-                
-
-
-
- - - - - + + + + + + 爬虫控制面板 + + + + + +
+

爬虫控制面板

+ + +
+
+
选择话题类型
+
+
+
+ +
+ +
+
添加自定义话题
+
+ + +
+
+ +
+
已选择的话题:
+
+ +
+
+
+
+ + +
+
+
爬虫参数配置
+
+
+
+
+
+ + + 每个话题爬取的页数(1-10) +
+
+
+
+ + + 每次请求之间的间隔时间 +
+
+
+ +
+
+
+ + +
+
+
+
+ + +
+
+
+
+
+ + +
+
+
+ AI配置助手 +
+
+
+
+ + +
+
+ +
+ + +
+
+ +
+
+ + +
+ + +
+ + +
+
+
爬虫状态
+
+
+
+
+
+
+

+                
+
+
+
+ + + + + \ No newline at end of file diff --git a/views/page/__pycache__/page.cpython-38.pyc b/views/page/__pycache__/page.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eae39e81bbd6e0d56dfa87cb627217d83a0ecc61 GIT binary patch literal 5041 zcma)A-ESOM6`z@%nVtRc_eX5UPSWpgfjCJ^3njF1oSGDjYZ_OjBMWQ2_u3wN_M>-Z z8}Ditkb?S9DX+W%2}nR6`o;tA{1^8DA%P1015m4qNSt%#&euAXy4IYz_uS9p@ zT)#6mR@UIR^Y;%Ae!Hw`|HMl6XG7%%Ji!fJ)0oC|Uu){F-ZWgJS#S$Y(>0rxYc*}x z*0J62i%rLMs9f+%JE$x8W%aJm+w`l=F?Xyv?vAT|6RLeuy{G7HMg4S*Tg>LutjHWz zx^KEOeCEL{D?ij(g;js0-P7GU(u{#-oK28so-~u7nPStVIY*iq(9E(q(kzf>9yI6J z0%;aWvk00ccAhj#q*(^d3cEm>^Q5^5noH~n(kzqaNzgpSo+ix-X`TViv+Oz2Twu@N z(Q38N5Zjt9tqs2(oXBFpgTQOIcY#>HO9n;pL%a_I=pJi!~GEZy|Gyd%6;D2sx> z-{nDA(`E5at9^HCa~q2K3pGQIAM$X`_iwkuciVymvKrNQ+8wVE)O3MXmS(4Z$otkR zzGNw0D}dqUuJHFdf**LNaw2ZnJ81i`DHw3nZt@0hJ^q!TP=#k5o?r_^s6Eh`{!kxk zp>a>!Ee!RcF)W0Id)m+pP0$!aYiNfS)Qh1F(g};q7-$21UJ; zWo}8Q8*tI8H+jG80XCNeFxZSc0x}zI!KHP2s~*;+)uUG_>~;A15zH>f8iIo*)ufn7ccHEJfR1}>t-9X}yx>kR05a@;;PJb`F0zKit%Rs!_&!kU21-OfxxQg_RPb+C)`{mG75;rj;$CqX6Hi$)fr;sr?(O@cYN>|3&xtufKls z=`a68#ZN!^!)L$wJr)1+m)}A0%c>eg?Ect^*j-V|O)m)LIO7NPt`CGGBL;GRf)4=5u3;XSmZ}U5xbdnaTvH`L^PTi&XOWBRky+1Ii4cZ zg@Z%OxU5I#$(@A>6zw-WKjebCWjXpBF8@4=8j2+p=TW=>LRufx!M}-bpaOXsi4`ne z1c8Htmq8%KY~2Bx;UB(DpqoQF)SL8-5bq0`qhCVWkt@vc4!k@Xb3`FPZ|)y%JX*YizmrINQ?T$O%Q86jyMC zt0=Ca$Q$@^mX%Iyk2hdI@GOYPys>f1x@n?G&zSSu(6e7&4eR?pN22BIiQkL|ZXF)t zOlnVNfEO{23(A5Qz$56PiCzUQNH#LS+V*`DPPMYkhBOcS`r%H0JXtP^L50{h@h)UV z*#@HeVa?8fk0*_ACm+8P4?u%P5Jq!}?_j@IP>h@b)cp*e09`wRWa?Ed!}Uirs$X4I z0TDw)4wPJ^?!X)=C>W`SdPH)7AtAN6l9pqF1GNhf$;r8@SROmrBE)isnF-C)SZ+}) zw=+bSCI;C4WM;Ryxb9#x(LIKA2NWs2rG0u8?Jn(}dY@iXb$Vx+?-JG3eCY{rXMoiz z4ky`wJC`bxAPxs4)Aw!OX+!=1Cob#UUIj$O_fVv?zmC=Kqj(L)>nQSOm6Z$t$IzZR z1K`@U=p3Mn8#w{G@wvDG{rkmLuXD!MxdVWwYSh-0k3r~+GcU-tknm5;s8yyjOcUg9RE1PaiphZ$crD1#lRTKIW%|6pujx5V10dOlGE!h-yX= zKMl7KK{0jT_Q)aSkeEvSCj$3Yi@PJLoxAObPHNheM7Zx!Oe&e(d<5+QCSV8*hQ@>| z{}Z=B+pnzly6=0f!!yX_!X9=VK_-*CM*&Uc?>2-pD`rlbvjbw73dJ0nNLP&jQ$ZpH zthn!ShG*DOg{w*Fnn=!0){PRS-s}EEeNBudrXG1M!0FDP)gB4XBbe1@QkXFSi47D# zK#?pz3Vk{KtY1UN0M9cbUj^*ZDUY1fFT`zV?~f}Rz|D$zjqr?PPO^^&&^vF2*fFbY zH+9Zmk$sG0yX2gYog~|3xolVDvfZCkwp&u!Zux6vyRH@G0heIo;t3RZ4-ikGcp5~1 zIs#06IZ;8NOR7~et^zMoaBHQh(5k7I7B50yN=Kcug10Fg!W=7_iEyMw&jTeA$9Vd+ZwoVSJR!Cuvoa_%_(ot4d<5_U{ z-*9Q%1Ad_R5xlo*7o`O`P;ep)s75nl4`HFOQJ{}?E8WoZgUiic%kR(?!JUgVU|+l_ zV|t19au+h{{~;vg{HjMB0jSi+D)&XrELK${Xy8CWUK= 100: # 每分钟100次请求限制 - return jsonify({'error': 'Too many requests'}), 429 - pipe = redis_client.pipeline() - pipe.incr(key) - pipe.expire(key, 60) # 60秒后重置 - pipe.execute() - return f(*args, **kwargs) - return decorated_function - -# 设置设备 -device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - -# 设置模型路径 -model_save_path = 'model_pro/final_model.pt' -bert_model_path = 'model_pro/bert_model' -ctm_tokenizer_path = 'model_pro/sentence_bert_model' - -# 初始化模型 -try: - model_manager.load_models(model_save_path, bert_model_path, ctm_tokenizer_path) -except Exception as e: - logging.error(f"模型加载失败: {e}") - -# 数据库配置 -DATABASE_URL = os.getenv('DATABASE_URL', "sqlite:///ai_analysis.db") -engine = create_engine(DATABASE_URL) -AIAnalysis.metadata.create_all(engine) - -def predict_sentiment(text): - """使用改进版模型预测单个文本的情感""" - try: - if not text or len(text.strip()) == 0: - return None, None - - # 清理输入 - cleaned_text = sanitize_input(text) - if not cleaned_text: - return None, None - - predictions, probabilities = model_manager.predict_batch([cleaned_text]) - if predictions is not None and len(predictions) > 0: - return predictions[0], probabilities[0][predictions[0]] - return None, None - except Exception as e: - logging.error(f"预测过程中出现错误: {e}") - return None, None - -@pb.route('/home') -@login_required -def home(): - try: - username = session.get('username') - articleLenMax, likeCountMaxAuthorName, cityMax = getHomeTagsData() - commentsLikeCountTopFore = getHomeCommentsLikeCountTopFore() - X, Y = getHomeArticleCreatedAtChart() - typeChart = getHomeTypeChart() - createAtChart = getHomeCommentCreatedChart() - - return render_template('index.html', - username=username, - articleLenMax=articleLenMax, - likeCountMaxAuthorName=likeCountMaxAuthorName, - cityMax=cityMax, - commentsLikeCountTopFore=commentsLikeCountTopFore, - xData=X, - yData=Y, - typeChart=typeChart, - createAtChart=createAtChart) - except Exception as e: - logging.error(f"加载首页时发生错误: {e}") - return render_template('error.html', error_message="加载首页失败") - -@pb.route('/hotWord') -@login_required -def hotWord(): - try: - username = session.get('username') - hotWordList = getAllHotWords() - if not hotWordList: - return render_template('error.html', error_message="无法获取热词列表") - - defaultHotWord = sanitize_input(request.args.get('hotWord', hotWordList[0][0])) - - # 验证热词是否在列表中 - if not any(defaultHotWord in word for word in hotWordList): - return abort(400, "无效的热词") - - hotWordLen = getHotWordLen(defaultHotWord) - X, Y = getHotWordPageCreatedAtCharData(defaultHotWord) - - value = SnowNLP(defaultHotWord).sentiments - if value == 0.5: - sentences = '中性' - elif value > 0.5: - sentences = '正面' - elif value < 0.5: - sentences = '负面' - - comments = getCommentFilterData(defaultHotWord) - - return render_template('hotWord.html', - username=username, - hotWordList=hotWordList, - defaultHotWord=defaultHotWord, - hotWordLen=hotWordLen, - sentences=sentences, - xData=X, - yData=Y, - comments=comments) - except Exception as e: - logging.error(f"加载热词页面时发生错误: {e}") - return render_template('error.html', error_message="加载热词页面失败") - -@pb.route('/hotTopic') -def hotTopic(): - username = session.get('username') - topicList = getAllTopics() - defaultTopic = topicList[0][0] - if request.args.get('topic'): - defaultTopic = request.args.get('topic') - topicLen = getTopicLen(defaultTopic) - X, Y = getTopicPageCreatedAtCharData() - sentences = '' - - # ... 这里要嵌入 topic 相关内容(热度?)来填充 sentences - - comments = getCommentFilterDataTopic(defaultTopic) - return render_template('hotWord.html', - username=username, - topicList=topicList, - defaultTopic=defaultTopic, - topicLen=topicLen, - sentences=sentences, - xData=X, - yData=Y, - comments=comments) - -@pb.route('/tableData') -@login_required -def tableData(): - try: - username = session.get('username') - defaultFlag = bool(request.args.get('flag', False)) - tableData = getTableDataList(defaultFlag) - - return render_template('tableData.html', - username=username, - tableData=tableData, - defaultFlag=defaultFlag) - except Exception as e: - logging.error(f"加载表格数据时发生错误: {e}") - return render_template('error.html', error_message="加载表格数据失败") - -@pb.route('/articleChar') -def articleChar(): - username = session.get('username') - typeList = getTypeList() - defaultType = typeList[0] - if request.args.get('type'): defaultType = request.args.get('type') - X, Y = getArticleLikeCount(defaultType) - x1Data, y1Data = getArticleCommentsLen(defaultType) - x2Data, y2Data = getArticleRepotsLen(defaultType) - return render_template('articleChar.html', - username=username, - typeList=typeList, - defaultType=defaultType, - xData=X, - yData=Y, - x1Data=x1Data, - y1Data=y1Data, - x2Data=x2Data, - y2Data=y2Data) - -@pb.route('/ipChar') -@login_required -def ipChar(): - try: - username = session.get('username') - articleRegionData = getIPByArticleRegion() - commentRegionData = getIPByCommentsRegion() - - return render_template('ipChar.html', - username=username, - articleRegionData=articleRegionData, - commentRegionData=commentRegionData) - except Exception as e: - logging.error(f"加载IP统计时发生错误: {e}") - return render_template('error.html', error_message="加载IP统计失败") - -@pb.route('/commentChar') -@login_required -def commentChar(): - try: - username = session.get('username') - X, Y = getCommentDataOne() - genderPieData = getCommentDataTwo() - - return render_template('commentChar.html', - username=username, - xData=X, - yData=Y, - genderPieData=genderPieData) - except Exception as e: - logging.error(f"加载评论统计时发生错误: {e}") - return render_template('error.html', error_message="加载评论统计失败") - -@pb.route('/yuqingChar') -@login_required -def yuqingChar(): - try: - username = session.get('username') - model_type = sanitize_input(request.args.get('model', 'pro')) - - # 验证模型类型 - if model_type not in ['pro', 'basic']: - return abort(400, "无效的模型类型") - - X, Y, biedata = getYuQingCharDataOne() - biedata1, biedata2 = getYuQingCharDataTwo(model_type) - x1Data, y1Data = getYuQingCharDataThree() - - return render_template('yuqingChar.html', - username=username, - xData=X, - yData=Y, - biedata=biedata, - biedata1=biedata1, - biedata2=biedata2, - x1Data=x1Data, - y1Data=y1Data, - model_type=model_type) - except Exception as e: - logging.error(f"加载舆情统计时发生错误: {e}") - return render_template('error.html', error_message="加载舆情统计失败") - -@pb.route('/yuqingpredict') -@login_required -def yuqingpredict(): - try: - username = session.get('username') - TopicList = getAllTopicData() - if not TopicList: - return render_template('error.html', error_message="无法获取话题列表") - - defaultTopic = sanitize_input(request.args.get('Topic', TopicList[0][0])) - - # 验证话题是否在列表中 - if not any(defaultTopic in topic for topic in TopicList): - return abort(400, "无效的话题") - - TopicLen = getTopicLen(defaultTopic) - X, Y = getTopicCreatedAtandpredictData(defaultTopic) - - model_type = sanitize_input(request.args.get('model', 'pro')) - if model_type not in ['pro', 'basic']: - return abort(400, "无效的模型类型") - - # 尝试从缓存获取预测结果 - cache_key = f"{defaultTopic}_{model_type}" - cached_result = prediction_cache.get(cache_key) - - if cached_result is not None: - sentences = cached_result - else: - if model_type == 'basic': - value = SnowNLP(defaultTopic).sentiments - if value == 0.5: - sentences = '中性' - elif value > 0.5: - sentences = '正面' - elif value < 0.5: - sentences = '负面' - else: - predicted_label, confidence = predict_sentiment(defaultTopic) - if predicted_label is not None: - sentences = '良好' if predicted_label == 0 else '不良' - sentences = f"{sentences} (置信度: {confidence:.2%})" - else: - sentences = '预测失败,请稍后重试' - logging.error(f"预测失败,话题: {defaultTopic}") - - # 将结果存入缓存 - prediction_cache.set(cache_key, sentences) - - comments = getCommentFilterDataTopic(defaultTopic) - - return render_template('yuqingpredict.html', - username=username, - TopicList=TopicList, - defaultTopic=defaultTopic, - TopicLen=TopicLen, - sentences=sentences, - xData=X, - yData=Y, - comments=comments, - model_type=model_type) - except Exception as e: - logging.error(f"加载舆情预测时发生错误: {e}") - return render_template('error.html', error_message="加载舆情预测失败") - -@pb.route('/articleCloud') -@login_required -def articleCloud(): - try: - username = session.get('username') - return render_template('articleContentCloud.html', username=username) - except Exception as e: - logging.error(f"加载文章云图时发生错误: {e}") - return render_template('error.html', error_message="加载文章云图失败") - -@pb.route('/page/index') -def index(): - """首页路由""" - try: - hotWordList = getAllHotWords() - logging.info("成功获取热词列表") - return render_template('index.html', hotWordList=hotWordList) - except Exception as e: - logging.error(f"渲染首页时发生错误: {e}") - return render_template('error.html', error_message="加载首页失败") - -@pb.route('/page/article/') -def article(type): - """文章列表页路由""" - try: - articleList = getArticleByType(type) - logging.info(f"成功获取类型为 {type} 的文章列表") - return render_template('article.html', articleList=articleList) - except Exception as e: - logging.error(f"获取文章列表时发生错误: {e}") - return render_template('error.html', error_message="加载文章列表失败") - -@pb.route('/page/articleChar/') -def articleChar(id): - """文章详情页路由""" - try: - article = getArticleById(id) - if not article: - logging.warning(f"未找到ID为 {id} 的文章") - return render_template('error.html', error_message="文章不存在") - logging.info(f"成功获取ID为 {id} 的文章详情") - return render_template('articleChar.html', article=article) - except Exception as e: - logging.error(f"获取文章详情时发生错误: {e}") - return render_template('error.html', error_message="加载文章详情失败") - -@pb.route('/api/analyze_messages', methods=['POST']) -@api_login_required -@rate_limit -async def analyze_messages(): - try: - if not validate_csrf_token(): - return jsonify({'error': 'Invalid CSRF token'}), 403 - - data = request.get_json() - if not data: - return jsonify({'error': 'No data provided'}), 400 - - batch_size = min(int(data.get('batch_size', 50)), 100) # 限制批量大小 - model_type = sanitize_input(data.get('model_type', 'gpt-3.5-turbo')) - analysis_depth = sanitize_input(data.get('analysis_depth', 'standard')) - - # 验证参数 - if model_type not in ['gpt-3.5-turbo', 'gpt-4']: - return jsonify({'error': 'Invalid model type'}), 400 - - if analysis_depth not in ['basic', 'standard', 'deep']: - return jsonify({'error': 'Invalid analysis depth'}), 400 - - messages = getRecentMessages(batch_size) - if not messages: - return jsonify({ - 'success': False, - 'error': '没有找到需要分析的消息' - }), 404 - - analysis_results = await ai_analyzer.analyze_messages( - messages=messages, - batch_size=batch_size, - model_type=model_type, - analysis_depth=analysis_depth - ) - - if not analysis_results: - return jsonify({ - 'success': False, - 'error': '分析过程中出现错误' - }), 500 - - try: - with Session(engine) as session: - for result in analysis_results: - analysis = AIAnalysis( - message_id=result['message_id'], - sentiment=result['sentiment'], - sentiment_score=float(result['sentiment_score']), - keywords=result['keywords'], - key_points=result['key_points'], - influence_analysis=result['influence_analysis'], - risk_level=result['risk_level'] - ) - session.add(analysis) - session.commit() - except Exception as e: - logging.error(f"保存分析结果时出错: {e}") - return jsonify({ - 'success': False, - 'error': '保存分析结果失败' - }), 500 - - display_results = [ - ai_analyzer.format_analysis_for_display(result) - for result in analysis_results - ] - - return jsonify({ - 'success': True, - 'data': display_results - }) - - except Exception as e: - logging.error(f"分析消息时发生错误: {e}") - return jsonify({ - 'success': False, - 'error': str(e) - }), 500 - -@pb.route('/api/get_analysis/') -@api_login_required -@rate_limit -def get_message_analysis(message_id): - """获取特定消息的分析结果""" - try: - if not message_id or message_id < 1: - return jsonify({'error': 'Invalid message ID'}), 400 - - with Session(engine) as session: - analysis = session.query(AIAnalysis)\ - .filter(AIAnalysis.message_id == message_id)\ - .order_by(AIAnalysis.created_at.desc())\ - .first() - - if analysis: - return jsonify({ - 'success': True, - 'data': analysis.to_dict() - }) - else: - return jsonify({ - 'success': False, - 'error': '未找到分析结果' - }), 404 - - except Exception as e: - logging.error(f"获取分析结果时出错: {e}") - return jsonify({ - 'success': False, - 'error': str(e) - }), 500 - -def getRecentMessages(limit=50): - """获取最近的消息""" - # 这里需要根据你的数据库结构实现具体的查询逻辑 - messages = [] - try: - # 示例查询逻辑 - with Session(engine) as session: - results = session.execute( - """ - SELECT id, content - FROM comments - ORDER BY created_at DESC - LIMIT :limit - """, - {'limit': limit} - ).fetchall() - - messages = [ - {'id': row[0], 'content': row[1]} - for row in results - ] - except Exception as e: - logging.error(f"获取最近消息时出错: {e}") - - return messages +from flask import Flask, session, render_template, redirect, Blueprint, request, jsonify, abort +from utils.mynlp import SnowNLP +from utils.getHomePageData import * +from utils.getHotWordPageData import * +from utils.getTableData import * +from utils.getPublicData import getAllHotWords, getAllTopics, getArticleByType, getArticleById +from utils.getEchartsData import * +from utils.getTopicPageData import * +from utils.yuqingpredict import * +from utils.logger import app_logger as logging +from utils.cache_manager import prediction_cache +from utils.ai_analyzer import ai_analyzer +from utils.ai_analysis import AIAnalysis +from sqlalchemy.orm import Session +from sqlalchemy import create_engine +import asyncio +import torch +from BCAT_front.predict import model_manager +from functools import wraps +import bleach +import re +from datetime import datetime, timedelta + +pb = Blueprint('page', + __name__, + url_prefix='/page', + template_folder='templates') + +def sanitize_input(text): + """清理用户输入,防止XSS攻击""" + if text is None: + return None + return bleach.clean(str(text), strip=True) + +def validate_csrf_token(): + """验证CSRF令牌""" + token = request.form.get('csrf_token') + stored_token = session.get('csrf_token') + if not token or not stored_token or token != stored_token: + return False + return True + +def login_required(f): + @wraps(f) + def decorated_function(*args, **kwargs): + if 'username' not in session: + return redirect('/user/login') + return f(*args, **kwargs) + return decorated_function + +def api_login_required(f): + @wraps(f) + def decorated_function(*args, **kwargs): + if 'username' not in session: + return jsonify({'error': 'Unauthorized'}), 401 + return f(*args, **kwargs) + return decorated_function + +def rate_limit(f): + @wraps(f) + def decorated_function(*args, **kwargs): + key = f"rate_limit:{request.remote_addr}:{f.__name__}" + current = int(redis_client.get(key) or 0) + if current >= 100: # 每分钟100次请求限制 + return jsonify({'error': 'Too many requests'}), 429 + pipe = redis_client.pipeline() + pipe.incr(key) + pipe.expire(key, 60) # 60秒后重置 + pipe.execute() + return f(*args, **kwargs) + return decorated_function + +# 设置设备 +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# 设置模型路径 +model_save_path = 'model_pro/final_model.pt' +bert_model_path = 'model_pro/bert_model' +ctm_tokenizer_path = 'model_pro/sentence_bert_model' + +# 初始化模型 +try: + model_manager.load_models(model_save_path, bert_model_path, ctm_tokenizer_path) +except Exception as e: + logging.error(f"模型加载失败: {e}") + +# 数据库配置 +DATABASE_URL = os.getenv('DATABASE_URL', "sqlite:///ai_analysis.db") +engine = create_engine(DATABASE_URL) +AIAnalysis.metadata.create_all(engine) + +def predict_sentiment(text): + """使用改进版模型预测单个文本的情感""" + try: + if not text or len(text.strip()) == 0: + return None, None + + # 清理输入 + cleaned_text = sanitize_input(text) + if not cleaned_text: + return None, None + + predictions, probabilities = model_manager.predict_batch([cleaned_text]) + if predictions is not None and len(predictions) > 0: + return predictions[0], probabilities[0][predictions[0]] + return None, None + except Exception as e: + logging.error(f"预测过程中出现错误: {e}") + return None, None + +@pb.route('/home') +@login_required +def home(): + try: + username = session.get('username') + articleLenMax, likeCountMaxAuthorName, cityMax = getHomeTagsData() + commentsLikeCountTopFore = getHomeCommentsLikeCountTopFore() + X, Y = getHomeArticleCreatedAtChart() + typeChart = getHomeTypeChart() + createAtChart = getHomeCommentCreatedChart() + + return render_template('index.html', + username=username, + articleLenMax=articleLenMax, + likeCountMaxAuthorName=likeCountMaxAuthorName, + cityMax=cityMax, + commentsLikeCountTopFore=commentsLikeCountTopFore, + xData=X, + yData=Y, + typeChart=typeChart, + createAtChart=createAtChart) + except Exception as e: + logging.error(f"加载首页时发生错误: {e}") + return render_template('error.html', error_message="加载首页失败") + +@pb.route('/hotWord') +@login_required +def hotWord(): + try: + username = session.get('username') + hotWordList = getAllHotWords() + if not hotWordList: + return render_template('error.html', error_message="无法获取热词列表") + + defaultHotWord = sanitize_input(request.args.get('hotWord', hotWordList[0][0])) + + # 验证热词是否在列表中 + if not any(defaultHotWord in word for word in hotWordList): + return abort(400, "无效的热词") + + hotWordLen = getHotWordLen(defaultHotWord) + X, Y = getHotWordPageCreatedAtCharData(defaultHotWord) + + value = SnowNLP(defaultHotWord).sentiments + if value == 0.5: + sentences = '中性' + elif value > 0.5: + sentences = '正面' + elif value < 0.5: + sentences = '负面' + + comments = getCommentFilterData(defaultHotWord) + + return render_template('hotWord.html', + username=username, + hotWordList=hotWordList, + defaultHotWord=defaultHotWord, + hotWordLen=hotWordLen, + sentences=sentences, + xData=X, + yData=Y, + comments=comments) + except Exception as e: + logging.error(f"加载热词页面时发生错误: {e}") + return render_template('error.html', error_message="加载热词页面失败") + +@pb.route('/hotTopic') +def hotTopic(): + username = session.get('username') + topicList = getAllTopics() + defaultTopic = topicList[0][0] + if request.args.get('topic'): + defaultTopic = request.args.get('topic') + topicLen = getTopicLen(defaultTopic) + X, Y = getTopicPageCreatedAtCharData() + sentences = '' + + # ... 这里要嵌入 topic 相关内容(热度?)来填充 sentences + + comments = getCommentFilterDataTopic(defaultTopic) + return render_template('hotWord.html', + username=username, + topicList=topicList, + defaultTopic=defaultTopic, + topicLen=topicLen, + sentences=sentences, + xData=X, + yData=Y, + comments=comments) + +@pb.route('/tableData') +@login_required +def tableData(): + try: + username = session.get('username') + defaultFlag = bool(request.args.get('flag', False)) + tableData = getTableDataList(defaultFlag) + + return render_template('tableData.html', + username=username, + tableData=tableData, + defaultFlag=defaultFlag) + except Exception as e: + logging.error(f"加载表格数据时发生错误: {e}") + return render_template('error.html', error_message="加载表格数据失败") + +@pb.route('/articleChar') +def articleChar(): + username = session.get('username') + typeList = getTypeList() + defaultType = typeList[0] + if request.args.get('type'): defaultType = request.args.get('type') + X, Y = getArticleLikeCount(defaultType) + x1Data, y1Data = getArticleCommentsLen(defaultType) + x2Data, y2Data = getArticleRepotsLen(defaultType) + return render_template('articleChar.html', + username=username, + typeList=typeList, + defaultType=defaultType, + xData=X, + yData=Y, + x1Data=x1Data, + y1Data=y1Data, + x2Data=x2Data, + y2Data=y2Data) + +@pb.route('/ipChar') +@login_required +def ipChar(): + try: + username = session.get('username') + articleRegionData = getIPByArticleRegion() + commentRegionData = getIPByCommentsRegion() + + return render_template('ipChar.html', + username=username, + articleRegionData=articleRegionData, + commentRegionData=commentRegionData) + except Exception as e: + logging.error(f"加载IP统计时发生错误: {e}") + return render_template('error.html', error_message="加载IP统计失败") + +@pb.route('/commentChar') +@login_required +def commentChar(): + try: + username = session.get('username') + X, Y = getCommentDataOne() + genderPieData = getCommentDataTwo() + + return render_template('commentChar.html', + username=username, + xData=X, + yData=Y, + genderPieData=genderPieData) + except Exception as e: + logging.error(f"加载评论统计时发生错误: {e}") + return render_template('error.html', error_message="加载评论统计失败") + +@pb.route('/yuqingChar') +@login_required +def yuqingChar(): + try: + username = session.get('username') + model_type = sanitize_input(request.args.get('model', 'pro')) + + # 验证模型类型 + if model_type not in ['pro', 'basic']: + return abort(400, "无效的模型类型") + + X, Y, biedata = getYuQingCharDataOne() + biedata1, biedata2 = getYuQingCharDataTwo(model_type) + x1Data, y1Data = getYuQingCharDataThree() + + return render_template('yuqingChar.html', + username=username, + xData=X, + yData=Y, + biedata=biedata, + biedata1=biedata1, + biedata2=biedata2, + x1Data=x1Data, + y1Data=y1Data, + model_type=model_type) + except Exception as e: + logging.error(f"加载舆情统计时发生错误: {e}") + return render_template('error.html', error_message="加载舆情统计失败") + +@pb.route('/yuqingpredict') +@login_required +def yuqingpredict(): + try: + username = session.get('username') + TopicList = getAllTopicData() + if not TopicList: + return render_template('error.html', error_message="无法获取话题列表") + + defaultTopic = sanitize_input(request.args.get('Topic', TopicList[0][0])) + + # 验证话题是否在列表中 + if not any(defaultTopic in topic for topic in TopicList): + return abort(400, "无效的话题") + + TopicLen = getTopicLen(defaultTopic) + X, Y = getTopicCreatedAtandpredictData(defaultTopic) + + model_type = sanitize_input(request.args.get('model', 'pro')) + if model_type not in ['pro', 'basic']: + return abort(400, "无效的模型类型") + + # 尝试从缓存获取预测结果 + cache_key = f"{defaultTopic}_{model_type}" + cached_result = prediction_cache.get(cache_key) + + if cached_result is not None: + sentences = cached_result + else: + if model_type == 'basic': + value = SnowNLP(defaultTopic).sentiments + if value == 0.5: + sentences = '中性' + elif value > 0.5: + sentences = '正面' + elif value < 0.5: + sentences = '负面' + else: + predicted_label, confidence = predict_sentiment(defaultTopic) + if predicted_label is not None: + sentences = '良好' if predicted_label == 0 else '不良' + sentences = f"{sentences} (置信度: {confidence:.2%})" + else: + sentences = '预测失败,请稍后重试' + logging.error(f"预测失败,话题: {defaultTopic}") + + # 将结果存入缓存 + prediction_cache.set(cache_key, sentences) + + comments = getCommentFilterDataTopic(defaultTopic) + + return render_template('yuqingpredict.html', + username=username, + TopicList=TopicList, + defaultTopic=defaultTopic, + TopicLen=TopicLen, + sentences=sentences, + xData=X, + yData=Y, + comments=comments, + model_type=model_type) + except Exception as e: + logging.error(f"加载舆情预测时发生错误: {e}") + return render_template('error.html', error_message="加载舆情预测失败") + +@pb.route('/articleCloud') +@login_required +def articleCloud(): + try: + username = session.get('username') + return render_template('articleContentCloud.html', username=username) + except Exception as e: + logging.error(f"加载文章云图时发生错误: {e}") + return render_template('error.html', error_message="加载文章云图失败") + +@pb.route('/page/index') +def index(): + """首页路由""" + try: + hotWordList = getAllHotWords() + logging.info("成功获取热词列表") + return render_template('index.html', hotWordList=hotWordList) + except Exception as e: + logging.error(f"渲染首页时发生错误: {e}") + return render_template('error.html', error_message="加载首页失败") + +@pb.route('/page/article/') +def article(type): + """文章列表页路由""" + try: + articleList = getArticleByType(type) + logging.info(f"成功获取类型为 {type} 的文章列表") + return render_template('article.html', articleList=articleList) + except Exception as e: + logging.error(f"获取文章列表时发生错误: {e}") + return render_template('error.html', error_message="加载文章列表失败") + +@pb.route('/page/articleChar/') +def articleChar(id): + """文章详情页路由""" + try: + article = getArticleById(id) + if not article: + logging.warning(f"未找到ID为 {id} 的文章") + return render_template('error.html', error_message="文章不存在") + logging.info(f"成功获取ID为 {id} 的文章详情") + return render_template('articleChar.html', article=article) + except Exception as e: + logging.error(f"获取文章详情时发生错误: {e}") + return render_template('error.html', error_message="加载文章详情失败") + +@pb.route('/api/analyze_messages', methods=['POST']) +@api_login_required +@rate_limit +async def analyze_messages(): + try: + if not validate_csrf_token(): + return jsonify({'error': 'Invalid CSRF token'}), 403 + + data = request.get_json() + if not data: + return jsonify({'error': 'No data provided'}), 400 + + batch_size = min(int(data.get('batch_size', 50)), 100) # 限制批量大小 + model_type = sanitize_input(data.get('model_type', 'gpt-3.5-turbo')) + analysis_depth = sanitize_input(data.get('analysis_depth', 'standard')) + + # 验证参数 + if model_type not in ['gpt-3.5-turbo', 'gpt-4']: + return jsonify({'error': 'Invalid model type'}), 400 + + if analysis_depth not in ['basic', 'standard', 'deep']: + return jsonify({'error': 'Invalid analysis depth'}), 400 + + messages = getRecentMessages(batch_size) + if not messages: + return jsonify({ + 'success': False, + 'error': '没有找到需要分析的消息' + }), 404 + + analysis_results = await ai_analyzer.analyze_messages( + messages=messages, + batch_size=batch_size, + model_type=model_type, + analysis_depth=analysis_depth + ) + + if not analysis_results: + return jsonify({ + 'success': False, + 'error': '分析过程中出现错误' + }), 500 + + try: + with Session(engine) as session: + for result in analysis_results: + analysis = AIAnalysis( + message_id=result['message_id'], + sentiment=result['sentiment'], + sentiment_score=float(result['sentiment_score']), + keywords=result['keywords'], + key_points=result['key_points'], + influence_analysis=result['influence_analysis'], + risk_level=result['risk_level'] + ) + session.add(analysis) + session.commit() + except Exception as e: + logging.error(f"保存分析结果时出错: {e}") + return jsonify({ + 'success': False, + 'error': '保存分析结果失败' + }), 500 + + display_results = [ + ai_analyzer.format_analysis_for_display(result) + for result in analysis_results + ] + + return jsonify({ + 'success': True, + 'data': display_results + }) + + except Exception as e: + logging.error(f"分析消息时发生错误: {e}") + return jsonify({ + 'success': False, + 'error': str(e) + }), 500 + +@pb.route('/api/get_analysis/') +@api_login_required +@rate_limit +def get_message_analysis(message_id): + """获取特定消息的分析结果""" + try: + if not message_id or message_id < 1: + return jsonify({'error': 'Invalid message ID'}), 400 + + with Session(engine) as session: + analysis = session.query(AIAnalysis)\ + .filter(AIAnalysis.message_id == message_id)\ + .order_by(AIAnalysis.created_at.desc())\ + .first() + + if analysis: + return jsonify({ + 'success': True, + 'data': analysis.to_dict() + }) + else: + return jsonify({ + 'success': False, + 'error': '未找到分析结果' + }), 404 + + except Exception as e: + logging.error(f"获取分析结果时出错: {e}") + return jsonify({ + 'success': False, + 'error': str(e) + }), 500 + +def getRecentMessages(limit=50): + """获取最近的消息""" + # 这里需要根据你的数据库结构实现具体的查询逻辑 + messages = [] + try: + # 示例查询逻辑 + with Session(engine) as session: + results = session.execute( + """ + SELECT id, content + FROM comments + ORDER BY created_at DESC + LIMIT :limit + """, + {'limit': limit} + ).fetchall() + + messages = [ + {'id': row[0], 'content': row[1]} + for row in results + ] + except Exception as e: + logging.error(f"获取最近消息时出错: {e}") + + return messages diff --git a/views/page/templates/articleChar.html b/views/page/templates/articleChar.html index bbc0243..e10542c 100644 --- a/views/page/templates/articleChar.html +++ b/views/page/templates/articleChar.html @@ -1,345 +1,345 @@ -{% extends 'base_page.html' %} -{% block title %} - 文章分析 -{% endblock %} - -{% block nav %} - -{% endblock %} - -{% block content %} -
-
-
-
-

文章分析页

-
-
-
-
-
-
-
-
- - - -
-
-
-
-
-
-
-
-
-
-

文章点赞量分析 👍

-
-
-
-
-
-
-
-
-
-
-
-

文章评论量分析 🔥

-
-
-
-
-
-
-
-
-
-
-
-

文章转发量分析 🥇

-
-
-
-
-
-
-
-
-
-{% endblock %} - -{% block echarts %} - - - +{% extends 'base_page.html' %} +{% block title %} + 文章分析 +{% endblock %} + +{% block nav %} + +{% endblock %} + +{% block content %} +
+
+
+
+

文章分析页

+
+
+
+
+
+
+
+
+ + + +
+
+
+
+
+
+
+
+
+
+

文章点赞量分析 👍

+
+
+
+
+
+
+
+
+
+
+
+

文章评论量分析 🔥

+
+
+
+
+
+
+
+
+
+
+
+

文章转发量分析 🥇

+
+
+
+
+
+
+
+
+
+{% endblock %} + +{% block echarts %} + + + {% endblock %} \ No newline at end of file diff --git a/views/page/templates/articleContentCloud.html b/views/page/templates/articleContentCloud.html index c252ed3..4644c31 100644 --- a/views/page/templates/articleContentCloud.html +++ b/views/page/templates/articleContentCloud.html @@ -1,144 +1,144 @@ -{% extends 'base_page.html' %} -{% block title %} - 文章内容词云图 -{% endblock %} - -{% block nav %} - -{% endblock %} - -{% block content %} -
-
-
-
-

文章分析页

-
-
-
-
-
-
-
-
-

文章内容词云图

-
-
-
-
- -
-
-
-
-
-
- -{% endblock %} - -{% block echarts %} - +{% extends 'base_page.html' %} +{% block title %} + 文章内容词云图 +{% endblock %} + +{% block nav %} + +{% endblock %} + +{% block content %} +
+
+
+
+

文章分析页

+
+
+
+
+
+
+
+
+

文章内容词云图

+
+
+
+
+ +
+
+
+
+
+
+ +{% endblock %} + +{% block echarts %} + {% endblock %} \ No newline at end of file diff --git a/views/page/templates/base_page.html b/views/page/templates/base_page.html index 44e0b85..9f0873e 100644 --- a/views/page/templates/base_page.html +++ b/views/page/templates/base_page.html @@ -1,478 +1,505 @@ - - - - - - - {% block title %}首页{% endblock %} - - - - - - - -
-
-
-
- - -
- -
- {% block content %} -
-
-
-
-

首页

-
-
-
-
-
-
-
-
-
-

文章个数

-
-
{{ articleLenMax }}个
-
-
-
-
-
-
-
-
-
-
-
-

文章爬取规则

-
-
每 5 小时更新一次爬取内容
-
-
-
-
-
-
-
-
-
-
-
-

下次爬取时间

-
-
7-5-18:00
-
-
-
-
-
-
-
-
-
-
-

文章发布时间个数

-
-
-
-
-
-
-
-
-
-
-
-

评论点赞量 Top Fore

-
-
-
-
    - {% for i in commentsLikeCountTopFore %} -
  • -
    -

    - 🧑‍ {{ i[5] }} -

    -

    - {{ i[4] }} -

    -
    -
    -
    👍 {{ i[2] }}
    -
    -
  • - {% endfor %} - -
-
-
- -
-
-
-
-
-

文章类型占比

-
-
-
-
- -
-
-
-
-
-
-
-
-

评论用户名词云图

-
-
-
-
- -
-
-
-
-
-
-
-
-

评论用户时间占比

-
-
-
-
- -
-
-
-
-
-
- {% endblock %} -
-
- -
-
-
-
-
-
- 网安小学期 © 2024.郭航江 -
-
-
-
- - - {% block echarts %} - - - - {% endblock %} - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/views/page/templates/commentChar.html b/views/page/templates/commentChar.html index 814630d..0ec50c1 100644 --- a/views/page/templates/commentChar.html +++ b/views/page/templates/commentChar.html @@ -1,317 +1,317 @@ -{% extends 'base_page.html' %} - -{% block title %} - 评论分析 -{% endblock %} -{% block nav %} - -{% endblock %} -{% block content %} -
-
-
-
-

评论分析

-
-
-
-
-
-
-
-
-

评论点赞次数区间图

-
-
-
-
-
-
-
-
-
-
-
-

评论用户性别占比

-
-
-
-
-
-
-
-
-
-
-
-

用户评论词云图

-
-
-
-
- -
-
-
-
-
-
- -{% endblock %} - -{% block echarts %} - - +{% extends 'base_page.html' %} + +{% block title %} + 评论分析 +{% endblock %} +{% block nav %} + +{% endblock %} +{% block content %} +
+
+
+
+

评论分析

+
+
+
+
+
+
+
+
+

评论点赞次数区间图

+
+
+
+
+
+
+
+
+
+
+
+

评论用户性别占比

+
+
+
+
+
+
+
+
+
+
+
+

用户评论词云图

+
+
+
+
+ +
+
+
+
+
+
+ +{% endblock %} + +{% block echarts %} + + {% endblock %} \ No newline at end of file diff --git a/views/page/templates/hotWord.html b/views/page/templates/hotWord.html index 4600cf1..2c3b269 100644 --- a/views/page/templates/hotWord.html +++ b/views/page/templates/hotWord.html @@ -1,503 +1,503 @@ -{% extends 'base_page.html' %} -{% block title %} - 热词统计 -{% endblock %} -{% block nav %} - -{% endblock %} -{% block content %} -
-
-
-
-

热词统计页

-
-
-
-
-
-
-
-
-

-<div class="form-group">
-   <label>Small</label>
-   <select class="form-control form-control-sm mb-3">
-      <option selected="">Open this select menu</option>
-      <option value="1">One</option>
-      <option value="2">Two</option>
-      <option value="3">Three</option>
-   </select>
-</div>
-<div class="form-group">
-   <label>Default</label>
-   <select class="form-control mb-3">
-      <option selected="">Open this select menu</option>
-      <option value="1">One</option>
-      <option value="2">Two</option>
-      <option value="3">Three</option>
-   </select>
-</div>
-<div class="form-group">
-   <label>Large</label>
-   <select class="form-control form-control-lg">
-      <option selected="">Open this select menu</option>
-      <option value="1">One</option>
-      <option value="2">Two</option>
-      <option value="3">Three</option>
-   </select>
-</div>
-
-
-
- - - -
-
-
- -
-
-
-
-
-
-

{{ defaultHotWord }}

-
-
    -
  • 热词名称:{{ defaultHotWord }}
  • -
  • 出现次数:{{ hotWordLen }}次
  • -
  • 热词情感:{{ sentences }}
  • -
-
- -
-
-
-
-
-
-

热词年份变化趋势

-
-
-
-
-
-
-
-
-
-
-

热词查询表格

-
-
- -
-
-
-
-
-

-<table id="datatable" class="table data-table table-striped table-bordered" >
-   <thead>
-      <tr>
-         <th>Name</th>
-         <th>Position</th>
-         <th>Office</th>
-         <th>Age</th>
-         <th>Start date</th>
-         <th>Salary</th>
-      </tr>
-   </thead>
-   <tbody>
-      <tr>
-         <td>Tiger Nixon</td>
-         <td>System Architect</td>
-         <td>Edinburgh</td>
-         <td>61</td>
-         <td>2011/04/25</td>
-         <td>$320,800</td>
-      </tr>
-      <tr>
-         <td>Garrett Winters</td>
-         <td>Accountant</td>
-         <td>Tokyo</td>
-         <td>63</td>
-         <td>2011/07/25</td>
-         <td>$170,750</td>
-      </tr>
-      <tr>
-         <td>Ashton Cox</td>
-         <td>Junior Technical Author</td>
-         <td>San Francisco</td>
-         <td>66</td>
-         <td>2009/01/12</td>
-         <td>$86,000</td>
-      </tr>
-      <tr>
-         <td>Cedric Kelly</td>
-         <td>Senior Javascript Developer</td>
-         <td>Edinburgh</td>
-         <td>22</td>
-         <td>2012/03/29</td>
-         <td>$433,060</td>
-      </tr>
-      <tr>
-         <td>Airi Satou</td>
-         <td>Accountant</td>
-         <td>Tokyo</td>
-         <td>33</td>
-         <td>2008/11/28</td>
-         <td>$162,700</td>
-      </tr>
-      <tr>
-         <td>Brielle Williamson</td>
-         <td>Integration Specialist</td>
-         <td>New York</td>
-         <td>61</td>
-         <td>2012/12/02</td>
-         <td>$372,000</td>
-      </tr>
-      <tr>
-         <td>Herrod Chandler</td>
-         <td>Sales Assistant</td>
-         <td>San Francisco</td>
-         <td>59</td>
-         <td>2012/08/06</td>
-         <td>$137,500</td>
-      </tr>
-      <tr>
-         <td>Rhona Davidson</td>
-         <td>Integration Specialist</td>
-         <td>Tokyo</td>
-         <td>55</td>
-         <td>2010/10/14</td>
-         <td>$327,900</td>
-      </tr>
-      <tr>
-         <td>Colleen Hurst</td>
-         <td>Javascript Developer</td>
-         <td>San Francisco</td>
-         <td>39</td>
-         <td>2009/09/15</td>
-         <td>$205,500</td>
-      </tr>
-      <tr>
-         <td>Sonya Frost</td>
-         <td>Software Engineer</td>
-         <td>Edinburgh</td>
-         <td>23</td>
-         <td>2008/12/13</td>
-         <td>$103,600</td>
-      </tr>
-      <tr>
-         <td>Jena Gaines</td>
-         <td>Office Manager</td>
-         <td>London</td>
-         <td>30</td>
-         <td>2008/12/19</td>
-         <td>$90,560</td>
-      </tr>
-      <tr>
-         <td>Quinn Flynn</td>
-         <td>Support Lead</td>
-         <td>Edinburgh</td>
-         <td>22</td>
-         <td>2013/03/03</td>
-         <td>$342,000</td>
-      </tr>
-      <tr>
-         <td>Charde Marshall</td>
-         <td>Regional Director</td>
-         <td>San Francisco</td>
-         <td>36</td>
-         <td>2008/10/16</td>
-         <td>$470,600</td>
-      </tr>
-      <tr>
-         <td>Haley Kennedy</td>
-         <td>Senior Marketing Designer</td>
-         <td>London</td>
-         <td>43</td>
-         <td>2012/12/18</td>
-         <td>$313,500</td>
-      </tr>
-      <tr>
-         <td>Tatyana Fitzpatrick</td>
-         <td>Regional Director</td>
-         <td>London</td>
-         <td>19</td>
-         <td>2010/03/17</td>
-         <td>$385,750</td>
-      </tr>
-      <tr>
-         <td>Michael Silva</td>
-         <td>Marketing Designer</td>
-         <td>London</td>
-         <td>66</td>
-         <td>2012/11/27</td>
-         <td>$198,500</td>
-      </tr>
-      <tr>
-         <td>Paul Byrd</td>
-         <td>Chief Financial Officer (CFO)</td>
-         <td>New York</td>
-         <td>64</td>
-         <td>2010/06/09</td>
-         <td>$725,000</td>
-      </tr>
-      <tr>
-         <td>Gloria Little</td>
-         <td>Systems Administrator</td>
-         <td>New York</td>
-         <td>59</td>
-         <td>2009/04/10</td>
-         <td>$237,500</td>
-      </tr>
-      <tr>
-         <td>Bradley Greer</td>
-         <td>Software Engineer</td>
-         <td>London</td>
-         <td>41</td>
-         <td>2012/10/13</td>
-         <td>$132,000</td>
-      </tr>
-      <tr>
-         <td>Dai Rios</td>
-         <td>Personnel Lead</td>
-         <td>Edinburgh</td>
-         <td>35</td>
-         <td>2012/09/26</td>
-         <td>$217,500</td>
-      </tr>
-</table>
-
-
-

根据选择的热词从而查询出评论数据

-
-
-
- - - - - - - - - - - {% for i in comments %} - - - - - - - - - {% endfor %} - -
文章ID评论用户评论性别评论地址评论内容点赞量
{{ i[0] }}{{ i[5] }} - {% if i[6] =='f' %} - 女生 - {% else %} - 男生 - {% endif %} - {{ i[3] }}{{ i[4] }}👍{{ i[2] }}
-
-
-
-
-
-
- -{% endblock %} - -{% block echarts %} - +{% extends 'base_page.html' %} +{% block title %} + 热词统计 +{% endblock %} +{% block nav %} + +{% endblock %} +{% block content %} +
+
+
+
+

热词统计页

+
+
+
+
+
+
+
+
+

+<div class="form-group">
+   <label>Small</label>
+   <select class="form-control form-control-sm mb-3">
+      <option selected="">Open this select menu</option>
+      <option value="1">One</option>
+      <option value="2">Two</option>
+      <option value="3">Three</option>
+   </select>
+</div>
+<div class="form-group">
+   <label>Default</label>
+   <select class="form-control mb-3">
+      <option selected="">Open this select menu</option>
+      <option value="1">One</option>
+      <option value="2">Two</option>
+      <option value="3">Three</option>
+   </select>
+</div>
+<div class="form-group">
+   <label>Large</label>
+   <select class="form-control form-control-lg">
+      <option selected="">Open this select menu</option>
+      <option value="1">One</option>
+      <option value="2">Two</option>
+      <option value="3">Three</option>
+   </select>
+</div>
+
+
+
+ + + +
+
+
+ +
+
+
+
+
+
+

{{ defaultHotWord }}

+
+
    +
  • 热词名称:{{ defaultHotWord }}
  • +
  • 出现次数:{{ hotWordLen }}次
  • +
  • 热词情感:{{ sentences }}
  • +
+
+ +
+
+
+
+
+
+

热词年份变化趋势

+
+
+
+
+
+
+
+
+
+
+

热词查询表格

+
+
+ +
+
+
+
+
+

+<table id="datatable" class="table data-table table-striped table-bordered" >
+   <thead>
+      <tr>
+         <th>Name</th>
+         <th>Position</th>
+         <th>Office</th>
+         <th>Age</th>
+         <th>Start date</th>
+         <th>Salary</th>
+      </tr>
+   </thead>
+   <tbody>
+      <tr>
+         <td>Tiger Nixon</td>
+         <td>System Architect</td>
+         <td>Edinburgh</td>
+         <td>61</td>
+         <td>2011/04/25</td>
+         <td>$320,800</td>
+      </tr>
+      <tr>
+         <td>Garrett Winters</td>
+         <td>Accountant</td>
+         <td>Tokyo</td>
+         <td>63</td>
+         <td>2011/07/25</td>
+         <td>$170,750</td>
+      </tr>
+      <tr>
+         <td>Ashton Cox</td>
+         <td>Junior Technical Author</td>
+         <td>San Francisco</td>
+         <td>66</td>
+         <td>2009/01/12</td>
+         <td>$86,000</td>
+      </tr>
+      <tr>
+         <td>Cedric Kelly</td>
+         <td>Senior Javascript Developer</td>
+         <td>Edinburgh</td>
+         <td>22</td>
+         <td>2012/03/29</td>
+         <td>$433,060</td>
+      </tr>
+      <tr>
+         <td>Airi Satou</td>
+         <td>Accountant</td>
+         <td>Tokyo</td>
+         <td>33</td>
+         <td>2008/11/28</td>
+         <td>$162,700</td>
+      </tr>
+      <tr>
+         <td>Brielle Williamson</td>
+         <td>Integration Specialist</td>
+         <td>New York</td>
+         <td>61</td>
+         <td>2012/12/02</td>
+         <td>$372,000</td>
+      </tr>
+      <tr>
+         <td>Herrod Chandler</td>
+         <td>Sales Assistant</td>
+         <td>San Francisco</td>
+         <td>59</td>
+         <td>2012/08/06</td>
+         <td>$137,500</td>
+      </tr>
+      <tr>
+         <td>Rhona Davidson</td>
+         <td>Integration Specialist</td>
+         <td>Tokyo</td>
+         <td>55</td>
+         <td>2010/10/14</td>
+         <td>$327,900</td>
+      </tr>
+      <tr>
+         <td>Colleen Hurst</td>
+         <td>Javascript Developer</td>
+         <td>San Francisco</td>
+         <td>39</td>
+         <td>2009/09/15</td>
+         <td>$205,500</td>
+      </tr>
+      <tr>
+         <td>Sonya Frost</td>
+         <td>Software Engineer</td>
+         <td>Edinburgh</td>
+         <td>23</td>
+         <td>2008/12/13</td>
+         <td>$103,600</td>
+      </tr>
+      <tr>
+         <td>Jena Gaines</td>
+         <td>Office Manager</td>
+         <td>London</td>
+         <td>30</td>
+         <td>2008/12/19</td>
+         <td>$90,560</td>
+      </tr>
+      <tr>
+         <td>Quinn Flynn</td>
+         <td>Support Lead</td>
+         <td>Edinburgh</td>
+         <td>22</td>
+         <td>2013/03/03</td>
+         <td>$342,000</td>
+      </tr>
+      <tr>
+         <td>Charde Marshall</td>
+         <td>Regional Director</td>
+         <td>San Francisco</td>
+         <td>36</td>
+         <td>2008/10/16</td>
+         <td>$470,600</td>
+      </tr>
+      <tr>
+         <td>Haley Kennedy</td>
+         <td>Senior Marketing Designer</td>
+         <td>London</td>
+         <td>43</td>
+         <td>2012/12/18</td>
+         <td>$313,500</td>
+      </tr>
+      <tr>
+         <td>Tatyana Fitzpatrick</td>
+         <td>Regional Director</td>
+         <td>London</td>
+         <td>19</td>
+         <td>2010/03/17</td>
+         <td>$385,750</td>
+      </tr>
+      <tr>
+         <td>Michael Silva</td>
+         <td>Marketing Designer</td>
+         <td>London</td>
+         <td>66</td>
+         <td>2012/11/27</td>
+         <td>$198,500</td>
+      </tr>
+      <tr>
+         <td>Paul Byrd</td>
+         <td>Chief Financial Officer (CFO)</td>
+         <td>New York</td>
+         <td>64</td>
+         <td>2010/06/09</td>
+         <td>$725,000</td>
+      </tr>
+      <tr>
+         <td>Gloria Little</td>
+         <td>Systems Administrator</td>
+         <td>New York</td>
+         <td>59</td>
+         <td>2009/04/10</td>
+         <td>$237,500</td>
+      </tr>
+      <tr>
+         <td>Bradley Greer</td>
+         <td>Software Engineer</td>
+         <td>London</td>
+         <td>41</td>
+         <td>2012/10/13</td>
+         <td>$132,000</td>
+      </tr>
+      <tr>
+         <td>Dai Rios</td>
+         <td>Personnel Lead</td>
+         <td>Edinburgh</td>
+         <td>35</td>
+         <td>2012/09/26</td>
+         <td>$217,500</td>
+      </tr>
+</table>
+
+
+

根据选择的热词从而查询出评论数据

+
+
+
+ + + + + + + + + + + {% for i in comments %} + + + + + + + + + {% endfor %} + +
文章ID评论用户评论性别评论地址评论内容点赞量
{{ i[0] }}{{ i[5] }} + {% if i[6] =='f' %} + 女生 + {% else %} + 男生 + {% endif %} + {{ i[3] }}{{ i[4] }}👍{{ i[2] }}
+
+
+
+
+
+
+ +{% endblock %} + +{% block echarts %} + {% endblock %} \ No newline at end of file diff --git a/views/page/templates/index.html b/views/page/templates/index.html index 3f4a51d..d3c6cc8 100644 --- a/views/page/templates/index.html +++ b/views/page/templates/index.html @@ -1,27 +1,30 @@ - -{% extends 'base_page.html' %} \ No newline at end of file + +{% extends 'base_page.html' %} +{% block title %} + 首页 +{% endblock %} \ No newline at end of file diff --git a/views/page/templates/ipChar.html b/views/page/templates/ipChar.html index 718084e..c0c5afb 100644 --- a/views/page/templates/ipChar.html +++ b/views/page/templates/ipChar.html @@ -1,275 +1,275 @@ -{% extends 'base_page.html' %} - -{% block title %} - IP分析 -{% endblock %} - -{% block nav %} - -{% endblock %} -{% block content %} -
-
-
-
-

IP分析

-
-
-
-
-
-
-
-
-

文章IP位置分析图

-
-
-
-
-
-
-
-
-
-
-
-

评论IP位置分析图

-
-
-
-
-
-
-
-
-
- -{% endblock %} - -{% block echarts %} - - +{% extends 'base_page.html' %} + +{% block title %} + IP分析 +{% endblock %} + +{% block nav %} + +{% endblock %} +{% block content %} +
+
+
+
+

IP分析

+
+
+
+
+
+
+
+
+

文章IP位置分析图

+
+
+
+
+
+
+
+
+
+
+
+

评论IP位置分析图

+
+
+
+
+
+
+
+
+
+ +{% endblock %} + +{% block echarts %} + + {% endblock %} \ No newline at end of file diff --git a/views/page/templates/tableData.html b/views/page/templates/tableData.html index c86a6cb..d465ec5 100644 --- a/views/page/templates/tableData.html +++ b/views/page/templates/tableData.html @@ -1,225 +1,228 @@ -{% extends 'base_page.html' %} - -{% block title %} -微博舆情统计页 -{% endblock %} -{% block nav %} - -{% endblock %} -{% block content %} -
-
-
-
-

微博舆情统计页

-
-
-
- -
-
-
-
-
-

微博文章统计表格 - 舆情 情感分类 -

-
-
- -
-
-
-

情感分类

-
-
-
- - - - - - - - - - - - {% if defaultFlag %} - - {% endif %} - - - - {% for article in tableData %} - - - - - - - - - - {% if defaultFlag %} - - {% endif %} - - {% endfor %} - -
- 文章ID - - 文章IP - - 点赞量 - - 转发量 - - 评论量 - - 类型 - - 内容 - - 发布时间 - - 情感分类 -
- - {{ article[0] }} - - {{ article[4] }}👍{{ article[1] }}🥇{{ article[2] }}🔥{{ article[3] }}{{ article[8] }}{{ article[5] }}{{ article[7] }} - {% if article[-1] == '正面' %} - - {{ article[-1] }} - - - {% else %} - - {{ article[-1] }} - - - {% endif %} -
-
-
-
-
-
-
-
- -
-{% endblock %} - - -{% block echarts %} - +{% extends 'base_page.html' %} + +{% block title %} +微博舆情统计页 +{% endblock %} +{% block nav %} + +{% endblock %} +{% block content %} +
+
+
+
+

微博舆情统计页

+
+
+
+ +
+
+
+
+
+

微博文章统计表格 - 舆情 情感分类

+
+
+ +
+
+
+

情感分类

+
+
+
+ + + + + + + + + + + + + {% if defaultFlag %} + + {% endif %} + + + + {% for article in tableData %} + + + + + + + + + + + {% if defaultFlag %} + + {% endif %} + + {% endfor %} + +
+ 文章ID + + 文章IP + + 文章标题 + + 点赞量 + + 转发量 + + 评论量 + + 类型 + + 内容 + + 发布时间 + + 情感分类 +
+ + {{ article[0] }} + + {{ article[4] }}{{ article[5] }}👍{{ article[1] }}🥇{{ article[2] }}🔥{{ article[3] }}{{ article[8] }}{{ article[5] }}{{ article[7] }} + {% if article[-1] == '正面' %} + + {{ article[-1] }} + + + {% else %} + + {{ article[-1] }} + + + {% endif %} +
+
+
+
+
+
+
+
+ +
+{% endblock %} + + +{% block echarts %} + {% endblock %} \ No newline at end of file diff --git a/views/page/templates/yuqingChar.html b/views/page/templates/yuqingChar.html index 0972902..25d92d4 100644 --- a/views/page/templates/yuqingChar.html +++ b/views/page/templates/yuqingChar.html @@ -1,401 +1,401 @@ -{% extends 'base_page.html' %} -{% block title %} - 舆情分析 -{% endblock %} -{% block nav %} - -{% endblock %} - -{% block content %} -
-
-
-
-

舆情分析

-
-
-
-
-
-
-
-
-

热词情感趋势柱状图

-
-
-
-
-
-
-
-
-
-
-
-

热词情感趋势树形图

-
-
-
-
-
-
-
-
-
-
-
-

文章内容与评论内容舆情趋势饼状图

-
-
-
-
-
-
-
-
-
-
-
-

热词TOP10

-
-
-
-
-
-
-
-
-
-
- - -
-
-
- -{% endblock %} - -{% block echarts %} - - - - - +{% extends 'base_page.html' %} +{% block title %} + 舆情分析 +{% endblock %} +{% block nav %} + +{% endblock %} + +{% block content %} +
+
+
+
+

舆情分析

+
+
+
+
+
+
+
+
+

热词情感趋势柱状图

+
+
+
+
+
+
+
+
+
+
+
+

热词情感趋势树形图

+
+
+
+
+
+
+
+
+
+
+
+

文章内容与评论内容舆情趋势饼状图

+
+
+
+
+
+
+
+
+
+
+
+

热词TOP10

+
+
+
+
+
+
+
+
+
+
+ + +
+
+
+ +{% endblock %} + +{% block echarts %} + + + + + {% endblock %} \ No newline at end of file diff --git a/views/page/templates/yuqingpredict.html b/views/page/templates/yuqingpredict.html index 50334b4..0653c29 100644 --- a/views/page/templates/yuqingpredict.html +++ b/views/page/templates/yuqingpredict.html @@ -1,925 +1,925 @@ -{% extends 'base_page.html' %} -{% block title %} - 舆情预测 -{% endblock %} -{% block nav %} - -{% endblock %} -{% block content %} -
-
-
-
-

话题统计页

-
-
-
-
-
-
-
-
-

-<div class="form-group">
-   <label>Small</label>
-   <select class="form-control form-control-sm mb-3">
-      <option selected="">Open this select menu</option>
-      <option value="1">One</option>
-      <option value="2">Two</option>
-      <option value="3">Three</option>
-   </select>
-</div>
-<div class="form-group">
-   <label>Default</label>
-   <select class="form-control mb-3">
-      <option selected="">Open this select menu</option>
-      <option value="1">One</option>
-      <option value="2">Two</option>
-      <option value="3">Three</option>
-   </select>
-</div>
-<div class="form-group">
-   <label>Large</label>
-   <select class="form-control form-control-lg">
-      <option selected="">Open this select menu</option>
-      <option value="1">One</option>
-      <option value="2">Two</option>
-      <option value="3">Three</option>
-   </select>
-</div>
-
-
-
- - -
-
-
- - -
-
- -
-
- -
-
-
-
-
-
-

{{ defaultHotWord }}

-
-
    -
  • 话题名称:{{ defaultHotWord }}
  • -
  • 出现次数:{{ hotWordLen }}次
  • -
  • 话题情感:{{ sentences }}
  • -
-
- -
-
-
-
-
-
-

话题年份变化趋势

-
-
-
-
-
-
-
-
-
-
-

话题查询表格

-
-
- -
-
-
-
-
-

-<table id="datatable" class="table data-table table-striped table-bordered" >
-   <thead>
-      <tr>
-         <th>Name</th>
-         <th>Position</th>
-         <th>Office</th>
-         <th>Age</th>
-         <th>Start date</th>
-         <th>Salary</th>
-      </tr>
-   </thead>
-   <tbody>
-      <tr>
-         <td>Tiger Nixon</td>
-         <td>System Architect</td>
-         <td>Edinburgh</td>
-         <td>61</td>
-         <td>2011/04/25</td>
-         <td>$320,800</td>
-      </tr>
-      <tr>
-         <td>Garrett Winters</td>
-         <td>Accountant</td>
-         <td>Tokyo</td>
-         <td>63</td>
-         <td>2011/07/25</td>
-         <td>$170,750</td>
-      </tr>
-      <tr>
-         <td>Ashton Cox</td>
-         <td>Junior Technical Author</td>
-         <td>San Francisco</td>
-         <td>66</td>
-         <td>2009/01/12</td>
-         <td>$86,000</td>
-      </tr>
-      <tr>
-         <td>Cedric Kelly</td>
-         <td>Senior Javascript Developer</td>
-         <td>Edinburgh</td>
-         <td>22</td>
-         <td>2012/03/29</td>
-         <td>$433,060</td>
-      </tr>
-      <tr>
-         <td>Airi Satou</td>
-         <td>Accountant</td>
-         <td>Tokyo</td>
-         <td>33</td>
-         <td>2008/11/28</td>
-         <td>$162,700</td>
-      </tr>
-      <tr>
-         <td>Brielle Williamson</td>
-         <td>Integration Specialist</td>
-         <td>New York</td>
-         <td>61</td>
-         <td>2012/12/02</td>
-         <td>$372,000</td>
-      </tr>
-      <tr>
-         <td>Herrod Chandler</td>
-         <td>Sales Assistant</td>
-         <td>San Francisco</td>
-         <td>59</td>
-         <td>2012/08/06</td>
-         <td>$137,500</td>
-      </tr>
-      <tr>
-         <td>Rhona Davidson</td>
-         <td>Integration Specialist</td>
-         <td>Tokyo</td>
-         <td>55</td>
-         <td>2010/10/14</td>
-         <td>$327,900</td>
-      </tr>
-      <tr>
-         <td>Colleen Hurst</td>
-         <td>Javascript Developer</td>
-         <td>San Francisco</td>
-         <td>39</td>
-         <td>2009/09/15</td>
-         <td>$205,500</td>
-      </tr>
-      <tr>
-         <td>Sonya Frost</td>
-         <td>Software Engineer</td>
-         <td>Edinburgh</td>
-         <td>23</td>
-         <td>2008/12/13</td>
-         <td>$103,600</td>
-      </tr>
-      <tr>
-         <td>Jena Gaines</td>
-         <td>Office Manager</td>
-         <td>London</td>
-         <td>30</td>
-         <td>2008/12/19</td>
-         <td>$90,560</td>
-      </tr>
-      <tr>
-         <td>Quinn Flynn</td>
-         <td>Support Lead</td>
-         <td>Edinburgh</td>
-         <td>22</td>
-         <td>2013/03/03</td>
-         <td>$342,000</td>
-      </tr>
-      <tr>
-         <td>Charde Marshall</td>
-         <td>Regional Director</td>
-         <td>San Francisco</td>
-         <td>36</td>
-         <td>2008/10/16</td>
-         <td>$470,600</td>
-      </tr>
-      <tr>
-         <td>Haley Kennedy</td>
-         <td>Senior Marketing Designer</td>
-         <td>London</td>
-         <td>43</td>
-         <td>2012/12/18</td>
-         <td>$313,500</td>
-      </tr>
-      <tr>
-         <td>Tatyana Fitzpatrick</td>
-         <td>Regional Director</td>
-         <td>London</td>
-         <td>19</td>
-         <td>2010/03/17</td>
-         <td>$385,750</td>
-      </tr>
-      <tr>
-         <td>Michael Silva</td>
-         <td>Marketing Designer</td>
-         <td>London</td>
-         <td>66</td>
-         <td>2012/11/27</td>
-         <td>$198,500</td>
-      </tr>
-      <tr>
-         <td>Paul Byrd</td>
-         <td>Chief Financial Officer (CFO)</td>
-         <td>New York</td>
-         <td>64</td>
-         <td>2010/06/09</td>
-         <td>$725,000</td>
-      </tr>
-      <tr>
-         <td>Gloria Little</td>
-         <td>Systems Administrator</td>
-         <td>New York</td>
-         <td>59</td>
-         <td>2009/04/10</td>
-         <td>$237,500</td>
-      </tr>
-      <tr>
-         <td>Bradley Greer</td>
-         <td>Software Engineer</td>
-         <td>London</td>
-         <td>41</td>
-         <td>2012/10/13</td>
-         <td>$132,000</td>
-      </tr>
-      <tr>
-         <td>Dai Rios</td>
-         <td>Personnel Lead</td>
-         <td>Edinburgh</td>
-         <td>35</td>
-         <td>2012/09/26</td>
-         <td>$217,500</td>
-      </tr>
-</table>
-
-
-

根据选择的话题从而查询出评论数据

-
-
-
- - - - - - - - - - - {% for i in comments %} - - - - - - - - - {% endfor %} - -
文章ID评论用户评论性别评论话题评论内容点赞量
{{ i[0] }}{{ i[5] }} - {% if i[6] =='f' %} - 女生 - {% else %} - 男生 - {% endif %} - {{ i[9] }}{{ i[4] }}👍{{ i[2] }}
-
-
-
-
-
- - -
-
-
-
-
-

AI深度分析

-
-
- -
-
- -
-
- -
-
- -
-
- - -
- -
-
-
-
- - - - -
-
-
- -
-
- -
-
- -
-
-
- -
- -
-
-
-
-
- - - - - - - -{% endblock %} - -{% block echarts %} - - +{% extends 'base_page.html' %} +{% block title %} + 舆情预测 +{% endblock %} +{% block nav %} + +{% endblock %} +{% block content %} +
+
+
+
+

话题统计页

+
+
+
+
+
+
+
+
+

+<div class="form-group">
+   <label>Small</label>
+   <select class="form-control form-control-sm mb-3">
+      <option selected="">Open this select menu</option>
+      <option value="1">One</option>
+      <option value="2">Two</option>
+      <option value="3">Three</option>
+   </select>
+</div>
+<div class="form-group">
+   <label>Default</label>
+   <select class="form-control mb-3">
+      <option selected="">Open this select menu</option>
+      <option value="1">One</option>
+      <option value="2">Two</option>
+      <option value="3">Three</option>
+   </select>
+</div>
+<div class="form-group">
+   <label>Large</label>
+   <select class="form-control form-control-lg">
+      <option selected="">Open this select menu</option>
+      <option value="1">One</option>
+      <option value="2">Two</option>
+      <option value="3">Three</option>
+   </select>
+</div>
+
+
+
+ + +
+
+
+ + +
+
+ +
+
+ +
+
+
+
+
+
+

{{ defaultHotWord }}

+
+
    +
  • 话题名称:{{ defaultHotWord }}
  • +
  • 出现次数:{{ hotWordLen }}次
  • +
  • 话题情感:{{ sentences }}
  • +
+
+ +
+
+
+
+
+
+

话题年份变化趋势

+
+
+
+
+
+
+
+
+
+
+

话题查询表格

+
+
+ +
+
+
+
+
+

+<table id="datatable" class="table data-table table-striped table-bordered" >
+   <thead>
+      <tr>
+         <th>Name</th>
+         <th>Position</th>
+         <th>Office</th>
+         <th>Age</th>
+         <th>Start date</th>
+         <th>Salary</th>
+      </tr>
+   </thead>
+   <tbody>
+      <tr>
+         <td>Tiger Nixon</td>
+         <td>System Architect</td>
+         <td>Edinburgh</td>
+         <td>61</td>
+         <td>2011/04/25</td>
+         <td>$320,800</td>
+      </tr>
+      <tr>
+         <td>Garrett Winters</td>
+         <td>Accountant</td>
+         <td>Tokyo</td>
+         <td>63</td>
+         <td>2011/07/25</td>
+         <td>$170,750</td>
+      </tr>
+      <tr>
+         <td>Ashton Cox</td>
+         <td>Junior Technical Author</td>
+         <td>San Francisco</td>
+         <td>66</td>
+         <td>2009/01/12</td>
+         <td>$86,000</td>
+      </tr>
+      <tr>
+         <td>Cedric Kelly</td>
+         <td>Senior Javascript Developer</td>
+         <td>Edinburgh</td>
+         <td>22</td>
+         <td>2012/03/29</td>
+         <td>$433,060</td>
+      </tr>
+      <tr>
+         <td>Airi Satou</td>
+         <td>Accountant</td>
+         <td>Tokyo</td>
+         <td>33</td>
+         <td>2008/11/28</td>
+         <td>$162,700</td>
+      </tr>
+      <tr>
+         <td>Brielle Williamson</td>
+         <td>Integration Specialist</td>
+         <td>New York</td>
+         <td>61</td>
+         <td>2012/12/02</td>
+         <td>$372,000</td>
+      </tr>
+      <tr>
+         <td>Herrod Chandler</td>
+         <td>Sales Assistant</td>
+         <td>San Francisco</td>
+         <td>59</td>
+         <td>2012/08/06</td>
+         <td>$137,500</td>
+      </tr>
+      <tr>
+         <td>Rhona Davidson</td>
+         <td>Integration Specialist</td>
+         <td>Tokyo</td>
+         <td>55</td>
+         <td>2010/10/14</td>
+         <td>$327,900</td>
+      </tr>
+      <tr>
+         <td>Colleen Hurst</td>
+         <td>Javascript Developer</td>
+         <td>San Francisco</td>
+         <td>39</td>
+         <td>2009/09/15</td>
+         <td>$205,500</td>
+      </tr>
+      <tr>
+         <td>Sonya Frost</td>
+         <td>Software Engineer</td>
+         <td>Edinburgh</td>
+         <td>23</td>
+         <td>2008/12/13</td>
+         <td>$103,600</td>
+      </tr>
+      <tr>
+         <td>Jena Gaines</td>
+         <td>Office Manager</td>
+         <td>London</td>
+         <td>30</td>
+         <td>2008/12/19</td>
+         <td>$90,560</td>
+      </tr>
+      <tr>
+         <td>Quinn Flynn</td>
+         <td>Support Lead</td>
+         <td>Edinburgh</td>
+         <td>22</td>
+         <td>2013/03/03</td>
+         <td>$342,000</td>
+      </tr>
+      <tr>
+         <td>Charde Marshall</td>
+         <td>Regional Director</td>
+         <td>San Francisco</td>
+         <td>36</td>
+         <td>2008/10/16</td>
+         <td>$470,600</td>
+      </tr>
+      <tr>
+         <td>Haley Kennedy</td>
+         <td>Senior Marketing Designer</td>
+         <td>London</td>
+         <td>43</td>
+         <td>2012/12/18</td>
+         <td>$313,500</td>
+      </tr>
+      <tr>
+         <td>Tatyana Fitzpatrick</td>
+         <td>Regional Director</td>
+         <td>London</td>
+         <td>19</td>
+         <td>2010/03/17</td>
+         <td>$385,750</td>
+      </tr>
+      <tr>
+         <td>Michael Silva</td>
+         <td>Marketing Designer</td>
+         <td>London</td>
+         <td>66</td>
+         <td>2012/11/27</td>
+         <td>$198,500</td>
+      </tr>
+      <tr>
+         <td>Paul Byrd</td>
+         <td>Chief Financial Officer (CFO)</td>
+         <td>New York</td>
+         <td>64</td>
+         <td>2010/06/09</td>
+         <td>$725,000</td>
+      </tr>
+      <tr>
+         <td>Gloria Little</td>
+         <td>Systems Administrator</td>
+         <td>New York</td>
+         <td>59</td>
+         <td>2009/04/10</td>
+         <td>$237,500</td>
+      </tr>
+      <tr>
+         <td>Bradley Greer</td>
+         <td>Software Engineer</td>
+         <td>London</td>
+         <td>41</td>
+         <td>2012/10/13</td>
+         <td>$132,000</td>
+      </tr>
+      <tr>
+         <td>Dai Rios</td>
+         <td>Personnel Lead</td>
+         <td>Edinburgh</td>
+         <td>35</td>
+         <td>2012/09/26</td>
+         <td>$217,500</td>
+      </tr>
+</table>
+
+
+

根据选择的话题从而查询出评论数据

+
+
+
+ + + + + + + + + + + {% for i in comments %} + + + + + + + + + {% endfor %} + +
文章ID评论用户评论性别评论话题评论内容点赞量
{{ i[0] }}{{ i[5] }} + {% if i[6] =='f' %} + 女生 + {% else %} + 男生 + {% endif %} + {{ i[9] }}{{ i[4] }}👍{{ i[2] }}
+
+
+
+
+
+ + +
+
+
+
+
+

AI深度分析

+
+
+ +
+
+ +
+
+ +
+
+ +
+
+ + +
+ +
+
+
+
+ + + + +
+
+
+ +
+
+ +
+
+ +
+
+
+ +
+ +
+
+
+
+
+ + + + + + + +{% endblock %} + +{% block echarts %} + + {% endblock %} \ No newline at end of file diff --git a/views/spider_control.py b/views/spider_control.py index bd062a7..67a445f 100644 --- a/views/spider_control.py +++ b/views/spider_control.py @@ -1,371 +1,371 @@ -from flask import Blueprint, jsonify, request, render_template -import json -import os -from datetime import datetime -import threading -from queue import Queue -import asyncio -import websockets -import logging -from spider.spiderData import SpiderData -from openai import OpenAI -from anthropic import Anthropic -import aiohttp -from concurrent.futures import ThreadPoolExecutor -from ratelimit import limits, sleep_and_retry -from tenacity import retry, stop_after_attempt, wait_exponential - -# 创建蓝图 -spider_bp = Blueprint('spider', __name__) - -# 创建日志记录器 -logger = logging.getLogger('spider_control') -logger.setLevel(logging.INFO) - -# 存储WebSocket连接的集合 -websocket_connections = set() - -# 创建消息队列 -message_queue = Queue() - -# 创建线程池 -thread_pool = ThreadPoolExecutor(max_workers=3) - -# 创建异步事件循环 -loop = asyncio.new_event_loop() -asyncio.set_event_loop(loop) - -# 默认配置 -DEFAULT_CONFIG = { - 'crawlDepth': 3, - 'interval': 5, - 'maxRetries': 3, - 'timeout': 30, - 'maxConcurrent': 2 -} - -# 限流装饰器 -@sleep_and_retry -@limits(calls=100, period=60) # 每分钟最多100个请求 -def rate_limited_request(): - pass - -class SpiderWorker: - def __init__(self, topics, parameters): - self.topics = topics - self.parameters = parameters - self.total_topics = len(topics) - self.completed_topics = 0 - self.spider = SpiderData() - self.message_buffer = [] - self.message_buffer_size = 10 - self.semaphore = asyncio.Semaphore(parameters.get('maxConcurrent', DEFAULT_CONFIG['maxConcurrent'])) - - async def send_message(self, message): - """异步发送消息,使用缓冲区优化""" - self.message_buffer.append(message) - if len(self.message_buffer) >= self.message_buffer_size: - await self.flush_messages() - - async def flush_messages(self): - """刷新消息缓冲区""" - if not self.message_buffer: - return - - try: - await broadcast_message(self.message_buffer) - self.message_buffer.clear() - except Exception as e: - logger.error(f"发送消息失败: {e}") - - @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) - async def crawl_single_topic(self, topic): - """爬取单个话题""" - try: - rate_limited_request() - - await self.send_message({ - 'type': 'log', - 'message': f'开始爬取话题: {topic}' - }) - - async with self.semaphore: - await asyncio.get_event_loop().run_in_executor( - thread_pool, - self.spider.crawl_topic, - topic, - self.parameters['crawlDepth'], - self.parameters['interval'], - self.parameters['maxRetries'], - self.parameters['timeout'] - ) - - self.completed_topics += 1 - progress = int((self.completed_topics / self.total_topics) * 100) - - await self.send_message({ - 'type': 'progress', - 'value': progress - }) - - await self.send_message({ - 'type': 'log', - 'message': f'话题 {topic} 爬取完成' - }) - - except Exception as e: - logger.error(f"爬取话题 {topic} 失败: {e}") - await self.send_message({ - 'type': 'log', - 'message': f'爬取话题 {topic} 时出错: {str(e)}' - }) - raise - - async def run(self): - """运行爬虫任务""" - try: - tasks = [self.crawl_single_topic(topic) for topic in self.topics] - await asyncio.gather(*tasks) - await self.flush_messages() - - await self.send_message({ - 'type': 'log', - 'message': '所有话题爬取完成' - }) - - except Exception as e: - logger.error(f"爬虫任务执行出错: {e}") - await self.send_message({ - 'type': 'log', - 'message': f'爬虫任务执行出错: {str(e)}' - }) - finally: - await self.flush_messages() - -async def broadcast_message(messages): - """广播消息到所有WebSocket连接""" - if not websocket_connections: - return - - for websocket in websocket_connections.copy(): - try: - if isinstance(messages, list): - for message in messages: - await websocket.send(json.dumps(message)) - else: - await websocket.send(json.dumps(messages)) - except websockets.exceptions.ConnectionClosed: - websocket_connections.remove(websocket) - except Exception as e: - logger.error(f"发送WebSocket消息失败: {e}") - websocket_connections.remove(websocket) - -@spider_bp.route('/spider/control') -def spider_control(): - """渲染爬虫控制页面""" - return render_template('spider_control.html') - -@spider_bp.route('/api/spider/start', methods=['POST']) -async def start_spider(): - """启动爬虫任务""" - try: - data = request.get_json() - topics = data.get('topics', []) - parameters = {**DEFAULT_CONFIG, **data.get('parameters', {})} - - if not topics: - return jsonify({ - 'success': False, - 'message': '请选择至少一个话题' - }) - - # 创建爬虫工作器 - worker = SpiderWorker(topics, parameters) - - # 在事件循环中运行爬虫任务 - asyncio.create_task(worker.run()) - - return jsonify({ - 'success': True, - 'message': '爬虫任务已启动' - }) - - except Exception as e: - logger.error(f"启动爬虫任务失败: {e}") - return jsonify({ - 'success': False, - 'message': str(e) - }) - -@spider_bp.route('/api/spider/save-config', methods=['POST']) -def save_spider_config(): - """保存爬虫配置""" - try: - config = request.get_json() - if save_config(config): - return jsonify({ - 'success': True, - 'message': '配置保存成功' - }) - else: - return jsonify({ - 'success': False, - 'message': '配置保存失败' - }) - except Exception as e: - logger.error(f"保存配置失败: {e}") - return jsonify({ - 'success': False, - 'message': str(e) - }) - -@spider_bp.websocket('/ws/spider-status') -async def spider_status_socket(websocket): - """WebSocket连接处理""" - try: - websocket_connections.add(websocket) - logging.info("新的WebSocket连接已建立") - - try: - while True: - # 等待消息,保持连接活跃 - message = await websocket.receive() - if message is None: - break - except websockets.exceptions.ConnectionClosed: - logging.info("WebSocket连接已关闭") - finally: - websocket_connections.remove(websocket) - logging.info("WebSocket连接已移除") - except Exception as e: - logger.error(f"WebSocket连接处理失败: {e}") - if websocket in websocket_connections: - websocket_connections.remove(websocket) - -def get_ai_client(): - """获取可用的AI客户端""" - # 按优先级尝试不同的AI服务 - if os.getenv('ANTHROPIC_API_KEY'): - return { - 'type': 'anthropic', - 'client': Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY')) - } - elif os.getenv('OPENAI_API_KEY'): - return { - 'type': 'openai', - 'client': OpenAI(api_key=os.getenv('OPENAI_API_KEY')) - } - else: - raise ValueError("未找到可用的AI API密钥") - -def parse_ai_response(response_text): - """解析AI响应中的JSON配置""" - try: - # 查找JSON内容 - start = response_text.find('{') - end = response_text.rfind('}') + 1 - if start == -1 or end == 0: - raise ValueError("未找到有效的JSON配置") - - json_str = response_text[start:end] - config = json.loads(json_str) - - # 验证配置格式 - if not isinstance(config.get('topics'), list): - raise ValueError("配置必须包含话题列表") - - parameters = config.get('parameters', {}) - if not all(key in parameters for key in ['crawlDepth', 'interval', 'maxRetries', 'timeout']): - raise ValueError("配置缺少必要的参数") - - # 提取建议文本(JSON之前的部分) - suggestion = response_text[:start].strip() - - return config, suggestion - except Exception as e: - raise ValueError(f"解析AI响应失败: {str(e)}") - -@spider_bp.route('/api/spider/ai-config', methods=['POST']) -def generate_ai_config(): - """使用AI生成爬虫配置""" - try: - prompt = request.json.get('prompt', '') - if not prompt: - return jsonify({ - 'success': False, - 'message': '请提供爬虫需求描述' - }) - - # 构建AI提示 - system_prompt = """你是一个专业的爬虫配置助手。请根据用户的自然语言描述,生成合适的微博爬虫配置。 -配置应包含以下内容: -1. 要爬取的话题列表 -2. 爬虫参数(爬取深度、间隔时间、重试次数、超时时间) - -请先用通俗易懂的语言解释你的配置建议,然后在最后提供一个JSON格式的具体配置。 -注意: -- 爬取深度(crawlDepth)范围:1-10页 -- 间隔时间(interval)范围:3-30秒 -- 重试次数(maxRetries)范围:1-5次 -- 超时时间(timeout)范围:10-60秒 -- 所有参数都必须是整数 - -示例输出格式: -根据您的需求,我建议... - -{ - "topics": ["话题1", "话题2"], - "parameters": { - "crawlDepth": 5, - "interval": 5, - "maxRetries": 3, - "timeout": 30 - } -}""" - - # 获取AI客户端 - ai = get_ai_client() - - try: - if ai['type'] == 'anthropic': - response = ai['client'].messages.create( - model="claude-3-sonnet-20240229", - max_tokens=1000, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": prompt} - ] - ) - response_text = response.content[0].text - else: # OpenAI - response = ai['client'].chat.completions.create( - model="gpt-3.5-turbo", - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": prompt} - ] - ) - response_text = response.choices[0].message.content - - # 解析AI响应 - config, suggestion = parse_ai_response(response_text) - - return jsonify({ - 'success': True, - 'config': config, - 'suggestion': suggestion - }) - - except Exception as e: - logger.error(f"AI服务调用失败: {e}") - return jsonify({ - 'success': False, - 'message': f"AI配置生成失败: {str(e)}" - }) - - except Exception as e: - logger.error(f"生成配置失败: {e}") - return jsonify({ - 'success': False, - 'message': str(e) +from flask import Blueprint, jsonify, request, render_template +import json +import os +from datetime import datetime +import threading +from queue import Queue +import asyncio +import websockets +import logging +from spider.spiderData import SpiderData +from openai import OpenAI +from anthropic import Anthropic +import aiohttp +from concurrent.futures import ThreadPoolExecutor +from ratelimit import limits, sleep_and_retry +from tenacity import retry, stop_after_attempt, wait_exponential + +# 创建蓝图 +spider_bp = Blueprint('spider', __name__) + +# 创建日志记录器 +logger = logging.getLogger('spider_control') +logger.setLevel(logging.INFO) + +# 存储WebSocket连接的集合 +websocket_connections = set() + +# 创建消息队列 +message_queue = Queue() + +# 创建线程池 +thread_pool = ThreadPoolExecutor(max_workers=3) + +# 创建异步事件循环 +loop = asyncio.new_event_loop() +asyncio.set_event_loop(loop) + +# 默认配置 +DEFAULT_CONFIG = { + 'crawlDepth': 3, + 'interval': 5, + 'maxRetries': 3, + 'timeout': 30, + 'maxConcurrent': 2 +} + +# 限流装饰器 +@sleep_and_retry +@limits(calls=100, period=60) # 每分钟最多100个请求 +def rate_limited_request(): + pass + +class SpiderWorker: + def __init__(self, topics, parameters): + self.topics = topics + self.parameters = parameters + self.total_topics = len(topics) + self.completed_topics = 0 + self.spider = SpiderData() + self.message_buffer = [] + self.message_buffer_size = 10 + self.semaphore = asyncio.Semaphore(parameters.get('maxConcurrent', DEFAULT_CONFIG['maxConcurrent'])) + + async def send_message(self, message): + """异步发送消息,使用缓冲区优化""" + self.message_buffer.append(message) + if len(self.message_buffer) >= self.message_buffer_size: + await self.flush_messages() + + async def flush_messages(self): + """刷新消息缓冲区""" + if not self.message_buffer: + return + + try: + await broadcast_message(self.message_buffer) + self.message_buffer.clear() + except Exception as e: + logger.error(f"发送消息失败: {e}") + + @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10)) + async def crawl_single_topic(self, topic): + """爬取单个话题""" + try: + rate_limited_request() + + await self.send_message({ + 'type': 'log', + 'message': f'开始爬取话题: {topic}' + }) + + async with self.semaphore: + await asyncio.get_event_loop().run_in_executor( + thread_pool, + self.spider.crawl_topic, + topic, + self.parameters['crawlDepth'], + self.parameters['interval'], + self.parameters['maxRetries'], + self.parameters['timeout'] + ) + + self.completed_topics += 1 + progress = int((self.completed_topics / self.total_topics) * 100) + + await self.send_message({ + 'type': 'progress', + 'value': progress + }) + + await self.send_message({ + 'type': 'log', + 'message': f'话题 {topic} 爬取完成' + }) + + except Exception as e: + logger.error(f"爬取话题 {topic} 失败: {e}") + await self.send_message({ + 'type': 'log', + 'message': f'爬取话题 {topic} 时出错: {str(e)}' + }) + raise + + async def run(self): + """运行爬虫任务""" + try: + tasks = [self.crawl_single_topic(topic) for topic in self.topics] + await asyncio.gather(*tasks) + await self.flush_messages() + + await self.send_message({ + 'type': 'log', + 'message': '所有话题爬取完成' + }) + + except Exception as e: + logger.error(f"爬虫任务执行出错: {e}") + await self.send_message({ + 'type': 'log', + 'message': f'爬虫任务执行出错: {str(e)}' + }) + finally: + await self.flush_messages() + +async def broadcast_message(messages): + """广播消息到所有WebSocket连接""" + if not websocket_connections: + return + + for websocket in websocket_connections.copy(): + try: + if isinstance(messages, list): + for message in messages: + await websocket.send(json.dumps(message)) + else: + await websocket.send(json.dumps(messages)) + except websockets.exceptions.ConnectionClosed: + websocket_connections.remove(websocket) + except Exception as e: + logger.error(f"发送WebSocket消息失败: {e}") + websocket_connections.remove(websocket) + +@spider_bp.route('/spider/control') +def spider_control(): + """渲染爬虫控制页面""" + return render_template('spider_control.html') + +@spider_bp.route('/api/spider/start', methods=['POST']) +async def start_spider(): + """启动爬虫任务""" + try: + data = request.get_json() + topics = data.get('topics', []) + parameters = {**DEFAULT_CONFIG, **data.get('parameters', {})} + + if not topics: + return jsonify({ + 'success': False, + 'message': '请选择至少一个话题' + }) + + # 创建爬虫工作器 + worker = SpiderWorker(topics, parameters) + + # 在事件循环中运行爬虫任务 + asyncio.create_task(worker.run()) + + return jsonify({ + 'success': True, + 'message': '爬虫任务已启动' + }) + + except Exception as e: + logger.error(f"启动爬虫任务失败: {e}") + return jsonify({ + 'success': False, + 'message': str(e) + }) + +@spider_bp.route('/api/spider/save-config', methods=['POST']) +def save_spider_config(): + """保存爬虫配置""" + try: + config = request.get_json() + if save_config(config): + return jsonify({ + 'success': True, + 'message': '配置保存成功' + }) + else: + return jsonify({ + 'success': False, + 'message': '配置保存失败' + }) + except Exception as e: + logger.error(f"保存配置失败: {e}") + return jsonify({ + 'success': False, + 'message': str(e) + }) + +@spider_bp.websocket('/ws/spider-status') +async def spider_status_socket(websocket): + """WebSocket连接处理""" + try: + websocket_connections.add(websocket) + logging.info("新的WebSocket连接已建立") + + try: + while True: + # 等待消息,保持连接活跃 + message = await websocket.receive() + if message is None: + break + except websockets.exceptions.ConnectionClosed: + logging.info("WebSocket连接已关闭") + finally: + websocket_connections.remove(websocket) + logging.info("WebSocket连接已移除") + except Exception as e: + logger.error(f"WebSocket连接处理失败: {e}") + if websocket in websocket_connections: + websocket_connections.remove(websocket) + +def get_ai_client(): + """获取可用的AI客户端""" + # 按优先级尝试不同的AI服务 + if os.getenv('ANTHROPIC_API_KEY'): + return { + 'type': 'anthropic', + 'client': Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY')) + } + elif os.getenv('OPENAI_API_KEY'): + return { + 'type': 'openai', + 'client': OpenAI(api_key=os.getenv('OPENAI_API_KEY')) + } + else: + raise ValueError("未找到可用的AI API密钥") + +def parse_ai_response(response_text): + """解析AI响应中的JSON配置""" + try: + # 查找JSON内容 + start = response_text.find('{') + end = response_text.rfind('}') + 1 + if start == -1 or end == 0: + raise ValueError("未找到有效的JSON配置") + + json_str = response_text[start:end] + config = json.loads(json_str) + + # 验证配置格式 + if not isinstance(config.get('topics'), list): + raise ValueError("配置必须包含话题列表") + + parameters = config.get('parameters', {}) + if not all(key in parameters for key in ['crawlDepth', 'interval', 'maxRetries', 'timeout']): + raise ValueError("配置缺少必要的参数") + + # 提取建议文本(JSON之前的部分) + suggestion = response_text[:start].strip() + + return config, suggestion + except Exception as e: + raise ValueError(f"解析AI响应失败: {str(e)}") + +@spider_bp.route('/api/spider/ai-config', methods=['POST']) +def generate_ai_config(): + """使用AI生成爬虫配置""" + try: + prompt = request.json.get('prompt', '') + if not prompt: + return jsonify({ + 'success': False, + 'message': '请提供爬虫需求描述' + }) + + # 构建AI提示 + system_prompt = """你是一个专业的爬虫配置助手。请根据用户的自然语言描述,生成合适的微博爬虫配置。 +配置应包含以下内容: +1. 要爬取的话题列表 +2. 爬虫参数(爬取深度、间隔时间、重试次数、超时时间) + +请先用通俗易懂的语言解释你的配置建议,然后在最后提供一个JSON格式的具体配置。 +注意: +- 爬取深度(crawlDepth)范围:1-10页 +- 间隔时间(interval)范围:3-30秒 +- 重试次数(maxRetries)范围:1-5次 +- 超时时间(timeout)范围:10-60秒 +- 所有参数都必须是整数 + +示例输出格式: +根据您的需求,我建议... + +{ + "topics": ["话题1", "话题2"], + "parameters": { + "crawlDepth": 5, + "interval": 5, + "maxRetries": 3, + "timeout": 30 + } +}""" + + # 获取AI客户端 + ai = get_ai_client() + + try: + if ai['type'] == 'anthropic': + response = ai['client'].messages.create( + model="claude-3-sonnet-20240229", + max_tokens=1000, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ] + ) + response_text = response.content[0].text + else: # OpenAI + response = ai['client'].chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": prompt} + ] + ) + response_text = response.choices[0].message.content + + # 解析AI响应 + config, suggestion = parse_ai_response(response_text) + + return jsonify({ + 'success': True, + 'config': config, + 'suggestion': suggestion + }) + + except Exception as e: + logger.error(f"AI服务调用失败: {e}") + return jsonify({ + 'success': False, + 'message': f"AI配置生成失败: {str(e)}" + }) + + except Exception as e: + logger.error(f"生成配置失败: {e}") + return jsonify({ + 'success': False, + 'message': str(e) }) \ No newline at end of file diff --git a/views/user/__pycache__/user.cpython-38.pyc b/views/user/__pycache__/user.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..696375f953091c955323d386bbc8ad72b33ee8fb GIT binary patch literal 2249 zcmbVO-EUMy6rZ_Yd-v`~zkpH^Tt84&OG~QaM+ia1j|WrGLL{5&X1g=nz4YF_>&#rB zP5WSqga?Vn5EI|FBpQqlP1J<{z&~PMO`(18A811GoVhHtgePx1XFkr{Gv}Q7{m$L< zrBXoP`SFu)TmSlm{DYI7j|Y=e(Bv%;j4&FJTGrDVg}D`3v0bxcr{=_N&5ga97w2lZ z*su8%_t{ZC4r+lJJ5eDn){2yHo4LHiJeFhr4X0M-<%&jMDMw^;Fp#Y(Jvm8{a* zpwSHM&D6VsYf1PrvwEI!ikPA8|xs+kjsZ#BNLM+0d#Dz%2=UjG^j^x@&B^OKX zBz8LAwsJD^`j$HC4qhvnpX#3+2&uQoEnVEB7Iu~=*PMB<> z*Zs$afdlOWX!0u%id>|OE?8n%S-m1Mqm80BcvP@TD$BL!A zW$5K;m18-418n%rA0sNy{1Ze4EN|=($XPHg%5ci|sBy5~=&-ur^PyB+Ote)TH7RKO zj|Br4+6**#3WOp(aIOcAtg^R@RF0T$6GUFINqJs zr3+yrxsdYD!l`3ZZ*04%Y8`!B``w0=mlMIXkGbwNVqVSZT)QFLQ8=eP*=|g|d0czE z(@YrGUfN}Fr2@UvPAd_y&bRp$7S6-{RxN=**JXUWemPX_x@<)1HzE$g^7g0AcW2MT z8FKdcTgTpcefBh$3o7Z(e!;`Jq<%J?i^AraZrA~2)ZgngqD2|XnMKG@JbNkRm*p%v zXv9Q!QRiD>1dh~OojqV&4uc?cknW=nwH{c|92z`W+HcZ5aUx2ZjYyuHc=Xctu;dY- zweo7EC6c&efaYau>0Yq4`PZ)--`}}={ilufYxjEJY~B2MbA4T)W+|BLHs<+cJAqsk zvQ&5(#sor20LLLY5nQhJvIDgp@s7x&@);C46bQkpEuMo;yLi5=l`?e5wAloTn6zc$ zklAFk2%{`qE1gq)-J^3m7!+_@qfeM|07;JM!et4E5GNj z5rCzwf?k37y&}sSm^#Z&uLP%*SpYD7xC$_}S;4Fp0jf@KV2!B3Wt)}8pc=9AXf_+b z*(iB5AI#?S_NoOIhS<;}W({Y|5U^OeF?>36o_H%C^%B*4XJP_ z_MvJN1;#AKP#B;efRVPP5*gZElS=V2tkwz-xoCJ2pqaW#yAc=9;@em;c0eD#E?Kce z?%J3+W3YEAhcTW>)h_0 literal 0 HcmV?d00001 diff --git a/views/user/templates/login_and_register.html b/views/user/templates/login_and_register.html index 0102aa0..644f712 100644 --- a/views/user/templates/login_and_register.html +++ b/views/user/templates/login_and_register.html @@ -5,6 +5,7 @@ 微博舆情分析系统 | 登录 + +
+ 切换语言 +
+