Uploading the AI Crawler System: MindSpider

This commit is contained in:
戒酒的李白
2025-08-27 13:49:07 +08:00
parent 822bad557f
commit 587e709e82
174 changed files with 34562 additions and 25 deletions
@@ -0,0 +1,13 @@
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
# -*- coding: utf-8 -*-
from .core import TieBaCrawler
@@ -0,0 +1,385 @@
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
import asyncio
import json
from typing import Any, Callable, Dict, List, Optional, Union
from urllib.parse import urlencode
import httpx
from playwright.async_api import BrowserContext
from tenacity import RetryError, retry, stop_after_attempt, wait_fixed
import config
from base.base_crawler import AbstractApiClient
from model.m_baidu_tieba import TiebaComment, TiebaCreator, TiebaNote
from proxy.proxy_ip_pool import ProxyIpPool
from tools import utils
from .field import SearchNoteType, SearchSortType
from .help import TieBaExtractor
class BaiduTieBaClient(AbstractApiClient):
def __init__(
    self,
    timeout=10,
    ip_pool=None,
    default_ip_proxy=None,
):
    """
    Set up the Tieba HTTP client.

    Args:
        timeout: timeout value handed to every httpx request made by this client
        ip_pool: optional proxy pool; ``get`` asks it for a new proxy once retries are exhausted
        default_ip_proxy: proxy used by ``request`` when the caller does not pass one
    """
    # NOTE(review): the header key below is "Cookies", not the standard "Cookie" — confirm it is intended.
    request_headers = {
        "User-Agent": utils.get_user_agent(),
        "Cookies": "",
    }
    self.ip_pool: Optional[ProxyIpPool] = ip_pool
    self.timeout = timeout
    self.default_ip_proxy = default_ip_proxy
    self.headers = request_headers
    self._host = "https://tieba.baidu.com"
    self._page_extractor = TieBaExtractor()
@retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
async def request(self, method, url, return_ori_content=False, proxy=None, **kwargs) -> Union[str, Any]:
    """
    Common httpx request wrapper that validates the response before handing it back.

    The ``retry`` decorator re-invokes this coroutine whenever it raises
    (``stop_after_attempt(3)``, ``wait_fixed(1)``).

    Args:
        method: HTTP method name
        url: full URL to request
        return_ori_content: return the raw response text instead of the decoded JSON
        proxy: proxy for this call; ``self.default_ip_proxy`` is used when it is falsy
        **kwargs: extra arguments forwarded to ``client.request`` (request body etc.)

    Returns:
        The response text when ``return_ori_content`` is true, otherwise the decoded JSON.

    Raises:
        Exception: the status code is not 200, or the body is empty / equals "blocked".
    """
    effective_proxy = proxy or self.default_ip_proxy
    async with httpx.AsyncClient(proxy=effective_proxy) as client:
        response = await client.request(method, url, timeout=self.timeout, headers=self.headers, **kwargs)

    if response.status_code != 200:
        utils.logger.error(f"Request failed, method: {method}, url: {url}, status code: {response.status_code}")
        utils.logger.error(f"Request failed, response: {response.text}")
        raise Exception(f"Request failed, method: {method}, url: {url}, status code: {response.status_code}")

    # An empty body or the literal text "blocked" is treated as the account/IP being blocked.
    if response.text in ("", "blocked"):
        utils.logger.error(f"request params incrr, response.text: {response.text}")
        raise Exception("account blocked")

    return response.text if return_ori_content else response.json()
async def get(self, uri: str, params=None, return_ori_content=False, **kwargs) -> Any:
    """
    Issue a GET against the Tieba host, switching to a pooled proxy when retries run out.

    Args:
        uri: request path, appended to ``self._host``
        params: query parameters; url-encoded onto ``uri`` only when a dict is given
        return_ori_content: return the raw response text instead of the decoded JSON
        **kwargs: forwarded to ``self.request``

    Returns:
        Whatever ``self.request`` returns.

    Raises:
        Exception: ``self.request`` raised ``RetryError`` and no proxy pool is configured.
    """
    query_suffix = f"?{urlencode(params)}" if isinstance(params, dict) else ""
    target_url = f"{self._host}{uri}{query_suffix}"
    try:
        return await self.request(method="GET", url=target_url, return_ori_content=return_ori_content, **kwargs)
    except RetryError as e:
        if not self.ip_pool:
            utils.logger.error(f"[BaiduTieBaClient.get] 达到了最大重试次数,IP已经被Block,请尝试更换新的IP代理: {e}")
            raise Exception(f"[BaiduTieBaClient.get] 达到了最大重试次数,IP已经被Block,请尝试更换新的IP代理: {e}")
        # Retries are exhausted: take another proxy from the pool and repeat the request through it.
        proxie_model = await self.ip_pool.get_proxy()
        _, fresh_proxy = utils.format_proxy_info(proxie_model)
        res = await self.request(method="GET", url=target_url, return_ori_content=return_ori_content, proxy=fresh_proxy, **kwargs)
        # The new proxy becomes the default only after it produced a response.
        self.default_ip_proxy = fresh_proxy
        return res
async def post(self, uri: str, data: dict, **kwargs) -> Dict:
    """
    Issue a POST against the Tieba host with ``data`` serialized as compact JSON.

    Args:
        uri: request path, appended to ``self._host``
        data: request body; dumped without spaces and without ASCII escaping
        **kwargs: forwarded to ``self.request``

    Returns:
        Whatever ``self.request`` returns.
    """
    payload = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
    target_url = f"{self._host}{uri}"
    return await self.request(method="POST", url=target_url, data=payload, **kwargs)
async def pong(self) -> bool:
    """
    Probe whether the current login state is still valid.

    Returns:
        True when ``/mo/q/sync`` answers with ``no == 0``; False otherwise,
        including when the request raises.
    """
    utils.logger.info("[BaiduTieBaClient.pong] Begin to pong tieba...")
    try:
        res: Dict = await self.get("/mo/q/sync")
        utils.logger.info(f"[BaiduTieBaClient.pong] res: {res}")
        if res and res.get("no") == 0:
            return True
        utils.logger.info(f"[BaiduTieBaClient.pong] user not login, will try to login again...")
        return False
    except Exception as e:
        utils.logger.error(f"[BaiduTieBaClient.pong] Ping tieba failed: {e}, and try to login again...")
        return False
async def update_cookies(self, browser_context: BrowserContext):
    """
    Cookie-refresh hook of the API client interface; this client does nothing here.

    Args:
        browser_context: browser context object (unused)

    Returns:
        None
    """
    return None
async def get_notes_by_keyword(
    self,
    keyword: str,
    page: int = 1,
    page_size: int = 10,
    sort: SearchSortType = SearchSortType.TIME_DESC,
    note_type: SearchNoteType = SearchNoteType.FIXED_THREAD,
) -> List[TiebaNote]:
    """
    Search Tieba posts by keyword.

    Args:
        keyword: search keyword
        page: page number to fetch
        page_size: number of results per page
        sort: result ordering
        note_type: post type filter (main threads only, or threads mixed with replies)

    Returns:
        The notes extracted from the search result page.
    """
    query = dict(
        isnew=1,
        qw=keyword,
        rn=page_size,
        pn=page,
        sm=sort.value,
        only_thread=note_type.value,
    )
    search_html = await self.get("/f/search/res", params=query, return_ori_content=True)
    return self._page_extractor.extract_search_note_list(search_html)
async def get_note_by_id(self, note_id: str) -> TiebaNote:
    """
    Fetch a post page by its id and extract the post detail from it.

    Args:
        note_id: post (thread) id, used to build the ``/p/<id>`` path

    Returns:
        The note produced by the page extractor.
    """
    detail_html = await self.get(f"/p/{note_id}", return_ori_content=True)
    note = self._page_extractor.extract_note_detail(detail_html)
    return note
async def get_note_all_comments(
    self,
    note_detail: TiebaNote,
    crawl_interval: float = 1.0,
    callback: Optional[Callable] = None,
    max_count: int = 10,
) -> List[TiebaComment]:
    """
    Collect the first-level comments of a post, walking its reply pages in order.

    For every page the (possibly truncated) comments are passed to ``callback``,
    then their sub-comments are fetched via ``get_comments_all_sub_comments``.

    Args:
        note_detail: post detail object; ``note_id`` and ``total_replay_page`` are read
        crawl_interval: delay in seconds slept after each page
        callback: awaitable called as ``callback(note_id, comments)`` for each page
        max_count: maximum number of first-level comments to collect

    Returns:
        The collected first-level comments (at most ``max_count``).
    """
    uri = f"/p/{note_detail.note_id}"
    collected: List[TiebaComment] = []
    current_page = 1
    while True:
        if current_page > note_detail.total_replay_page or len(collected) >= max_count:
            break
        page_content = await self.get(uri, params={"pn": current_page}, return_ori_content=True)
        comments = self._page_extractor.extract_tieba_note_parment_comments(page_content, note_id=note_detail.note_id)
        if not comments:
            break
        # Trim the page so the total never exceeds max_count.
        remaining = max_count - len(collected)
        if len(comments) > remaining:
            comments = comments[:remaining]
        if callback:
            await callback(note_detail.note_id, comments)
        collected.extend(comments)
        # Fetch the sub-comments of the comments kept from this page.
        await self.get_comments_all_sub_comments(comments, crawl_interval=crawl_interval, callback=callback)
        await asyncio.sleep(crawl_interval)
        current_page += 1
    return collected
async def get_comments_all_sub_comments(
    self,
    comments: List[TiebaComment],
    crawl_interval: float = 1.0,
    callback: Optional[Callable] = None,
) -> List[TiebaComment]:
    """
    Collect the sub-comments (replies) of every comment in ``comments``.

    Args:
        comments: first-level comments whose replies should be fetched
        crawl_interval: delay in seconds slept after each fetched page
        callback: awaitable called as ``callback(note_id, sub_comments)`` for each page

    Returns:
        All sub-comments found; an empty list when ``config.ENABLE_GET_SUB_COMMENTS`` is off.
    """
    if not config.ENABLE_GET_SUB_COMMENTS:
        return []

    # NOTE: a login-state guard (cookie / pong check) is commented out in the original code here.
    uri = "/p/comment"
    collected: List[TiebaComment] = []
    for parment_comment in comments:
        reply_total = parment_comment.sub_comment_count
        if reply_total == 0:
            continue
        # Pages are numbered from 1; the code assumes 10 replies per page.
        last_page = reply_total // 10 + 1
        for current_page in range(1, last_page + 1):
            params = {
                "tid": parment_comment.note_id,  # post id
                "pid": parment_comment.comment_id,  # parent comment id
                "fid": parment_comment.tieba_id,  # forum id
                "pn": current_page,  # page number
            }
            page_content = await self.get(uri, params=params, return_ori_content=True)
            sub_comments = self._page_extractor.extract_tieba_note_sub_comments(page_content, parent_comment=parment_comment)
            if not sub_comments:
                break
            if callback:
                await callback(parment_comment.note_id, sub_comments)
            collected.extend(sub_comments)
            await asyncio.sleep(crawl_interval)
    return collected
async def get_notes_by_tieba_name(self, tieba_name: str, page_num: int) -> List[TiebaNote]:
    """
    Fetch one page of a forum's thread list by forum name.

    Args:
        tieba_name: forum (tieba) name, sent as the ``kw`` query parameter
        page_num: value sent as the ``pn`` query parameter

    Returns:
        The notes extracted from the forum's thread list page.
    """
    # Encode the query instead of interpolating raw text, so a name containing
    # reserved characters (&, #, spaces, ...) stays inside the kw parameter.
    query = urlencode({"kw": tieba_name, "pn": page_num})
    page_content = await self.get(f"/f?{query}", return_ori_content=True)
    return self._page_extractor.extract_tieba_note_list(page_content)
async def get_creator_info_by_url(self, creator_url: str) -> str:
    """
    Download a creator's home page.

    Args:
        creator_url: absolute URL of the creator's home page

    Returns:
        The raw page content returned by ``self.request``.
    """
    return await self.request(method="GET", url=creator_url, return_ori_content=True)
async def get_notes_by_creator(self, user_name: str, page_number: int) -> Dict:
    """
    Fetch one page of a creator's threads from the ``/home/get/getthread`` endpoint.

    Args:
        user_name: creator user name, sent as ``un``
        page_number: page number, sent as ``pn``

    Returns:
        The decoded JSON response.
    """
    # NOTE(review): "id": "utf-8" looks like it may have been meant as "ie" — confirm against the endpoint.
    query = {"un": user_name, "pn": page_number, "id": "utf-8"}
    query["_"] = utils.get_current_timestamp()
    return await self.get("/home/get/getthread", params=query)
async def get_all_notes_by_creator_user_name(
    self,
    user_name: str,
    crawl_interval: float = 1.0,
    callback: Optional[Callable] = None,
    max_note_count: int = 0,
    creator_page_html_content: str = None,
) -> List[TiebaNote]:
    """
    Collect the notes of a creator identified by user name.

    Threads listed directly on the creator's home page are handled first (from
    ``creator_page_html_content``); the remaining ones are paged through
    ``get_notes_by_creator``. The home-page threads are not counted against
    ``max_note_count``.

    Args:
        user_name: creator user name
        crawl_interval: delay in seconds slept after each API page
        callback: awaitable called as ``callback(notes)`` after each batch
        max_note_count: upper bound for notes fetched through the API; 0 means no limit
        creator_page_html_content: HTML of the creator's home page (optional)

    Returns:
        All note details that were fetched.
    """
    result: List[TiebaNote] = []

    # Tieba shows the first threads directly on the home page; per the original
    # note they cannot be obtained through the API, so parse them from the HTML.
    if creator_page_html_content:
        thread_id_list = self._page_extractor.extract_tieba_thread_id_list_from_creator_page(creator_page_html_content)
        utils.logger.info(f"[BaiduTieBaClient.get_all_notes_by_creator] got user_name:{user_name} thread_id_list len : {len(thread_id_list)}")
        notes = await asyncio.gather(*(self.get_note_by_id(thread_id) for thread_id in thread_id_list))
        if callback:
            await callback(notes)
        result.extend(notes)

    notes_has_more = 1
    page_number = 1
    page_per_count = 20
    total_get_count = 0
    while notes_has_more == 1 and (max_note_count == 0 or total_get_count < max_note_count):
        notes_res = await self.get_notes_by_creator(user_name, page_number)
        if not notes_res or notes_res.get("no") != 0:
            utils.logger.error(f"[BaiduTieBaClient.get_all_notes_by_creator_user_name] got user_name:{user_name} notes failed, notes_res: {notes_res}")
            break
        notes_data = notes_res.get("data")
        if not notes_data:
            # A successful status without a payload: stop instead of crashing on None.
            utils.logger.error(f"[BaiduTieBaClient.get_all_notes_by_creator_user_name] got user_name:{user_name} empty data, notes_res: {notes_res}")
            break
        notes_has_more = notes_data.get("has_more")
        thread_list = notes_data.get("thread_list") or []
        utils.logger.info(f"[BaiduTieBaClient.get_all_notes_by_creator_user_name] got user_name:{user_name} notes len : {len(thread_list)}")
        notes = await asyncio.gather(*(self.get_note_by_id(note['thread_id']) for note in thread_list))
        if callback:
            await callback(notes)
        await asyncio.sleep(crawl_interval)
        result.extend(notes)
        page_number += 1
        # The budget is counted in whole pages (20 per page), as before.
        total_get_count += page_per_count
    return result
@@ -0,0 +1,418 @@
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
import asyncio
import os
import random
from asyncio import Task
from typing import Dict, List, Optional, Tuple
from playwright.async_api import (
BrowserContext,
BrowserType,
Page,
Playwright,
async_playwright,
)
import config
from base.base_crawler import AbstractCrawler
from model.m_baidu_tieba import TiebaCreator, TiebaNote
from proxy.proxy_ip_pool import IpInfoModel, create_ip_pool
from store import tieba as tieba_store
from tools import utils
from tools.cdp_browser import CDPBrowserManager
from var import crawler_type_var, source_keyword_var
from .client import BaiduTieBaClient
from .field import SearchNoteType, SearchSortType
from .help import TieBaExtractor
from .login import BaiduTieBaLogin
class TieBaCrawler(AbstractCrawler):
context_page: Page
tieba_client: BaiduTieBaClient
browser_context: BrowserContext
cdp_manager: Optional[CDPBrowserManager]
def __init__(self) -> None:
    """Initialise the crawler's static settings; no CDP manager exists yet."""
    self.cdp_manager = None
    self.index_url = "https://tieba.baidu.com"
    self.user_agent = utils.get_user_agent()
    self._page_extractor = TieBaExtractor()
async def start(self) -> None:
    """
    Build the Tieba client and run the crawl selected by ``config.CRAWLER_TYPE``.

    When ``config.ENABLE_IP_PROXY`` is set, a proxy pool is created first and one
    proxy from it becomes the client's default proxy.

    Returns:
        None
    """
    proxy_pool = None
    httpx_proxy_format = None
    if config.ENABLE_IP_PROXY:
        utils.logger.info(
            "[BaiduTieBaCrawler.start] Begin create ip proxy pool ..."
        )
        proxy_pool = await create_ip_pool(
            config.IP_PROXY_POOL_COUNT, enable_validate_ip=True
        )
        ip_proxy_info: IpInfoModel = await proxy_pool.get_proxy()
        _, httpx_proxy_format = utils.format_proxy_info(ip_proxy_info)
        utils.logger.info(
            f"[BaiduTieBaCrawler.start] Init default ip proxy, value: {httpx_proxy_format}"
        )

    # Client used for every request against the Tieba website.
    self.tieba_client = BaiduTieBaClient(
        ip_pool=proxy_pool,
        default_ip_proxy=httpx_proxy_format,
    )

    crawl_mode = config.CRAWLER_TYPE
    crawler_type_var.set(crawl_mode)
    if crawl_mode == "search":
        # Keyword search first, then the configured forums.
        await self.search()
        await self.get_specified_tieba_notes()
    elif crawl_mode == "detail":
        # Details and comments of the configured post ids.
        await self.get_specified_notes()
    elif crawl_mode == "creator":
        # Creator profiles plus their notes and comments.
        await self.get_creators_and_notes()

    utils.logger.info("[BaiduTieBaCrawler.start] Tieba Crawler finished ...")
async def search(self) -> None:
    """
    Crawl keyword search results page by page and fetch details/comments of every hit.

    Reads ``config.KEYWORDS`` (comma separated), ``config.START_PAGE`` and
    ``config.CRAWLER_MAX_NOTES_COUNT``; the latter is raised in place to at
    least one page worth of notes.

    Returns:
        None
    """
    utils.logger.info(
        "[BaiduTieBaCrawler.search] Begin search baidu tieba keywords"
    )
    tieba_limit_count = 10  # fixed number of notes per search page
    if config.CRAWLER_MAX_NOTES_COUNT < tieba_limit_count:
        config.CRAWLER_MAX_NOTES_COUNT = tieba_limit_count
    start_page = config.START_PAGE

    for keyword in config.KEYWORDS.split(","):
        source_keyword_var.set(keyword)
        utils.logger.info(
            f"[BaiduTieBaCrawler.search] Current search keyword: {keyword}"
        )
        page = 1
        while True:
            # Notes that would have been requested once this page is done, counted from start_page.
            planned_total = (page - start_page + 1) * tieba_limit_count
            if planned_total > config.CRAWLER_MAX_NOTES_COUNT:
                break
            if page < start_page:
                utils.logger.info(f"[BaiduTieBaCrawler.search] Skip page {page}")
                page += 1
                continue
            try:
                utils.logger.info(
                    f"[BaiduTieBaCrawler.search] search tieba keyword: {keyword}, page: {page}"
                )
                notes_list: List[TiebaNote] = await self.tieba_client.get_notes_by_keyword(
                    keyword=keyword,
                    page=page,
                    page_size=tieba_limit_count,
                    sort=SearchSortType.TIME_DESC,
                    note_type=SearchNoteType.FIXED_THREAD,
                )
                if not notes_list:
                    utils.logger.info(
                        f"[BaiduTieBaCrawler.search] Search note list is empty"
                    )
                    break
                utils.logger.info(
                    f"[BaiduTieBaCrawler.search] Note list len: {len(notes_list)}"
                )
                found_ids = [note_detail.note_id for note_detail in notes_list]
                await self.get_specified_notes(note_id_list=found_ids)
                page += 1
            except Exception as ex:
                # Any failure ends the crawl of the current keyword.
                utils.logger.error(
                    f"[BaiduTieBaCrawler.search] Search keywords error, current page: {page}, current keyword: {keyword}, err: {ex}"
                )
                break
async def get_specified_tieba_notes(self):
    """
    Crawl the thread lists of the forums named in ``config.TIEBA_NAME_LIST``.

    ``config.CRAWLER_MAX_NOTES_COUNT`` is raised in place to at least 50; the
    ``pn`` value advances in steps of 50 until it exceeds that limit or a page
    comes back empty.

    Returns:
        None
    """
    tieba_limit_count = 50
    if config.CRAWLER_MAX_NOTES_COUNT < tieba_limit_count:
        config.CRAWLER_MAX_NOTES_COUNT = tieba_limit_count

    for tieba_name in config.TIEBA_NAME_LIST:
        utils.logger.info(
            f"[BaiduTieBaCrawler.get_specified_tieba_notes] Begin get tieba name: {tieba_name}"
        )
        page_number = 0
        while True:
            if page_number > config.CRAWLER_MAX_NOTES_COUNT:
                break
            note_list: List[TiebaNote] = await self.tieba_client.get_notes_by_tieba_name(
                tieba_name=tieba_name, page_num=page_number
            )
            if not note_list:
                utils.logger.info(
                    f"[BaiduTieBaCrawler.get_specified_tieba_notes] Get note list is empty"
                )
                break
            utils.logger.info(
                f"[BaiduTieBaCrawler.get_specified_tieba_notes] tieba name: {tieba_name} note list len: {len(note_list)}"
            )
            listed_ids = [note.note_id for note in note_list]
            await self.get_specified_notes(listed_ids)
            page_number += tieba_limit_count
async def get_specified_notes(
    self, note_id_list: Optional[List[str]] = None
):
    """
    Fetch, store and comment-crawl the posts with the given ids.

    Args:
        note_id_list: post ids to crawl; when omitted (``None``) the current
            value of ``config.TIEBA_SPECIFIED_ID_LIST`` is used

    Returns:
        None
    """
    # Resolve the default at call time: a default bound in the signature is
    # evaluated once at definition and would share one list object across calls.
    if note_id_list is None:
        note_id_list = config.TIEBA_SPECIFIED_ID_LIST

    semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
    task_list = [
        self.get_note_detail_async_task(note_id=note_id, semaphore=semaphore)
        for note_id in note_id_list
    ]
    note_details = await asyncio.gather(*task_list)

    # Failed lookups come back as None and are dropped.
    note_details_model: List[TiebaNote] = [
        note_detail for note_detail in note_details if note_detail is not None
    ]
    for note_detail in note_details_model:
        await tieba_store.update_tieba_note(note_detail)
    await self.batch_get_note_comments(note_details_model)
async def get_note_detail_async_task(
    self, note_id: str, semaphore: asyncio.Semaphore
) -> Optional[TiebaNote]:
    """
    Fetch the detail of one post while holding ``semaphore``.

    Args:
        note_id: baidu tieba note id
        semaphore: asyncio semaphore bounding concurrent detail requests

    Returns:
        The note detail, or None when it could not be fetched (errors are logged, not raised).
    """
    async with semaphore:
        try:
            utils.logger.info(
                f"[BaiduTieBaCrawler.get_note_detail] Begin get note detail, note_id: {note_id}"
            )
            note_detail: TiebaNote = await self.tieba_client.get_note_by_id(note_id)
            if not note_detail:
                utils.logger.error(
                    f"[BaiduTieBaCrawler.get_note_detail] Get note detail error, note_id: {note_id}"
                )
                return None
            return note_detail
        # KeyError must be handled before the generic handler: it is a subclass
        # of Exception, so in the opposite order this branch could never run.
        except KeyError as ex:
            utils.logger.error(
                f"[BaiduTieBaCrawler.get_note_detail] have not fund note detail note_id:{note_id}, err: {ex}"
            )
            return None
        except Exception as ex:
            utils.logger.error(
                f"[BaiduTieBaCrawler.get_note_detail] Get note detail error: {ex}"
            )
            return None
async def batch_get_note_comments(self, note_detail_list: List[TiebaNote]):
    """
    Crawl the comments of several posts concurrently.

    Does nothing when ``config.ENABLE_GET_COMMENTS`` is off. One task is created
    per note; a shared semaphore sized by ``config.MAX_CONCURRENCY_NUM`` is passed to each.

    Args:
        note_detail_list: notes whose comments should be fetched

    Returns:
        None
    """
    if not config.ENABLE_GET_COMMENTS:
        return

    semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
    task_list: List[Task] = [
        asyncio.create_task(
            self.get_comments_async_task(note_detail, semaphore),
            name=note_detail.note_id,
        )
        for note_detail in note_detail_list
    ]
    await asyncio.gather(*task_list)
async def get_comments_async_task(
    self, note_detail: TiebaNote, semaphore: asyncio.Semaphore
):
    """
    Crawl all comments of one post while holding ``semaphore``.

    Args:
        note_detail: the post whose comments are fetched
        semaphore: asyncio semaphore bounding concurrent comment crawls

    Returns:
        None
    """
    async with semaphore:
        utils.logger.info(
            f"[BaiduTieBaCrawler.get_comments] Begin get note id comments {note_detail.note_id}"
        )
        # A random delay in [0, 1) is used as the crawl interval for this post.
        pause = random.random()
        await self.tieba_client.get_note_all_comments(
            note_detail=note_detail,
            crawl_interval=pause,
            callback=tieba_store.batch_update_tieba_note_comments,
            max_count=config.CRAWLER_MAX_COMMENTS_COUNT_SINGLENOTES,
        )
async def get_creators_and_notes(self) -> None:
    """
    Crawl every creator in ``config.TIEBA_CREATOR_URL_LIST``: profile, notes and comments.

    A creator whose profile cannot be extracted is logged and skipped.

    Returns:
        None
    """
    utils.logger.info(
        "[BaiduTieBaCrawler.get_creators_and_notes] Begin get tieba creators"
    )
    for creator_url in config.TIEBA_CREATOR_URL_LIST:
        creator_page_html_content = await self.tieba_client.get_creator_info_by_url(
            creator_url=creator_url
        )
        creator_info: TiebaCreator = self._page_extractor.extract_creator_info(
            creator_page_html_content
        )
        if not creator_info:
            utils.logger.error(
                f"[BaiduTieBaCrawler.get_creators_and_notes] get creator info error, creator_url:{creator_url}"
            )
            continue

        utils.logger.info(
            f"[BaiduTieBaCrawler.get_creators_and_notes] creator info: {creator_info}"
        )
        await tieba_store.save_creator(user_info=creator_info)

        # Get all note information of the creator; the home page HTML is reused
        # so the threads shown there do not need a second download.
        all_notes_list = (
            await self.tieba_client.get_all_notes_by_creator_user_name(
                user_name=creator_info.user_name,
                crawl_interval=0,
                callback=tieba_store.batch_update_tieba_notes,
                max_note_count=config.CRAWLER_MAX_NOTES_COUNT,
                creator_page_html_content=creator_page_html_content,
            )
        )
        await self.batch_get_note_comments(all_notes_list)
async def launch_browser(
    self,
    chromium: BrowserType,
    playwright_proxy: Optional[Dict],
    user_agent: Optional[str],
    headless: bool = True,
) -> BrowserContext:
    """
    Launch Chromium and return a browser context with a 1920x1080 viewport.

    Args:
        chromium: Playwright browser type used to launch the browser
        playwright_proxy: proxy settings passed to the launch call
        user_agent: user agent string for the context
        headless: whether to run without a visible window

    Returns:
        A persistent context stored under ``browser_data`` when
        ``config.SAVE_LOGIN_STATE`` is set, otherwise a fresh context.
    """
    utils.logger.info(
        "[BaiduTieBaCrawler.launch_browser] Begin create browser context ..."
    )
    viewport = {"width": 1920, "height": 1080}

    if not config.SAVE_LOGIN_STATE:
        browser = await chromium.launch(headless=headless, proxy=playwright_proxy)  # type: ignore
        return await browser.new_context(viewport=viewport, user_agent=user_agent)

    # feat issue #14: keep the login state on disk to avoid logging in every time.
    profile_dir = os.path.join(
        os.getcwd(), "browser_data", config.USER_DATA_DIR % config.PLATFORM
    )  # type: ignore
    return await chromium.launch_persistent_context(
        user_data_dir=profile_dir,
        accept_downloads=True,
        headless=headless,
        proxy=playwright_proxy,  # type: ignore
        viewport=viewport,
        user_agent=user_agent,
    )
async def launch_browser_with_cdp(
    self,
    playwright: Playwright,
    playwright_proxy: Optional[Dict],
    user_agent: Optional[str],
    headless: bool = True,
) -> BrowserContext:
    """
    Launch the browser in CDP mode, falling back to ``launch_browser`` on any error.

    Args:
        playwright: Playwright instance
        playwright_proxy: proxy settings
        user_agent: user agent string
        headless: whether to run without a visible window

    Returns:
        The browser context from the CDP manager, or from the standard launch on failure.
    """
    try:
        manager = CDPBrowserManager()
        self.cdp_manager = manager
        cdp_context = await manager.launch_and_connect(
            playwright=playwright,
            playwright_proxy=playwright_proxy,
            user_agent=user_agent,
            headless=headless,
        )
        # Log what browser the manager is attached to.
        browser_info = await manager.get_browser_info()
        utils.logger.info(f"[TieBaCrawler] CDP浏览器信息: {browser_info}")
        return cdp_context
    except Exception as e:
        utils.logger.error(f"[TieBaCrawler] CDP模式启动失败,回退到标准模式: {e}")
        # Fall back to the standard launch path.
        return await self.launch_browser(
            playwright.chromium, playwright_proxy, user_agent, headless
        )
async def close(self):
    """
    Release browser resources held by the crawler.

    In CDP mode the CDP manager is cleaned up and dropped; otherwise the
    browser context is closed if one was ever created.

    Returns:
        None
    """
    if self.cdp_manager:
        # CDP mode needs its own cleanup path.
        await self.cdp_manager.cleanup()
        self.cdp_manager = None
    else:
        # ``browser_context`` is only a class-level annotation until a browser is
        # launched, so it may not exist on the instance; closing must not fail then.
        browser_context = getattr(self, "browser_context", None)
        if browser_context is not None:
            await browser_context.close()
    utils.logger.info("[BaiduTieBaCrawler.close] Browser context closed ...")
@@ -0,0 +1,29 @@
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
from enum import Enum
class SearchSortType(Enum):
    """Sort order for keyword search results; the member value is sent as the ``sm`` query parameter."""
    # Newest first (descending by time)
    TIME_DESC = "1"
    # Oldest first (ascending by time)
    TIME_ASC = "0"
    # Ordered by relevance
    RELEVANCE_ORDER = "2"
class SearchNoteType(Enum):
    """Post type filter for keyword search; the member value is sent as the ``only_thread`` query parameter."""
    # Main threads only
    MAIN_THREAD = "1"
    # Mixed mode (threads + replies)
    FIXED_THREAD = "0"
@@ -0,0 +1,418 @@
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
# -*- coding: utf-8 -*-
import html
import json
import re
from typing import Dict, List, Tuple
from urllib.parse import parse_qs, unquote
from parsel import Selector
from constant import baidu_tieba as const
from model.m_baidu_tieba import TiebaComment, TiebaCreator, TiebaNote
from tools import utils
# Marker substrings that extract_gender looks for in the creator page's user-info HTML.
GENDER_MALE = "sex_male"
GENDER_FEMALE = "sex_female"
class TieBaExtractor:
def __init__(self):
    """Create an extractor; it holds no state, so there is nothing to initialise."""
    pass
@staticmethod
def extract_search_note_list(page_content: str) -> List[TiebaNote]:
    """
    Extract the post list from a keyword search result page.

    The search page does not carry reply counts or reply page counts, so those
    fields are not filled in here.

    Args:
        page_content: HTML of the search result page

    Returns:
        One ``TiebaNote`` per ``s_post`` block found on the page.
    """
    def stripped(node, query):
        # First match of the query as text, whitespace-trimmed ('' when absent).
        return node.xpath(query).get(default='').strip()

    def untrimmed(node, query):
        # First match of the query as text, left as-is ('' when absent).
        return node.xpath(query).get(default='')

    notes: List[TiebaNote] = []
    for post in Selector(text=page_content).xpath("//div[@class='s_post']"):
        notes.append(TiebaNote(
            note_id=stripped(post, ".//span[@class='p_title']/a/@data-tid"),
            title=stripped(post, ".//span[@class='p_title']/a/text()"),
            desc=stripped(post, ".//div[@class='p_content']/text()"),
            note_url=const.TIEBA_URL + untrimmed(post, ".//span[@class='p_title']/a/@href"),
            user_nickname=stripped(post, ".//a[starts-with(@href, '/home/main')]/font/text()"),
            user_link=const.TIEBA_URL + untrimmed(post, ".//a[starts-with(@href, '/home/main')]/@href"),
            tieba_name=stripped(post, ".//a[@class='p_forum']/font/text()"),
            tieba_link=const.TIEBA_URL + untrimmed(post, ".//a[@class='p_forum']/@href"),
            publish_time=stripped(post, ".//font[@class='p_green p_date']/text()"),
        ))
    return notes
def extract_tieba_note_list(self, page_content: str) -> List[TiebaNote]:
    """
    Extract the thread list from a forum page.

    Args:
        page_content: HTML of the forum's thread list page

    Returns:
        One ``TiebaNote`` per list item that carries a non-empty ``data-field``.
    """
    # Drop HTML comment openers so markup wrapped in comments becomes visible to the parser.
    page_content = page_content.replace('<!--', "")
    content_selector = Selector(text=page_content)

    # Forum name and link are page-level values: look them up once, not per row.
    tieba_name = content_selector.xpath("//a[@class='card_title_fname']/text()").get(default='').strip()
    tieba_link = const.TIEBA_URL + content_selector.xpath("//a[@class='card_title_fname']/@href").get(default='')

    result: List[TiebaNote] = []
    for post_selector in content_selector.xpath("//ul[@id='thread_list']/li"):
        post_field_value: Dict = self.extract_data_field_value(post_selector)
        if not post_field_value:
            continue
        note_id = str(post_field_value.get("id"))
        # "authoer_nickname" was a misspelling, so the nickname was never picked up.
        # The old key is kept as a fallback in case some payload really uses it.
        user_nickname = (
            post_field_value.get("author_nickname")
            or post_field_value.get("authoer_nickname")
            or post_field_value.get("author_name")
        )
        tieba_note = TiebaNote(
            note_id=note_id,
            title=post_selector.xpath(".//a[@class='j_th_tit ']/text()").get(default='').strip(),
            desc=post_selector.xpath(
                ".//div[@class='threadlist_abs threadlist_abs_onlyline ']/text()").get(default='').strip(),
            note_url=const.TIEBA_URL + f"/p/{note_id}",
            user_link=const.TIEBA_URL + post_selector.xpath(
                ".//a[@class='frs-author-name j_user_card ']/@href").get(default='').strip(),
            user_nickname=user_nickname,
            tieba_name=tieba_name,
            tieba_link=tieba_link,
            total_replay_num=post_field_value.get("reply_num", 0),
        )
        result.append(tieba_note)
    return result
def extract_note_detail(self, page_content: str) -> TiebaNote:
    """
    Extract the detail of a post from its page.

    Args:
        page_content: HTML of the post page

    Returns:
        A ``TiebaNote`` filled from the page; the forum-name suffix is removed from the title.
    """
    def text_at(node, query):
        # First match of the query as text, whitespace-trimmed ('' when absent).
        return node.xpath(query).get(default='').strip()

    content_selector = Selector(text=page_content)
    first_floor_selector = content_selector.xpath("//div[@class='p_postlist'][1]")

    # The note id is the last path segment of the "only view author" link.
    only_view_author_link = text_at(content_selector, "//*[@id='lzonly_cntn']/@href")
    note_id = only_view_author_link.split("?")[0].split("/")[-1]

    # Reply count and reply page count, in that order.
    thread_num_infos = content_selector.xpath(
        "//div[@id='thread_theme_5']//li[@class='l_reply_num']//span[@class='red']")

    # IP location and publish time come from the post tail block.
    other_info_content = content_selector.xpath(".//div[@class='post-tail-wrap']").get(default="").strip()
    ip_location, publish_time = self.extract_ip_and_pub_time(other_info_content)

    author_face_href = text_at(first_floor_selector, ".//a[@class='p_author_face ']/@href")
    forum_href = content_selector.xpath("//a[@class='card_title_fname']/@href").get(default='')

    note = TiebaNote(
        note_id=note_id,
        title=text_at(content_selector, "//title/text()"),
        desc=text_at(content_selector, "//meta[@name='description']/@content"),
        note_url=const.TIEBA_URL + f"/p/{note_id}",
        user_link=const.TIEBA_URL + author_face_href,
        user_nickname=text_at(first_floor_selector, ".//a[@class='p_author_name j_user_card']/text()"),
        user_avatar=text_at(first_floor_selector, ".//a[@class='p_author_face ']/img/@src"),
        tieba_name=text_at(content_selector, "//a[@class='card_title_fname']/text()"),
        tieba_link=const.TIEBA_URL + forum_href,
        ip_location=ip_location,
        publish_time=publish_time,
        total_replay_num=text_at(thread_num_infos[0], "./text()"),
        total_replay_page=text_at(thread_num_infos[1], "./text()"),
    )
    note.title = note.title.replace(f"{note.tieba_name}】_百度贴吧", "")
    return note
def extract_tieba_note_parment_comments(self, page_content: str, note_id: str) -> List[TiebaComment]:
    """
    Extract the first-level comments from a post page.

    Args:
        page_content: HTML of one page of the post
        note_id: id of the post the comments belong to

    Returns:
        One ``TiebaComment`` per floor that carries a non-empty ``data-field``.
    """
    def text_at(node, query):
        # First match of the query as text, whitespace-trimmed ('' when absent).
        return node.xpath(query).get(default='').strip()

    floors = Selector(text=page_content).xpath("//div[@class='l_post l_post_bright j_l_post clearfix ']")
    parsed: List[TiebaComment] = []
    for comment_selector in floors:
        comment_field_value: Dict = self.extract_data_field_value(comment_selector)
        if not comment_field_value:
            continue
        tieba_name = text_at(comment_selector, "//a[@class='card_title_fname']/text()")
        other_info_content = comment_selector.xpath(".//div[@class='post-tail-wrap']").get(default="").strip()
        ip_location, publish_time = self.extract_ip_and_pub_time(other_info_content)
        # The "content" entry of data-field holds the comment's ids, counters and body.
        content_info = comment_field_value.get("content")
        parsed.append(TiebaComment(
            comment_id=str(content_info.get("post_id")),
            sub_comment_count=content_info.get("comment_num"),
            content=utils.extract_text_from_html(content_info.get("content")),
            note_url=const.TIEBA_URL + f"/p/{note_id}",
            user_link=const.TIEBA_URL + text_at(comment_selector, ".//a[@class='p_author_face ']/@href"),
            user_nickname=text_at(comment_selector, ".//a[@class='p_author_name j_user_card']/text()"),
            user_avatar=text_at(comment_selector, ".//a[@class='p_author_face ']/img/@src"),
            tieba_id=str(content_info.get("forum_id", "")),
            tieba_name=tieba_name,
            tieba_link=f"https://tieba.baidu.com/f?kw={tieba_name}",
            ip_location=ip_location,
            publish_time=publish_time,
            note_id=note_id,
        ))
    return parsed
def extract_tieba_note_sub_comments(self, page_content: str, parent_comment: TiebaComment) -> List[TiebaComment]:
    """
    Extract the second-level comments (replies) of one parent comment.

    Args:
        page_content: HTML returned for the reply page
        parent_comment: the comment these replies belong to; note/forum fields are copied from it

    Returns:
        One ``TiebaComment`` per reply item that carries a non-empty ``data-field``.
    """
    selector = Selector(page_content)
    # The first reply uses a different class than the following ones; keep that order.
    reply_queries = (
        "//li[@class='lzl_single_post j_lzl_s_p first_no_border']",
        "//li[@class='lzl_single_post j_lzl_s_p ']",
    )
    replies = []
    for reply_query in reply_queries:
        for comment_ele in selector.xpath(reply_query):
            comment_value = self.extract_data_field_value(comment_ele)
            if not comment_value:
                continue
            author_anchor = comment_ele.xpath("./a[@class='j_user_card lzl_p_p']")[0]
            body_html = comment_ele.xpath(".//span[@class='lzl_content_main']").get(default="")
            content = utils.extract_text_from_html(body_html)
            replies.append(TiebaComment(
                comment_id=str(comment_value.get("spid")),
                content=content,
                user_link=author_anchor.xpath("./@href").get(default=""),
                user_nickname=comment_value.get("showname"),
                user_avatar=author_anchor.xpath("./img/@src").get(default=""),
                publish_time=comment_ele.xpath(".//span[@class='lzl_time']/text()").get(default="").strip(),
                parent_comment_id=parent_comment.comment_id,
                note_id=parent_comment.note_id,
                note_url=parent_comment.note_url,
                tieba_id=parent_comment.tieba_id,
                tieba_name=parent_comment.tieba_name,
                tieba_link=parent_comment.tieba_link,
            ))
    return replies
def extract_creator_info(self, html_content: str) -> TiebaCreator:
    """
    Extract a creator's profile from the creator home page.

    Args:
        html_content: HTML of the creator's home page

    Returns:
        A ``TiebaCreator`` built from the page.
    """
    selector = Selector(text=html_content)

    # User name and id are query parameters of the link inside <p class="space">.
    user_link: str = selector.xpath("//p[@class='space']/a").xpath("./@href").get(default='')
    user_link_params: Dict = parse_qs(unquote(user_link.split("?")[-1]))
    un_values = user_link_params.get("un")
    id_values = user_link_params.get("id")
    user_name = un_values[0] if un_values else ""
    user_id = id_values[0] if id_values else ""

    # Follow / fan counters are only read when exactly two counter spans are present.
    follow_fans_selector = selector.xpath("//span[@class='concern_num']")
    if len(follow_fans_selector) == 2:
        follows, fans = self.extract_follow_and_fans(follow_fans_selector)
    else:
        follows, fans = 0, 0

    user_content = selector.xpath("//div[@class='userinfo_userdata']").get(default='')
    nickname = selector.xpath(".//span[@class='userinfo_username ']/text()").get(default='').strip()
    avatar = selector.xpath(".//div[@class='userinfo_left_head']//img/@src").get(default='').strip()

    return TiebaCreator(
        user_id=user_id,
        user_name=user_name,
        nickname=nickname,
        avatar=avatar,
        gender=self.extract_gender(user_content),
        ip_location=self.extract_ip(user_content),
        follows=follows,
        fans=fans,
        registration_duration=self.extract_registration_duration(user_content),
    )
@staticmethod
def extract_tieba_thread_id_list_from_creator_page(
    html_content: str
) -> List[str]:
    """
    Extract the ids of the threads listed on a creator's home page.

    Args:
        html_content: HTML of the creator's home page

    Returns:
        The thread ids, taken as the last path segment of each thread link (query string removed).
    """
    thread_links = Selector(text=html_content).xpath(
        "//ul[@class='new_list clearfix']//div[@class='thread_name']/a[1]/@href"
    ).getall()
    return [link.split("?")[0].split("/")[-1] for link in thread_links]
def extract_ip_and_pub_time(self, html_content: str) -> Tuple[str, str]:
"""
提取IP位置和发布时间
Args:
html_content:
Returns:
"""
pattern_pub_time = re.compile(r'<span class="tail-info">(\d{4}-\d{2}-\d{2} \d{2}:\d{2})</span>')
time_match = pattern_pub_time.search(html_content)
pub_time = time_match.group(1) if time_match else ""
return self.extract_ip(html_content), pub_time
@staticmethod
def extract_ip(html_content: str) -> str:
"""
提取IP
Args:
html_content:
Returns:
"""
pattern_ip = re.compile(r'IP属地:(\S+)</span>')
ip_match = pattern_ip.search(html_content)
ip = ip_match.group(1) if ip_match else ""
return ip
@staticmethod
def extract_gender(html_content: str) -> str:
    """
    Determine a user's gender label from HTML text.

    Args:
        html_content: HTML text to inspect.

    Returns:
        '男' when the GENDER_MALE marker occurs in the text, '女' when the
        GENDER_FEMALE marker occurs (the male marker is checked first),
        and '未知' when neither marker is present.
    """
    # Each branch returns a distinct label; previously both known genders
    # returned an empty string and could not be told apart.
    if GENDER_MALE in html_content:
        return '男'
    if GENDER_FEMALE in html_content:
        return '女'
    return '未知'
@staticmethod
def extract_follow_and_fans(selectors: List[Selector]) -> Tuple[str, str]:
    """
    Extract the follow count and the fan count.

    Args:
        selectors: Two selectors; the first is read for the follow count
            and the second for the fan count.

    Returns:
        A ``(follows, fans)`` tuple. Each element is the matched digit
        string, or the integer 0 when the selector's markup does not match
        the expected ``concern_num`` span.
    """
    count_pattern = re.compile(r'<span class="concern_num">\(<a[^>]*>(\d+)</a>\)</span>')

    def first_count(node):
        # First captured number in the node's serialized markup, else 0.
        hit = count_pattern.search(node.get())
        if hit:
            return hit.group(1)
        return 0

    follow_count = first_count(selectors[0])
    fan_count = first_count(selectors[1])
    return follow_count, fan_count
@staticmethod
def extract_registration_duration(html_content: str) -> str:
"""
"<span>吧龄:1.9年</span>"
Returns: 1.9年
"""
pattern = re.compile(r'<span>吧龄:(\S+)</span>')
match = pattern.search(html_content)
return match.group(1) if match else ""
@staticmethod
def extract_data_field_value(selector: Selector) -> Dict:
    """
    Read and decode the ``data-field`` attribute of the selected element.

    Args:
        selector: Selector positioned on the element carrying the attribute.

    Returns:
        The decoded JSON value of the attribute. An empty dict is returned
        when the attribute is missing, blank, or "{}", and also when
        decoding fails (the error is printed, not raised).
    """
    raw_attr = selector.xpath("./@data-field").get(default='').strip()
    if raw_attr in ("", "{}"):
        return {}
    try:
        # The attribute holds HTML-escaped JSON: undo the entity escaping
        # first, then parse the resulting JSON text.
        return json.loads(html.unescape(raw_attr))
    except Exception as ex:
        print(f"extract_data_field_value,错误信息:{ex}, 尝试使用其他方式解析")
        return {}
def test_extract_search_note_list():
    """Parse the saved search-results fixture and print the extracted notes."""
    fixture_path = "test_data/search_keyword_notes.html"
    with open(fixture_path, mode="r", encoding="utf-8") as html_file:
        page_html = html_file.read()
    notes = TieBaExtractor().extract_search_note_list(page_html)
    print(notes)
def test_extract_note_detail():
    """Parse the saved note-detail fixture and print the result as a dict."""
    fixture_path = "test_data/note_detail.html"
    with open(fixture_path, mode="r", encoding="utf-8") as html_file:
        page_html = html_file.read()
    note = TieBaExtractor().extract_note_detail(page_html)
    print(note.model_dump())
def test_extract_tieba_note_parment_comments():
    """Parse the saved comments fixture for note "123456" and print the result."""
    fixture_path = "test_data/note_comments.html"
    with open(fixture_path, mode="r", encoding="utf-8") as html_file:
        page_html = html_file.read()
    comments = TieBaExtractor().extract_tieba_note_parment_comments(page_html, "123456")
    print(comments)
def test_extract_tieba_note_sub_comments():
    """Parse the saved sub-comments fixture against a placeholder parent comment."""
    fixture_path = "test_data/note_sub_comments.html"
    with open(fixture_path, mode="r", encoding="utf-8") as html_file:
        page_html = html_file.read()
    tieba_extractor = TieBaExtractor()
    # Placeholder parent: every field simply carries its own name, except
    # the comment id.
    parent_fields = {
        "comment_id": "123456",
        "content": "content",
        "user_link": "user_link",
        "user_nickname": "user_nickname",
        "user_avatar": "user_avatar",
        "publish_time": "publish_time",
        "parent_comment_id": "parent_comment_id",
        "note_id": "note_id",
        "note_url": "note_url",
        "tieba_id": "tieba_id",
        "tieba_name": "tieba_name",
    }
    placeholder_parent = TiebaComment(**parent_fields)
    sub_comments = tieba_extractor.extract_tieba_note_sub_comments(page_html, placeholder_parent)
    print(sub_comments)
def test_extract_tieba_note_list():
    """Parse the saved forum note-list fixture and print the extracted notes."""
    fixture_path = "test_data/tieba_note_list.html"
    with open(fixture_path, mode="r", encoding="utf-8") as html_file:
        page_html = html_file.read()
    notes = TieBaExtractor().extract_tieba_note_list(page_html)
    print(notes)
def test_extract_creator_info():
    """Parse the saved creator-page fixture and print the result as JSON."""
    fixture_path = "test_data/creator_info.html"
    with open(fixture_path, mode="r", encoding="utf-8") as html_file:
        page_html = html_file.read()
    creator = TieBaExtractor().extract_creator_info(page_html)
    print(creator.model_dump_json())
if __name__ == "__main__":
    # Manual entry point: only the creator-info check runs; the other
    # checks are kept here, disabled, so they can be switched on by hand.
    # test_extract_search_note_list()
    # test_extract_note_detail()
    # test_extract_tieba_note_parment_comments()
    # test_extract_tieba_note_list()
    test_extract_creator_info()
@@ -0,0 +1,123 @@
# 声明:本代码仅供学习和研究目的使用。使用者应遵守以下原则:
# 1. 不得用于任何商业用途。
# 2. 使用时应遵守目标平台的使用条款和robots.txt规则。
# 3. 不得进行大规模爬取或对平台造成运营干扰。
# 4. 应合理控制请求频率,避免给目标平台带来不必要的负担。
# 5. 不得用于任何非法或不当的用途。
#
# 详细许可条款请参阅项目根目录下的LICENSE文件。
# 使用本代码即表示您同意遵守上述原则和LICENSE中的所有条款。
import asyncio
import functools
import sys
from typing import Optional
from playwright.async_api import BrowserContext, Page
from tenacity import (RetryError, retry, retry_if_result, stop_after_attempt,
wait_fixed)
import config
from base.base_crawler import AbstractLogin
from tools import utils
class BaiduTieBaLogin(AbstractLogin):
    """
    Login flow for Baidu Tieba driven through a Playwright browser context.

    QR-code login and cookie injection are implemented; phone login is a
    placeholder that does nothing.
    """

    # Polling budget for check_login_state: one cookie check per interval,
    # at most this many checks before tenacity raises RetryError.
    _LOGIN_POLL_ATTEMPTS = 600
    _LOGIN_POLL_INTERVAL_SECONDS = 1

    def __init__(self,
                 login_type: str,
                 browser_context: BrowserContext,
                 context_page: Page,
                 login_phone: Optional[str] = "",
                 cookie_str: str = ""
                 ):
        """
        Args:
            login_type: Login method; begin() accepts "qrcode", "phone" or "cookie".
            browser_context: Browser context whose cookies are read and written.
            context_page: Page on which the login QR code is looked up.
            login_phone: Phone number, stored but not used by this class.
            cookie_str: Cookie string injected by login_by_cookies().
        """
        # NOTE: this overwrites the module-wide config value; begin()
        # dispatches on config.LOGIN_TYPE, not on an instance attribute.
        config.LOGIN_TYPE = login_type
        self.browser_context = browser_context
        self.context_page = context_page
        self.login_phone = login_phone
        self.cookie_str = cookie_str

    @retry(stop=stop_after_attempt(_LOGIN_POLL_ATTEMPTS), wait=wait_fixed(_LOGIN_POLL_INTERVAL_SECONDS),
           retry=retry_if_result(lambda value: value is False))
    async def check_login_state(self) -> bool:
        """
        Poll the browser cookies until login has succeeded.

        Returns:
            True once a STOKEN or PTOKEN cookie with a non-empty value is
            present. While the result is False the retry decorator calls
            this method again; when the attempt budget is used up tenacity
            raises RetryError instead of returning.
        """
        current_cookie = await self.browser_context.cookies()
        _, cookie_dict = utils.convert_cookies(current_cookie)
        # The retry predicate tests "value is False", so return a real bool.
        return bool(cookie_dict.get("STOKEN") or cookie_dict.get("PTOKEN"))

    async def begin(self):
        """
        Start the login flow selected by config.LOGIN_TYPE.

        Raises:
            ValueError: If config.LOGIN_TYPE is not "qrcode", "phone" or "cookie".
        """
        utils.logger.info("[BaiduTieBaLogin.begin] Begin login baidutieba ...")
        if config.LOGIN_TYPE == "qrcode":
            await self.login_by_qrcode()
        elif config.LOGIN_TYPE == "phone":
            await self.login_by_mobile()
        elif config.LOGIN_TYPE == "cookie":
            await self.login_by_cookies()
        else:
            # The accepted value is "cookie" (singular); name it correctly.
            raise ValueError("[BaiduTieBaLogin.begin]Invalid Login Type Currently only supported qrcode or phone or cookie ...")

    async def login_by_mobile(self):
        """Login baidutieba by mobile (not implemented; does nothing)."""
        pass

    async def login_by_qrcode(self):
        """
        Log in by QR code and keep the browser's logged-in state.

        Finds the QR code image on the page (clicking the login button once
        if it is not shown yet), displays it, then polls the cookies until
        login succeeds. Exits the process with status 1 when the QR code
        cannot be found or the polling budget runs out.
        """
        utils.logger.info("[BaiduTieBaLogin.login_by_qrcode] Begin login baidutieba by qrcode ...")
        qrcode_img_selector = "xpath=//img[@class='tang-pass-qrcode-img']"
        # find login qrcode
        base64_qrcode_img = await utils.find_login_qrcode(
            self.context_page,
            selector=qrcode_img_selector
        )
        if not base64_qrcode_img:
            utils.logger.info("[BaiduTieBaLogin.login_by_qrcode] login failed , have not found qrcode please check ....")
            # if this website does not automatically popup login dialog box, we will manual click login button
            await asyncio.sleep(0.5)
            login_button_ele = self.context_page.locator("xpath=//li[@class='u_login']")
            await login_button_ele.click()
            base64_qrcode_img = await utils.find_login_qrcode(
                self.context_page,
                selector=qrcode_img_selector
            )
            if not base64_qrcode_img:
                utils.logger.info("[BaiduTieBaLogin.login_by_qrcode] login failed , have not found qrcode please check ....")
                # Non-zero status so a failed login is not reported as success.
                sys.exit(1)

        # show login qrcode
        # fix issue #12
        # we need to use partial function to call show_qrcode function and run in executor
        # then current asyncio event loop will not be blocked
        partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img)
        asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)

        # Report the wait that check_login_state is actually configured for.
        wait_login_seconds = self._LOGIN_POLL_ATTEMPTS * self._LOGIN_POLL_INTERVAL_SECONDS
        utils.logger.info(f"[BaiduTieBaLogin.login_by_qrcode] waiting for scan code login, remaining time is {wait_login_seconds}s")
        try:
            await self.check_login_state()
        except RetryError:
            utils.logger.info("[BaiduTieBaLogin.login_by_qrcode] Login baidutieba failed by qrcode login method ...")
            # Non-zero status so a failed login is not reported as success.
            sys.exit(1)

        wait_redirect_seconds = 5
        utils.logger.info(f"[BaiduTieBaLogin.login_by_qrcode] Login successful then wait for {wait_redirect_seconds} seconds redirect ...")
        await asyncio.sleep(wait_redirect_seconds)

    async def login_by_cookies(self):
        """
        Log in by injecting the configured cookie string.

        Every name/value pair parsed from self.cookie_str is added to the
        browser context for domain ".baidu.com" and path "/".
        """
        utils.logger.info("[BaiduTieBaLogin.login_by_cookies] Begin login baidutieba by cookie ...")
        cookies = [
            {
                'name': key,
                'value': value,
                'domain': ".baidu.com",
                'path': "/"
            }
            for key, value in utils.convert_str_cookie_to_dict(self.cookie_str).items()
        ]
        # One call for all cookies instead of one browser round trip each;
        # skipped when there is nothing to add.
        if cookies:
            await self.browser_context.add_cookies(cookies)
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,189 @@
<li class="lzl_single_post j_lzl_s_p first_no_border" data-field='{&quot;spid&quot;:150726504693,&quot;showname&quot;:&quot;heinzfrentzen&quot;,&quot;user_name&quot;:&quot;heinzfrentzen&quot;,&quot;portrait&quot;:&quot;tb.1.b08d8f12.IR-tbLlZ2GkD6ARA-mfGOA&quot;}'>
<a rel="noopener" name="150726504693"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;heinzfrentzen&quot;,&quot;id&quot;:&quot;tb.1.b08d8f12.IR-tbLlZ2GkD6ARA-mfGOA&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.b08d8f12.IR-tbLlZ2GkD6ARA-mfGOA&fr=pb" username="heinzfrentzen">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.b08d8f12.IR-tbLlZ2GkD6ARA-mfGOA"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:[],&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;heinzfrentzen&quot;,&quot;id&quot;:&quot;tb.1.b08d8f12.IR-tbLlZ2GkD6ARA-mfGOA&quot;}' href="/home/main?id=tb.1.b08d8f12.IR-tbLlZ2GkD6ARA-mfGOA&ie=utf-8&fr=pb" target="_blank" username="heinzfrentzen">heinzfrentzen</a>
:
<span class="lzl_content_main" data-username="">
<img class="BDE_Smiley" width="30" height="30" changedsize="false" src="https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon25.png">
<img class="BDE_Smiley" width="30" height="30" changedsize="false" src="https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon25.png">
</span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:11</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726506822,&quot;showname&quot;:&quot;\u53ef\u7231\u7684\u642c\u8fd0\u5de594&quot;,&quot;user_name&quot;:&quot;\u53ef\u7231\u7684\u642c\u8fd0\u5de594&quot;,&quot;portrait&quot;:&quot;tb.1.f1b47a84.Rixjf6fMP-PfH8fnS1CgRA&quot;}'>
<a rel="noopener" name="150726506822"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u53ef\u7231\u7684\u642c\u8fd0\u5de594&quot;,&quot;id&quot;:&quot;tb.1.f1b47a84.Rixjf6fMP-PfH8fnS1CgRA&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.f1b47a84.Rixjf6fMP-PfH8fnS1CgRA&fr=pb" username="可爱的搬运工94">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.f1b47a84.Rixjf6fMP-PfH8fnS1CgRA"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:[],&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u53ef\u7231\u7684\u642c\u8fd0\u5de594&quot;,&quot;id&quot;:&quot;tb.1.f1b47a84.Rixjf6fMP-PfH8fnS1CgRA&quot;}' href="/home/main?id=tb.1.f1b47a84.Rixjf6fMP-PfH8fnS1CgRA&ie=utf-8&fr=pb" target="_blank" username="可爱的搬运工94">可爱的搬运工94</a>
:<span class="lzl_content_main" data-username="">陈芋汐水花也不小 </span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:12</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726508024,&quot;showname&quot;:&quot;\u56fd\u9645\u4f53\u575b\u5de8\u661f\u9752\u6912\u8089\u4e1d&quot;,&quot;user_name&quot;:&quot;\u8682\u8681\u96c5\u864e\u54c8\u54c8&quot;,&quot;portrait&quot;:&quot;tb.1.c5c485ab.Cf5aDgd1NxLxZlej8r4LWg&quot;}'>
<a rel="noopener" name="150726508024"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u8682\u8681\u96c5\u864e\u54c8\u54c8&quot;,&quot;id&quot;:&quot;tb.1.c5c485ab.Cf5aDgd1NxLxZlej8r4LWg&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.c5c485ab.Cf5aDgd1NxLxZlej8r4LWg&fr=pb" username="蚂蚁雅虎哈哈">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.c5c485ab.Cf5aDgd1NxLxZlej8r4LWg"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:[],&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u8682\u8681\u96c5\u864e\u54c8\u54c8&quot;,&quot;id&quot;:&quot;tb.1.c5c485ab.Cf5aDgd1NxLxZlej8r4LWg&quot;}' href="/home/main?id=tb.1.c5c485ab.Cf5aDgd1NxLxZlej8r4LWg&ie=utf-8&fr=pb" target="_blank" username="蚂蚁雅虎哈哈">国际体坛巨星青椒肉丝</a>
:<span class="lzl_content_main" data-username="">你怀孕了吗 老是呕吐 </span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:12</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726509762,&quot;showname&quot;:&quot;\u8317\u82b1\u5c11\u5e05&quot;,&quot;user_name&quot;:&quot;\u8317\u82b1\u5c11\u5e05&quot;,&quot;portrait&quot;:&quot;tb.1.a0b6ca3c.54TCKizU2c9oSYWqNF7NqA&quot;}'>
<a rel="noopener" name="150726509762"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u8317\u82b1\u5c11\u5e05&quot;,&quot;id&quot;:&quot;tb.1.a0b6ca3c.54TCKizU2c9oSYWqNF7NqA&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.a0b6ca3c.54TCKizU2c9oSYWqNF7NqA&fr=pb" username="茗花少帅">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.a0b6ca3c.54TCKizU2c9oSYWqNF7NqA"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:{&quot;all_level&quot;:{&quot;2&quot;:{&quot;end_time&quot;:&quot;1421248220&quot;,&quot;level&quot;:2,&quot;pic_url&quot;:&quot;http:\/\/imgsrc.baidu.com\/forum\/pic\/item\/6afa80cb39dbb6fdf9de234d0b24ab18962b37f0.jpg&quot;,&quot;score_limit&quot;:8000}},&quot;level&quot;:{&quot;end_time&quot;:&quot;1421248220&quot;,&quot;pic_url&quot;:&quot;http:\/\/imgsrc.baidu.com\/forum\/pic\/item\/6afa80cb39dbb6fdf9de234d0b24ab18962b37f0.jpg&quot;,&quot;props_id&quot;:2}},&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u8317\u82b1\u5c11\u5e05&quot;,&quot;id&quot;:&quot;tb.1.a0b6ca3c.54TCKizU2c9oSYWqNF7NqA&quot;}' href="/home/main?id=tb.1.a0b6ca3c.54TCKizU2c9oSYWqNF7NqA&ie=utf-8&fr=pb" target="_blank" username="茗花少帅">茗花少帅</a>
:<span class="lzl_content_main" data-username="">你就只看水花不看空中姿态吗 </span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:12</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726510645,&quot;showname&quot;:&quot;\u4e1c\u534e\u6b66\u5170&quot;,&quot;user_name&quot;:&quot;\u897f\u5b89\u4ea4\u5927\u524d\u4e00\u767e&quot;,&quot;portrait&quot;:&quot;tb.1.774316af.RqsfwTN2w3AJQFmXAO_MHw&quot;}'>
<a rel="noopener" name="150726510645"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u897f\u5b89\u4ea4\u5927\u524d\u4e00\u767e&quot;,&quot;id&quot;:&quot;tb.1.774316af.RqsfwTN2w3AJQFmXAO_MHw&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.774316af.RqsfwTN2w3AJQFmXAO_MHw&fr=pb" username="西安交大前一百">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.774316af.RqsfwTN2w3AJQFmXAO_MHw"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:{&quot;all_level&quot;:{&quot;2&quot;:{&quot;end_time&quot;:&quot;1644033630&quot;,&quot;level&quot;:2,&quot;pic_url&quot;:&quot;http:\/\/imgsrc.baidu.com\/forum\/pic\/item\/6afa80cb39dbb6fdf9de234d0b24ab18962b37f0.jpg&quot;,&quot;score_limit&quot;:8000}},&quot;level&quot;:{&quot;end_time&quot;:&quot;1644033630&quot;,&quot;pic_url&quot;:&quot;http:\/\/imgsrc.baidu.com\/forum\/pic\/item\/6afa80cb39dbb6fdf9de234d0b24ab18962b37f0.jpg&quot;,&quot;props_id&quot;:2}},&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u897f\u5b89\u4ea4\u5927\u524d\u4e00\u767e&quot;,&quot;id&quot;:&quot;tb.1.774316af.RqsfwTN2w3AJQFmXAO_MHw&quot;}' href="/home/main?id=tb.1.774316af.RqsfwTN2w3AJQFmXAO_MHw&ie=utf-8&fr=pb" target="_blank" username="西安交大前一百">东华武兰</a>
:<span class="lzl_content_main" data-username="">经典只看水花 </span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:12</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726514057,&quot;showname&quot;:&quot;\u4e0a\u4e0b\u73ed\u8981\u6ce8\u610f&quot;,&quot;user_name&quot;:&quot;\u4e0a\u4e0b\u73ed\u8981\u6ce8\u610f&quot;,&quot;portrait&quot;:&quot;tb.1.bcab9641.aHxSViAprkm6E0KQWrw3pg&quot;}'>
<a rel="noopener" name="150726514057"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u4e0a\u4e0b\u73ed\u8981\u6ce8\u610f&quot;,&quot;id&quot;:&quot;tb.1.bcab9641.aHxSViAprkm6E0KQWrw3pg&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.bcab9641.aHxSViAprkm6E0KQWrw3pg&fr=pb" username="上下班要注意">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.bcab9641.aHxSViAprkm6E0KQWrw3pg"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:[],&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u4e0a\u4e0b\u73ed\u8981\u6ce8\u610f&quot;,&quot;id&quot;:&quot;tb.1.bcab9641.aHxSViAprkm6E0KQWrw3pg&quot;}' href="/home/main?id=tb.1.bcab9641.aHxSViAprkm6E0KQWrw3pg&ie=utf-8&fr=pb" target="_blank" username="上下班要注意">上下班要注意</a>
:<span class="lzl_content_main" data-username="">分数正常吧 </span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:13</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726520372,&quot;showname&quot;:&quot;\u9759\u770b\u8682\u8681\u4e0a\u6811&quot;,&quot;user_name&quot;:&quot;\u9759\u770b\u8682\u8681\u4e0a\u6811&quot;,&quot;portrait&quot;:&quot;tb.1.7ea539b2.dHz6uxKdbItmtGkwZeV6oQ&quot;}'>
<a rel="noopener" name="150726520372"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u9759\u770b\u8682\u8681\u4e0a\u6811&quot;,&quot;id&quot;:&quot;tb.1.7ea539b2.dHz6uxKdbItmtGkwZeV6oQ&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.7ea539b2.dHz6uxKdbItmtGkwZeV6oQ&fr=pb" username="静看蚂蚁上树">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.7ea539b2.dHz6uxKdbItmtGkwZeV6oQ"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:[],&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u9759\u770b\u8682\u8681\u4e0a\u6811&quot;,&quot;id&quot;:&quot;tb.1.7ea539b2.dHz6uxKdbItmtGkwZeV6oQ&quot;}' href="/home/main?id=tb.1.7ea539b2.dHz6uxKdbItmtGkwZeV6oQ&ie=utf-8&fr=pb" target="_blank" username="静看蚂蚁上树">静看蚂蚁上树</a>
:
<span class="lzl_content_main" data-username="">
回复 <a href="http://tieba.baidu.com/i/sys/jump?un= " onclick="Stats.sendRequest('fr=tb0_forum&st_mod=pb&st_value=atlink');" onmouseover="showattip(this)" onmouseout="hideattip(this)" username=" " portrait="tb.1.c5c485ab.Cf5aDgd1NxLxZlej8r4LWg" target="_blank" class="at">国际体坛巨星青椒肉丝</a>
:吃酸黄瓜吃多了<img class="BDE_Smiley" width="30" height="30" changedsize="false" src="https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon22.png">
<img class="BDE_Smiley" width="30" height="30" changedsize="false" src="https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon22.png">
<img class="BDE_Smiley" width="30" height="30" changedsize="false" src="https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon22.png">
</span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:14</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726524963,&quot;showname&quot;:&quot;\u4e0d\u61c2\u53d6\u5565\u540d\u5b57\ud83d\ude1c&quot;,&quot;user_name&quot;:&quot;\u9ec4\u5c0f\u6e2forz&quot;,&quot;portrait&quot;:&quot;tb.1.e74fa44d.lLp46IIhj8NhhHk12z_qRA&quot;}'>
<a rel="noopener" name="150726524963"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u9ec4\u5c0f\u6e2forz&quot;,&quot;id&quot;:&quot;tb.1.e74fa44d.lLp46IIhj8NhhHk12z_qRA&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.e74fa44d.lLp46IIhj8NhhHk12z_qRA&fr=pb" username="黄小港orz">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.e74fa44d.lLp46IIhj8NhhHk12z_qRA"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:[],&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u9ec4\u5c0f\u6e2forz&quot;,&quot;id&quot;:&quot;tb.1.e74fa44d.lLp46IIhj8NhhHk12z_qRA&quot;}' href="/home/main?id=tb.1.e74fa44d.lLp46IIhj8NhhHk12z_qRA&ie=utf-8&fr=pb" target="_blank" username="黄小港orz">不懂取啥名字😜</a>
:
<span class="lzl_content_main" data-username="">
请你去跟国际泳联投诉<img class="BDE_Smiley" width="30" height="30" changedsize="false" src="https://gsp0.baidu.com/5aAHeD3nKhI2p27j8IqW0jdnxx1xbK/tb/editor/images/client/image_emoticon22.png">
</span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:15</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726535666,&quot;showname&quot;:&quot;\ud83d\udcab\u6cfd\u8d6b\u62c9\ud83d\udcaf&quot;,&quot;user_name&quot;:&quot;\u5feb\u770b\u5361\u5361\u5361\u5361&quot;,&quot;portrait&quot;:&quot;tb.1.5f510507.B4GLS91flqmWc5QXoaRCoQ&quot;}'>
<a rel="noopener" name="150726535666"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u5feb\u770b\u5361\u5361\u5361\u5361&quot;,&quot;id&quot;:&quot;tb.1.5f510507.B4GLS91flqmWc5QXoaRCoQ&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.5f510507.B4GLS91flqmWc5QXoaRCoQ&fr=pb" username="快看卡卡卡卡">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.5f510507.B4GLS91flqmWc5QXoaRCoQ"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:{&quot;all_level&quot;:{&quot;2&quot;:{&quot;end_time&quot;:&quot;1539783937&quot;,&quot;level&quot;:2,&quot;pic_url&quot;:&quot;http:\/\/imgsrc.baidu.com\/forum\/pic\/item\/6afa80cb39dbb6fdf9de234d0b24ab18962b37f0.jpg&quot;,&quot;score_limit&quot;:8000}},&quot;level&quot;:{&quot;end_time&quot;:&quot;1539783937&quot;,&quot;pic_url&quot;:&quot;http:\/\/imgsrc.baidu.com\/forum\/pic\/item\/6afa80cb39dbb6fdf9de234d0b24ab18962b37f0.jpg&quot;,&quot;props_id&quot;:2}},&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u5feb\u770b\u5361\u5361\u5361\u5361&quot;,&quot;id&quot;:&quot;tb.1.5f510507.B4GLS91flqmWc5QXoaRCoQ&quot;}' href="/home/main?id=tb.1.5f510507.B4GLS91flqmWc5QXoaRCoQ&ie=utf-8&fr=pb" target="_blank" username="快看卡卡卡卡">💫泽赫拉💯</a>
:<span class="lzl_content_main" data-username="">第五跳陈空中分腿了空中姿态明显全红婵更好 </span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:17</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_single_post j_lzl_s_p " data-field='{&quot;spid&quot;:150726536076,&quot;showname&quot;:&quot;\u55ef\u55ef\u54e6\u54e6\u554a\u554a\ud83d\udc36&quot;,&quot;user_name&quot;:&quot;\u55ef\u55ef\u54e6\u54e6\u554a\u554a\u54fc&quot;,&quot;portrait&quot;:&quot;tb.1.ba071e03._M1o8S5FX4p57pZBJa91CQ&quot;}'>
<a rel="noopener" name="150726536076"></a>
<a rel="noopener" data-field='{&quot;un&quot;:&quot;\u55ef\u55ef\u54e6\u54e6\u554a\u554a\u54fc&quot;,&quot;id&quot;:&quot;tb.1.ba071e03._M1o8S5FX4p57pZBJa91CQ&quot;}' target="_blank" class="j_user_card lzl_p_p" href="/home/main?id=tb.1.ba071e03._M1o8S5FX4p57pZBJa91CQ&fr=pb" username="嗯嗯哦哦啊啊哼">
<img src="https://gss0.bdstatic.com/6LZ1dD3d1sgCo2Kml5_Y_D3/sys/portrait/item/tb.1.ba071e03._M1o8S5FX4p57pZBJa91CQ"/>
</a>
<div class="lzl_cnt" data-field='{&quot;iconArr&quot;:null,&quot;free_flag&quot;:null}'>
<a rel="noopener" class="at j_user_card " data-field='{&quot;un&quot;:&quot;\u55ef\u55ef\u54e6\u54e6\u554a\u554a\u54fc&quot;,&quot;id&quot;:&quot;tb.1.ba071e03._M1o8S5FX4p57pZBJa91CQ&quot;}' href="/home/main?id=tb.1.ba071e03._M1o8S5FX4p57pZBJa91CQ&ie=utf-8&fr=pb" target="_blank" username="嗯嗯哦哦啊啊哼">嗯嗯哦哦啊啊🐶</a>
:
<span class="lzl_content_main" data-username="">
回复 <a href="http://tieba.baidu.com/i/sys/jump?un= " onclick="Stats.sendRequest('fr=tb0_forum&st_mod=pb&st_value=atlink');" onmouseover="showattip(this)" onmouseout="hideattip(this)" username=" " portrait="tb.1.84497425.b5GLK5lGm90mTB2BhjrgpA" target="_blank" class="at">美味蟹黄堡💞</a>
:你不会看起跳高度和空中姿态
</span>
<div class="lzl_content_reply">
<span class="lzl_jb" style="display:none;"></span>
<span class="lzl_op_list j_lzl_o_l" style="display:none;"></span>
<span class="lzl_time">2024-8-6 22:17</span>
<a rel="noopener" href="#" class="lzl_s_r">回复</a>
</div>
</div>
</li>
<li class="lzl_li_pager j_lzl_l_p lzl_li_pager_s" data-field='{&quot;total_num&quot;:16,&quot;total_page&quot;:2}'>
<a rel="noopener" class="j_lzl_p btn-sub btn-small pull-right" href="##">
<i class="icon-reply"></i>
我也说一句
</a>
<p class="j_pager l_pager pager_theme_2">
<span class="tP">1</span>
<a href="#2">2</a>
<a href="#2">下一页</a>
<a href="#2">尾页</a>
</p>
</li>
@@ -0,0 +1,96 @@
<div class="s_post_list">
<div class="s_post"><span class="p_title"><a data-tid="9117888152" data-fid="26976424" class="bluelink"
href="/p/9117888152?pid=150718967291&amp;cid=0#150718967291"
target="_blank">武汉交互空间科技富士康10亿加码中国大陆印度为何逐渐失宠</a></span>
<div class="p_content">
全球知名的电子制造服务巨头富士康的母公司鸿海精密工业股份有限公司正式对外发布了一则重大投资公告富士康将在郑州投资建设新事业总部大楼承载新事业总部功能这一战略举措不仅彰显了富士康对中国市场持续深化的承诺与信心也预示着该集团业务版图的新一轮扩张与升级
项目一期选址位于郑东新区建筑面积约700公亩总投资约10亿元人民币主要建设总部管理中心研发中心和工程中心战略产业发展中心战略产业金融平台
</div>
贴吧<a data-fid="26976424" class="p_forum" href="/f?kw=%CE%E4%BA%BA%BD%BB%BB%A5%BF%D5%BC%E4"
target="_blank"><font class="p_violet">武汉交互空间</font></a>作者<a
href="/home/main?un=VR%D0%E9%C4%E2%B4%EF%C8%CB" target="_blank"><font class="p_violet">VR虚拟达人</font></a>
<font class="p_green p_date">2024-08-05 16:45</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9114743782" data-fid="90367" class="bluelink"
href="/p/9114743782?pid=150705176739&amp;cid=0#150705176739"
target="_blank">请各位急用玛尼的小心骗子最多</a></span>
<div class="p_content">
这里面到处是骗子大家小心特别那些叫出村背货的基本是卖园区天下没有那么好的事就是有这好事我们在边境上的人比你们最清楚轮不到你们边境上比你们胆子大的人大把你一不熟悉小路为什么叫你带货东南亚带货的集结地一般在南宁防城港昆明西双版纳临沧然后师机接了走小路出去南宁防城港坐船出去好多都是二十几手的中介之前卖园区一个三十万现在不知道行情但好多园区不收
</div>
贴吧<a data-fid="90367" class="p_forum" href="/f?kw=%B1%B3%B0%FC%BF%CD" target="_blank"><font class="p_violet">背包客</font></a>作者<a
href="/home/main?un=%CC%F9%B0%C9%D3%C3%BB%A7_GC64AUS" target="_blank"><font class="p_violet">贴吧用户_GC64AUS</font></a>
<font class="p_green p_date">2024-08-03 07:35</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9095684158" data-fid="1388265" class="bluelink"
href="/p/9095684158?pid=150616716870&amp;cid=0#150616716870"
target="_blank">*2025泰国冷链制冷运输展*东南亚外贸出口</a></span>
<div class="p_content">**2025泰国曼谷国际冷库空调制冷仓储暨冷链运输展 *2025泰国冷链制冷运输展*东南亚外贸出口-观展游览考察
展出时间2025-7具体时间待定 展出地点泰国曼谷会展中心 展会周期一年一届 组展单位北京励航国际商务会展有限公司
人员跟团观展补贴为您节省成本寻找适合您的市场
本公司为您提供观展考察机会让您在大型展会上获得世界同行**科技的资料同时感受异域文化气息展会现场走展考察当地游览当地相关市
</div>
贴吧<a data-fid="1388265" class="p_forum" href="/f?kw=%B9%FA%BC%CA%D5%B9%BB%E1" target="_blank"><font
class="p_violet">国际展会</font></a>作者<a href="/home/main?un=zhaot_188" target="_blank"><font
class="p_violet">zhaot_188</font></a> <font class="p_green p_date">2024-07-19 15:44</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9093564752" data-fid="27984246" class="bluelink"
href="/p/9093564752?pid=150606964195&amp;cid=0#150606964195"
target="_blank">京湘楼创始人肖鑫创立于北京植根长沙百年美食传承</a></span>
<div class="p_content">来源标题京湘楼创始人肖鑫创立于北京植根长沙百年美食传承 京湘楼KING HERO品牌创始人肖鑫
京湘楼KING
HERO集酱板鸭肥肠鸭头鸭脖鸭肠小龙虾牛蛙捆鸡鸡爪鱼嘴巴鱼尾鱿鱼牛肉猪头肉等特色食品卤制加工包装与生产经营2022年3月在北京朝阳区双井开设了第一家京湘楼·鲜卤集市卤味熟食快餐店2023年5月在湖南省长沙市开福区注册成立了长沙京湘楼品牌管理有限公司京湘楼作为品
</div>
贴吧<a data-fid="27984246" class="p_forum" href="/f?kw=%BE%A9%CF%E6%C2%A5" target="_blank"><font
class="p_violet">京湘楼</font></a>作者<a href="/home/main?un=%CC%EC%C9%F1%B6%C9%B3%BE" target="_blank"><font
class="p_violet">天神渡尘</font></a> <font class="p_green p_date">2024-07-17 23:43</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9088419293" data-fid="310" class="bluelink"
href="/p/9088419293?pid=150582471307&amp;cid=0#150582471307"
target="_blank">广州能争取到迪士尼与环球落户吗</a></span>
<div class="p_content">
不是二选一而是全都要上一组数据上海迪士尼2016年开业就接待游客超过1.2亿人次香港迪士尼2023全年游客人数才640万人次约等于无这么低的入园人次已经引来迪士尼方面的不悦
美国有两个迪士尼说实话迪士尼的门票并不高普通人都去的起中国完全有能力建两到三个迪士尼欧洲只有第一个迪士尼因为它的人口只有中国的一半假设中国人一年吃一包盐一年就是14包那么欧洲就是七亿包盐盐再便宜欧洲人也不可能一人吃
</div>
贴吧<a data-fid="310" class="p_forum" href="/f?kw=%B5%D8%C0%ED" target="_blank"><font
class="p_violet">地理</font></a>作者<a href="/home/main?un=SeaRoutes" target="_blank"><font
class="p_violet">SeaRoutes</font></a> <font class="p_green p_date">2024-07-13 20:17</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9088416365" data-fid="7561034" class="bluelink"
href="/p/9088416365?pid=150582456551&amp;cid=0#150582456551"
target="_blank">#城市GDP#广州应该全力去争取迪士尼和环球影城</a></span>
<div class="p_content">
不是二选一而是全都要上一组数据上海迪士尼2016年开业就接待游客超过1.2亿人次香港迪士尼2023全年游客人数才640万人次约等于无这么低的入园人次已经引来迪士尼方面的不悦
美国有两个迪士尼说实话迪士尼的门票并不高普通人都去的起中国完全有能力建两到三个迪士尼欧洲只有第一个迪士尼因为它的人口只有中国的一半假设中国人一年吃一包盐一年就是14包那么欧洲就是七亿包盐盐再便宜欧洲人也不可能一人吃
</div>
贴吧<a data-fid="7561034" class="p_forum" href="/f?kw=%B3%C7%CA%D0gdp" target="_blank"><font class="p_violet">城市gdp</font></a>作者<a
href="/home/main?un=SeaRoutes" target="_blank"><font class="p_violet">SeaRoutes</font></a> <font
class="p_green p_date">2024-07-13 20:14</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9087419039" data-fid="46374" class="bluelink"
href="/p/9087419039?pid=150577861626&amp;cid=0#150577861626"
target="_blank">云南省首批云南日报昆明新闻头条聚焦阳宗海省级物流枢纽建设</a></span>
<div class="p_content">
7月11日云南日报昆明新闻头条刊发文章阳宗海风景名胜区立足衔接西部陆海新通道与中老铁路优势加速28个物流枢纽设施建设聚焦昆明阳宗海风景名胜区系统推进省级物流枢纽建设和功能提升深挖比较优势壮大物流产业据云南省发展和改革委员会在昆明召开的新闻发布会上公布今年全省共有5地纳入云南省第一批省级物流枢纽和省级骨干冷链物流基地建设名单其中昆明市有两家获批阳宗海物流枢纽上榜一起来看近日云南省
</div>
贴吧<a data-fid="46374" class="p_forum" href="/f?kw=%C0%A5%C3%F7" target="_blank"><font
class="p_violet">昆明</font></a>作者<a href="/home/main?un=%8F%EC" target="_blank"><font
class="p_violet"></font></a> <font class="p_green p_date">2024-07-12 23:04</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9085102046" data-fid="348713" class="bluelink"
href="/p/9085102046?pid=150567555367&amp;cid=0#150567555367"
target="_blank">寻找弟弟很久没跟家里联系</a></span>
<div class="p_content">Kk四期世纪园区寻找弟弟外号大佐F3 2公司cj集团</div>
贴吧<a data-fid="348713" class="p_forum" href="/f?kw=%B6%AB%C4%CF%D1%C7" target="_blank"><font
class="p_violet">东南亚</font></a>作者<a href="/home/main?un=%CC%F9%B0%C9%D3%C3%BB%A7_GC2CtRa"
target="_blank"><font class="p_violet">贴吧用户_GC2CtRa</font></a>
<font class="p_green p_date">2024-07-11 07:53</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9083888071" data-fid="30" class="bluelink"
href="/p/9083888071?pid=150562129935&amp;cid=0#150562129935"
target="_blank">拉美 非洲 东南亚 南亚等发展中国家不太可能普及八小时双休吧</a></span>
<div class="p_content">拉美 东南亚的泰国 之类的连毒枭和黑色产业都管不好感觉普及八小时双休不太可能 缅甸和非洲军阀林立
跟军阀谈八小时双休那么不开玩笑缅北诈骗园区就能看出来
</div>
贴吧<a data-fid="30" class="p_forum" href="/f?kw=%C0%FA%CA%B7" target="_blank"><font
class="p_violet">历史</font></a>作者<a href="/home/main?un=yoursagain" target="_blank"><font
class="p_violet">yoursagain</font></a> <font class="p_green p_date">2024-07-10 09:00</font></div>
<div class="s_post"><span class="p_title"><a data-tid="9071937582" data-fid="8103241" class="bluelink"
href="/p/9071937582?pid=150510120873&amp;cid=0#150510120873"
target="_blank">东南亚园区 </a></span>
<div class="p_content"></div>
贴吧<a data-fid="8103241" class="p_forum" href="/f?kw=%D4%B0%C7%F8%D5%D0%C9%CC" target="_blank"><font
class="p_violet">园区招商</font></a>作者<a href="/home/main?un=QQ59052966" target="_blank"><font
class="p_violet">QQ59052966</font></a> <font class="p_green p_date">2024-06-30 12:09</font></div>
</div>
File diff suppressed because one or more lines are too long