变更
This commit is contained in:
@@ -0,0 +1,29 @@
|
||||
import re
|
||||
|
||||
# Demo of the main `re` entry points using one pattern: a six-digit number
# whose first digit is non-zero. The pattern is compiled once and reused
# instead of being re-spelled in every call.
PATTERN = re.compile(r'[1-9]\d{5}')

# search() scans the whole string and returns the first match found anywhere.
match = PATTERN.search('bad116105sfa')
print(match.group(0))

# match() only matches at the very start of the string. The text begins with
# letters, so the result is None; calling .group() on it without the guard
# would raise AttributeError.
match1 = PATTERN.match('bad116105sfa')
if match1:
    print(match1.group())

# With the leading letters removed, match() succeeds.
match2 = PATTERN.match('116105sfa')
if match2:
    print(match2.group())

# findall() returns every non-overlapping match as a list of strings.
ls = PATTERN.findall('116105sfa116110sadf')
print(ls)

# split() cuts the string at every match.
ls1 = PATTERN.split('sad116105ej116110')
print(ls1)  # ['sad', 'ej', '']

# maxsplit=1 stops after the first cut; the remainder is returned untouched.
ls2 = PATTERN.split('sad116105ej116110', maxsplit=1)
print(ls2)  # ['sad', 'ej116110']

# finditer() yields match objects only, so no truthiness check is needed.
for m1 in PATTERN.finditer('116105sfa 116110sadf'):
    print(m1.group(0))

# sub() with count=1 replaces only the first match.
match14 = PATTERN.sub('helloworld', '116105sfa 116110sadf', count=1)
print(match14)
|
||||
@@ -0,0 +1,50 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
|
||||
# Walk-through of the basic BeautifulSoup objects (Tag, attributes,
# NavigableString, Comment) against a small public demo page.
url = "https://python123.io/ws/demo.html"
try:
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
except requests.RequestException as exc:
    # Without a successful response there is nothing to parse. Stop here
    # instead of falling through to `r.text`, where `r` may be unbound.
    print("出现异常", exc)
    raise SystemExit(1) from exc

demo = r.text

soup = BeautifulSoup(demo, "html.parser")

# --- Tag ---
print(soup.title)  # the <title> tag

# --- Tag name ---
tag = soup.a
print(tag)  # the first <a> tag in the document
print(soup.a.name)
print(soup.a.parent.name)  # name of the <a> tag's parent
print(soup.a.parent.parent.name)  # name of that parent's parent

# --- Tag attributes ---
print(tag.attrs)  # all attributes of the tag
# Output recorded by the author:
# {'href': 'http://www.icourse163.org/course/BIT-268001', 'class': ['py1'], 'id': 'link1'}
print(tag.attrs['class'])  # value of the class attribute
# Output recorded by the author: ['py1']
print(type(tag.attrs))  # <class 'dict'>
print(type(tag))  # <class 'bs4.element.Tag'>

# --- Non-attribute string inside a tag ---
print(soup.a)  # the whole <a> tag
# <a class="py1" href="http://www.icourse163.org/course/BIT-268001" id="link1">Basic Python</a>
print(soup.a.string)  # only the text inside the <a> tag
# Basic Python
print(soup.b)  # <b>The demo python introduces several python courses.</b>
print(soup.b.string)  # The demo python introduces several python courses.
# .string yields just the text, without the surrounding <b></b> markup.
print(type(soup.a.string))  # <class 'bs4.element.NavigableString'>

# --- Comments ---
newsoup = BeautifulSoup("<b><!-- This is a comment--></b><p>This is not a comment</p>", "html.parser")
print(newsoup.b.string)
# The printed text alone does not reveal which value came from a comment;
# use type() to tell the two apart.
print(newsoup.p.string)
print(type(newsoup.b.string))
print(type(newsoup.p.string))
|
||||
@@ -0,0 +1,67 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": "# Crawl4AI",
|
||||
"id": "15b6b10304dc52ed"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"id": "initial_id",
|
||||
"metadata": {
|
||||
"collapsed": true,
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-07-10T01:25:20.945463Z",
|
||||
"start_time": "2025-07-10T01:25:18.137803Z"
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"import asyncio\n",
|
||||
"from crawl4ai import AsyncWebCrawler\n",
|
||||
"async def main():\n",
|
||||
" async with AsyncWebCrawler() as crawler:\n",
|
||||
" result = await crawler.arun(\"https://example.com\")\n",
|
||||
" print(result.markdown[:300]) # 打印前 300 个字符\n",
|
||||
"if __name__ == \"__main__\":\n",
|
||||
" asyncio.run(main())"
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "RuntimeError",
|
||||
"evalue": "asyncio.run() cannot be called from a running event loop",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
|
||||
"\u001B[1;31mRuntimeError\u001B[0m Traceback (most recent call last)",
|
||||
"Cell \u001B[1;32mIn[1], line 8\u001B[0m\n\u001B[0;32m 6\u001B[0m \u001B[38;5;28mprint\u001B[39m(result\u001B[38;5;241m.\u001B[39mmarkdown[:\u001B[38;5;241m300\u001B[39m]) \u001B[38;5;66;03m# 打印前 300 个字符\u001B[39;00m\n\u001B[0;32m 7\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;18m__name__\u001B[39m \u001B[38;5;241m==\u001B[39m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124m__main__\u001B[39m\u001B[38;5;124m\"\u001B[39m:\n\u001B[1;32m----> 8\u001B[0m asyncio\u001B[38;5;241m.\u001B[39mrun(main())\n",
|
||||
"File \u001B[1;32mD:\\ProgramTools\\Anaconda\\Lib\\asyncio\\runners.py:190\u001B[0m, in \u001B[0;36mrun\u001B[1;34m(main, debug, loop_factory)\u001B[0m\n\u001B[0;32m 161\u001B[0m \u001B[38;5;250m\u001B[39m\u001B[38;5;124;03m\"\"\"Execute the coroutine and return the result.\u001B[39;00m\n\u001B[0;32m 162\u001B[0m \n\u001B[0;32m 163\u001B[0m \u001B[38;5;124;03mThis function runs the passed coroutine, taking care of\u001B[39;00m\n\u001B[1;32m (...)\u001B[0m\n\u001B[0;32m 186\u001B[0m \u001B[38;5;124;03m asyncio.run(main())\u001B[39;00m\n\u001B[0;32m 187\u001B[0m \u001B[38;5;124;03m\"\"\"\u001B[39;00m\n\u001B[0;32m 188\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m events\u001B[38;5;241m.\u001B[39m_get_running_loop() \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;129;01mnot\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[0;32m 189\u001B[0m \u001B[38;5;66;03m# fail fast with short traceback\u001B[39;00m\n\u001B[1;32m--> 190\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mRuntimeError\u001B[39;00m(\n\u001B[0;32m 191\u001B[0m \u001B[38;5;124m\"\u001B[39m\u001B[38;5;124masyncio.run() cannot be called from a running event loop\u001B[39m\u001B[38;5;124m\"\u001B[39m)\n\u001B[0;32m 193\u001B[0m \u001B[38;5;28;01mwith\u001B[39;00m Runner(debug\u001B[38;5;241m=\u001B[39mdebug, loop_factory\u001B[38;5;241m=\u001B[39mloop_factory) \u001B[38;5;28;01mas\u001B[39;00m runner:\n\u001B[0;32m 194\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m runner\u001B[38;5;241m.\u001B[39mrun(main)\n",
|
||||
"\u001B[1;31mRuntimeError\u001B[0m: asyncio.run() cannot be called from a running event loop"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 1
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 2
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython2",
|
||||
"version": "2.7.6"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
import asyncio
|
||||
from crawl4ai import AsyncWebCrawler
|
||||
async def main():
    """Crawl https://example.com and print the first 300 Markdown characters."""
    async with AsyncWebCrawler() as browser:
        page = await browser.arun("https://example.com")
        preview = page.markdown[:300]
        print(preview)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,20 @@
|
||||
import asyncio
|
||||
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
|
||||
|
||||
|
||||
async def main():
    """Crawl https://example.com with explicit browser/run configs and print its Markdown."""
    # Set headless=False to watch the browser while it works.
    browser_settings = BrowserConfig(headless=True)
    # BYPASS fetches fresh content on every run (the author notes that the
    # default is CacheMode.ENABLED).
    run_settings = CrawlerRunConfig(cache_mode=CacheMode.BYPASS)

    async with AsyncWebCrawler(config=browser_settings) as session:
        page = await session.arun(url="https://example.com", config=run_settings)
        print(page.markdown)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,26 @@
|
||||
import asyncio
|
||||
from crawl4ai.content_filter_strategy import PruningContentFilter
|
||||
from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerator
|
||||
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig, CacheMode
|
||||
|
||||
# Markdown generator whose output is pruned by a fixed-threshold content filter.
_pruning_filter = PruningContentFilter(threshold=0.4, threshold_type="fixed")
md_generator = DefaultMarkdownGenerator(content_filter=_pruning_filter)

# Run configuration: skip the cache so every run re-fetches the page, and use
# the custom Markdown generator defined above.
config = CrawlerRunConfig(
    markdown_generator=md_generator,
    cache_mode=CacheMode.BYPASS,
)


async def main():
    """Crawl Hacker News and print the raw vs. filtered Markdown lengths."""
    async with AsyncWebCrawler() as session:
        page = await session.arun("https://news.ycombinator.com", config=config)
        raw_length = len(page.markdown.raw_markdown)
        print("原始 Markdown 长度:", raw_length)
        fit_length = len(page.markdown.fit_markdown)
        print("过滤后 Markdown 长度:", fit_length)


# Entry point: run the coroutine on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,69 @@
|
||||
import asyncio
|
||||
import json
|
||||
from crawl4ai import AsyncWebCrawler
|
||||
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy
|
||||
from pprint import pprint as pp
|
||||
|
||||
async def extract_books():
    """Scrape the Douban "Top 250" book list and save it to ``books.json``.

    Crawls ten list pages (25 entries per page, selected through the
    ``start`` query parameter), extracts every ``tr.item`` row with a
    CSS-selector schema, writes the combined list to ``books.json`` in the
    current working directory and returns it.

    Returns:
        list: The extracted entries from all pages, in crawl order.

    Raises:
        RuntimeError: If any page fails to crawl.
    """
    page_count = 10
    page_size = 25

    schema = {
        "name": "Douban Book 250",
        "baseSelector": "tr.item",
        "type": "list",
        "fields": [
            {
                "name": "title",
                "type": "text",
                "selector": ".pl2 > a",
            },
            {
                "name": "url",
                "type": "attribute",
                "selector": ".pl2 > a",
                "attribute": "href",
            },
            {
                "name": "info",
                "type": "text",
                "selector": ".pl",
            },
            {
                "name": "rate",
                "type": "text",
                "selector": ".rating_nums",
            },
            {
                "name": "quote",
                "type": "text",
                "selector": "span.inq",
            },
        ],
    }

    extraction_strategy = JsonCssExtractionStrategy(schema, verbose=True)
    all_books = []

    async with AsyncWebCrawler(verbose=True) as crawler:
        for i in range(page_count):
            url = f"https://book.douban.com/top250?start={i * page_size}"
            result = await crawler.arun(
                url=url,
                extraction_strategy=extraction_strategy,
                bypass_cache=True,
            )
            # Explicit check instead of `assert`: assertions are stripped
            # under `python -O`, which would let a failed crawl fall through
            # to json.loads() below.
            if not result.success:
                raise RuntimeError(f"Failed to crawl the page: {url}")

            books = json.loads(result.extracted_content)
            all_books.extend(books)
            print(f"成功提取第 {i + 1} 页的 {len(books)} 本图书")

            # Throttle between requests; no need to wait after the last page.
            if i < page_count - 1:
                await asyncio.sleep(2)

    # Persist everything that was collected.
    with open("books.json", "w", encoding="utf-8") as f:
        json.dump(all_books, f, ensure_ascii=False, indent=2)

    print(f"\n总共提取了 {len(all_books)} 本图书")
    return all_books


if __name__ == "__main__":
    asyncio.run(extract_books())
|
||||
@@ -0,0 +1,12 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class GameItem(scrapy.Item):
    """Item container for the game project; no fields are declared yet."""

    # Declare fields here, for example:
    #   name = scrapy.Field()
|
||||
@@ -0,0 +1,103 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
|
||||
class GameSpiderMiddleware:
    """Pass-through spider middleware from the Scrapy project template.

    Every hook is optional: per the template notes, Scrapy behaves as if an
    undefined hook leaves the passed objects unmodified.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and subscribe it to the ``spider_opened`` signal."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_spider_input(self, response, spider):
        """Hook for each response on its way into the spider.

        Should return None or raise an exception; this implementation does
        nothing and returns None.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Yield every Request/item the spider produced for ``response``, unchanged."""
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Hook for exceptions raised by a spider or by ``process_spider_input()``.

        May return None or an iterable of Request/item objects; this
        implementation returns None.
        """
        return None

    def process_start_requests(self, start_requests, spider):
        """Yield the spider's start requests unchanged (requests only, no items)."""
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        spider.logger.info("Spider opened: %s" % spider.name)
|
||||
|
||||
|
||||
class GameDownloaderMiddleware:
    """Pass-through downloader middleware from the Scrapy project template.

    Every hook is optional: per the template notes, Scrapy behaves as if an
    undefined hook leaves the passed objects unmodified.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and subscribe it to the ``spider_opened`` signal."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_request(self, request, spider):
        """Hook for each request passing through the downloader middleware.

        Allowed outcomes (from the template notes): return None to continue
        processing, return a Response or Request object, or raise
        IgnoreRequest. This implementation always returns None.
        """
        return None

    def process_response(self, request, response, spider):
        """Return the downloaded ``response`` unchanged.

        The template allows returning a Response, returning a Request, or
        raising IgnoreRequest.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Hook for exceptions raised by a download handler or ``process_request()``.

        Returning None lets exception processing continue; returning a
        Response or Request would stop the ``process_exception()`` chain.
        This implementation returns None.
        """
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        spider.logger.info("Spider opened: %s" % spider.name)
|
||||
@@ -0,0 +1,20 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
# Pipelines are inactive by default; enable them in the project settings
# (ITEM_PIPELINES).
class GamePipeline:
    """Pipeline that prints each item and the name of the spider that produced it."""

    def process_item(self, item, spider):
        """Print ``item`` and ``spider.name``, then pass the item on unchanged.

        ``item`` is the scraped data; ``spider`` is the spider that yielded it.
        """
        for value in (item, spider.name):
            print(value)
        return item
|
||||
|
||||
class NewPipeline:
    """Pipeline that tags every item with an extra ``love`` entry."""

    def process_item(self, item, spider):
        """Set ``item['love']`` and return the same item.

        ``item`` is the scraped data; ``spider`` is the spider that yielded it.
        """
        key, value = 'love', 'i love zhou'
        item[key] = value
        return item
|
||||
@@ -0,0 +1,96 @@
|
||||
# Scrapy settings for game project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
BOT_NAME = "game"
|
||||
|
||||
SPIDER_MODULES = ["game.spiders"]
|
||||
NEWSPIDER_MODULE = "game.spiders"
|
||||
|
||||
# Log level; available levels: DEBUG, INFO, WARNING, ERROR, CRITICAL.
LOG_LEVEL="WARNING"
|
||||
|
||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||
#USER_AGENT = "game (+http://www.yourdomain.com)"
|
||||
|
||||
# Obey robots.txt rules
|
||||
ROBOTSTXT_OBEY = True
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
#CONCURRENT_REQUESTS = 32
|
||||
|
||||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
#DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
#COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
#TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
#DEFAULT_REQUEST_HEADERS = {
|
||||
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
# "Accept-Language": "en",
|
||||
#}
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
#SPIDER_MIDDLEWARES = {
|
||||
# "game.middlewares.GameSpiderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
#DOWNLOADER_MIDDLEWARES = {
|
||||
# "game.middlewares.GameDownloaderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable extensions
|
||||
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||
#EXTENSIONS = {
|
||||
# "scrapy.extensions.telnet.TelnetConsole": None,
|
||||
#}
|
||||
|
||||
# Configure item pipelines
|
||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
ITEM_PIPELINES = {
|
||||
"game.pipelines.GamePipeline": 300,
|
||||
# "game.pipelines.NewPipeline": 299,
|
||||
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = "httpcache"
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
|
||||
|
||||
# Set settings whose default value is deprecated to a future-proof value
|
||||
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
|
||||
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
|
||||
FEED_EXPORT_ENCODING = "utf-8"
|
||||
@@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
||||
@@ -0,0 +1,33 @@
|
||||
import scrapy
|
||||
|
||||
|
||||
class XiaoSpider(scrapy.Spider):
    """Spider that yields the name and category of each game listed on the start page."""

    name = "xiao"  # spider name
    allowed_domains = ["4399.com"]  # domains the spider may crawl
    start_urls = ["http://www.4399.com/flash/"]

    def parse(self, response):
        """Default callback: yield one ``{"name", "category"}`` dict per list entry.

        Entries are the ``<li>`` children of the ``<ul>`` whose class
        attribute is exactly ``"n-game cf"``. Items must be yielded (not
        collected and returned at the end) so each one is handed on to the
        item pipeline as soon as it is produced.
        """
        entries = response.xpath('//ul[@class="n-game cf"]/li')
        for entry in entries:
            # extract_first() takes a single value; per the author's note it
            # gives None when nothing matched.
            yield {
                "name": entry.xpath("./a/b/text()").extract_first(),
                "category": entry.xpath("./em/a/text()").extract_first(),
            }
|
||||
@@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = game.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = game
|
||||
@@ -0,0 +1,40 @@
|
||||
import self as self
|
||||
from selenium.webdriver import Chrome, Keys # 键盘操作
|
||||
import time
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
|
||||
# Prints the author name and text of every forward of one bilibili post.
POST_URL = 'https://t.bilibili.com/19929202306164175'
FORWARD_BUTTON_XPATH = r'//*[@id="app"]/div[2]/div/div/div[1]/div[4]/div[1]/div'
SCROLL_TO_BOTTOM_JS = 'window.scrollTo(0,document.body.scrollHeight)'
# Seconds to wait after scrolling so newly requested entries can render.
SCROLL_PAUSE_SECONDS = 5

# Options must be built first and handed to the driver; otherwise they have
# no effect. The Chromium switch is spelled with hyphens.
option = Options()
option.add_argument('--disable-blink-features=AutomationControlled')
web = Chrome(options=option)

try:
    web.get(POST_URL)

    # Click the "forward" tab.
    test_poet1 = web.find_element(By.XPATH, FORWARD_BUTTON_XPATH)
    test_poet1.click()
    # Give the forward list time to load.
    time.sleep(2)

    number = 0  # how many forwards have been printed so far
    while True:
        # Re-query on every pass: entries loaded by scrolling do not exist in
        # a list fetched before the scroll.
        # NOTE(review): assumes the page only appends entries and keeps the
        # earlier ones in place — confirm against the live page.
        al_list = web.find_elements(By.CLASS_NAME, 'bili-dyn-forward__item')
        fresh = al_list[number:]
        if not fresh:
            break  # nothing new appeared after the last scroll

        for al in fresh:
            name = al.find_element(By.CLASS_NAME, 'default').text
            content = al.find_element(By.CLASS_NAME, 'bili-rich-text__content').text
            print(name, content)
            number += 1

        # Scroll to the bottom, nudge the page upwards, then scroll down
        # again to trigger loading of the next batch.
        web.execute_script(SCROLL_TO_BOTTOM_JS)
        try:
            web.find_element(By.XPATH, "/html/body").send_keys(Keys.UP)
        except Exception as exc:
            # The nudge is best-effort; report the failure and carry on.
            print(f"could not send key to page body: {exc}")
        web.execute_script(SCROLL_TO_BOTTOM_JS)
        time.sleep(SCROLL_PAUSE_SECONDS)
finally:
    # Always release the browser, even when an element lookup fails.
    web.quit()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,10 @@
|
||||
import scrapy
|
||||
|
||||
|
||||
class Db250Spider(scrapy.Spider):
    """Spider skeleton named ``db250``; parsing is not implemented yet."""

    name = "db250"
    allowed_domains = ["douban.com"]
    start_urls = ["http://douban.com/"]

    def parse(self, response):
        """Placeholder callback: ignores ``response`` and returns None."""
        return None
|
||||
@@ -0,0 +1,10 @@
|
||||
import scrapy
|
||||
|
||||
|
||||
class DoubanSpider(scrapy.Spider):
    """Spider skeleton named ``douban``; parsing is not implemented yet."""

    name = "douban"
    allowed_domains = ["douban.com"]
    start_urls = ["http://douban.com/"]

    def parse(self, response):
        """Placeholder callback: ignores ``response`` and returns None."""
        return None
|
||||
@@ -0,0 +1,15 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class DoubanItem(scrapy.Item):
    """Scraped-item model for the douban project with four declared fields."""

    # NOTE(review): field meanings are inferred from their names only —
    # confirm against the spider that fills them.
    name = scrapy.Field()
    info = scrapy.Field()
    score = scrapy.Field()
    desc = scrapy.Field()
|
||||
|
||||
@@ -0,0 +1,134 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
import base64
|
||||
|
||||
from scrapy import signals
|
||||
import random
|
||||
from douban.settings import USER_AGENT_LIST
|
||||
from douban.settings import PROXY_LIST
|
||||
from .myextend import pro
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
|
||||
class RandomUserAgent(object):
    """Downloader middleware that gives each request a random User-Agent."""

    def process_request(self, request, spider):
        """Set the ``User-Agent`` header to a random entry of USER_AGENT_LIST.

        Args:
            request: The outgoing request; its ``headers`` mapping is modified
                in place.
            spider: The spider issuing the request (unused).

        Returns:
            None, so the request continues through the remaining middleware.
        """
        ua = random.choice(USER_AGENT_LIST)
        # The key must be the bare header name. The previous key carried
        # literal quote characters ("'User-Agent'"), which created a bogus
        # header and left the real User-Agent untouched.
        request.headers["User-Agent"] = ua
|
||||
|
||||
|
||||
class RandomProxy(object):
    """Downloader middleware that routes each request through a random proxy.

    Proxy addresses come from ``pro.proxy_list``; the account credentials are
    embedded in the proxy URL.
    """

    # SECURITY(review): these credentials are committed to source control.
    # They are kept as class attributes so existing behaviour is unchanged,
    # but they should be moved to configuration/environment and rotated.
    username = "azonhgez"
    password = "7lvu0dnm"

    def process_request(self, request, spider):
        """Pick a random proxy and store its URL in ``request.meta['proxy']``.

        Args:
            request: The outgoing request; ``meta['proxy']`` is set in place.
            spider: The spider issuing the request; its logger is used for
                diagnostics.

        Returns:
            None, so the request continues through the remaining middleware.
            When no proxy is available the request is left unmodified.
        """
        proxies = pro.proxy_list
        if not proxies:
            # random.choice() raises on an empty/None sequence; send the
            # request without a proxy instead of crashing the download.
            spider.logger.warning("No proxy available; sending request without a proxy")
            return None

        proxy = random.choice(proxies)
        spider.logger.debug("Using proxy %s", proxy)
        request.meta['proxy'] = "http://%(user)s:%(pwd)s@%(proxy)s/" % {
            "user": self.username,
            "pwd": self.password,
            "proxy": proxy,
        }
        return None
|
||||
|
||||
class DoubanSpiderMiddleware:
    """Pass-through spider middleware from the Scrapy project template.

    Every hook is optional: per the template notes, Scrapy behaves as if an
    undefined hook leaves the passed objects unmodified.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and subscribe it to the ``spider_opened`` signal."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
        return instance

    def process_spider_input(self, response, spider):
        """Hook for each response on its way into the spider.

        Should return None or raise an exception; this implementation does
        nothing and returns None.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Yield every Request/item the spider produced for ``response``, unchanged."""
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Hook for exceptions raised by a spider or by ``process_spider_input()``.

        May return None or an iterable of Request/item objects; this
        implementation returns None.
        """
        return None

    def process_start_requests(self, start_requests, spider):
        """Yield the spider's start requests unchanged (requests only, no items)."""
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        spider.logger.info("Spider opened: %s" % spider.name)
|
||||
|
||||
|
||||
class DoubanDownloaderMiddleware:
    """Pass-through downloader middleware from the Scrapy project template.

    Every hook is optional: per the template notes, Scrapy behaves as if an
    undefined hook leaves the passed objects unmodified.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and subscribe it to the ``spider_opened`` signal."""
        instance = cls()
        crawler.signals.connect(instance.spider_opened, signal=signals.spider_opened)
        return instance

    def process_request(self, request, spider):
        """Hook for each request passing through the downloader middleware.

        Allowed outcomes (from the template notes): return None to continue
        processing, return a Response or Request object, or raise
        IgnoreRequest. This implementation always returns None.
        """
        return None

    def process_response(self, request, response, spider):
        """Return the downloaded ``response`` unchanged.

        The template allows returning a Response, returning a Request, or
        raising IgnoreRequest.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Hook for exceptions raised by a download handler or ``process_request()``.

        Returning None lets exception processing continue; returning a
        Response or Request would stop the ``process_exception()`` chain.
        This implementation returns None.
        """
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        spider.logger.info("Spider opened: %s" % spider.name)
|
||||
@@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python
|
||||
# -- coding: utf-8 --
|
||||
import time
|
||||
import threading
|
||||
|
||||
import requests
|
||||
from scrapy import signals
|
||||
|
||||
# API endpoint that returns a batch of proxy IPs.
# SECURITY(review): the secret_id/signature are committed to source control;
# they should be loaded from configuration/environment and rotated.
api_url = 'https://kps.kdlapi.com/api/getkps/?secret_id=ou5nlcm9klazz4rhi8ht&signature=1ffl6otrop2on40eyeuxe46c0cavc9k6&num=10&pt=1&format=json&sep=1'
# Legacy run flag, kept for backward compatibility; cleared when the spider closes.
foo = True
# requests has no default timeout; without one a stalled API call hangs forever.
REQUEST_TIMEOUT_SECONDS = 10
# Interval between two proxy refreshes.
REFRESH_INTERVAL_SECONDS = 15


def _fetch_proxy_list():
    """Download the current proxy list from the provider API.

    Returns:
        list: The value found at ``data.proxy_list`` in the JSON response.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error.
        ValueError: If the body is not JSON or lacks ``data.proxy_list``.
    """
    response = requests.get(api_url, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    payload = response.json()
    data = payload.get('data') if isinstance(payload, dict) else None
    proxies = data.get('proxy_list') if isinstance(data, dict) else None
    if not isinstance(proxies, list):
        raise ValueError("proxy API response has no data.proxy_list")
    return proxies


class Proxy:
    """Holder for the most recently fetched proxy list."""

    def __init__(self, ):
        # The initial fetch happens at construction; a failure propagates so
        # a crawl never starts with an unknown proxy state.
        self._proxy_list = _fetch_proxy_list()

    @property
    def proxy_list(self):
        """The current proxy list."""
        return self._proxy_list

    @proxy_list.setter
    def proxy_list(self, value):
        self._proxy_list = value


# Shared instance imported by the downloader middleware.
pro = Proxy()
print(pro.proxy_list)


class MyExtend:
    """Scrapy extension that refreshes ``pro.proxy_list`` in a background thread."""

    def __init__(self, crawler):
        self.crawler = crawler
        # Set on shutdown so the refresh thread wakes immediately instead of
        # sleeping out the rest of its interval.
        self._stop_event = threading.Event()
        # Bind the handlers to Scrapy signals so the refresher starts and
        # stops together with the engine.
        # Scrapy signals docs: https://www.osgeo.cn/scrapy/topics/signals.html
        # Scrapy extensions docs: https://www.osgeo.cn/scrapy/topics/extensions.html
        crawler.signals.connect(self.start, signals.engine_started)
        crawler.signals.connect(self.close, signals.spider_closed)

    @classmethod
    def from_crawler(cls, crawler):
        """Create the extension for ``crawler``."""
        return cls(crawler)

    def start(self):
        """Start the background refresh thread."""
        t = threading.Thread(target=self.extract_proxy)
        t.start()

    def extract_proxy(self):
        """Refresh the shared proxy list every 15 seconds until closed."""
        while foo and not self._stop_event.is_set():
            try:
                pro.proxy_list = _fetch_proxy_list()
            except (requests.RequestException, ValueError) as exc:
                # Keep the previous list and try again on the next cycle; a
                # single API hiccup must not kill the refresher for good.
                print(f"proxy refresh failed: {exc}")
            # Interruptible wait: returns early once close() sets the event.
            self._stop_event.wait(REFRESH_INTERVAL_SECONDS)

    def close(self):
        """Stop the refresh loop."""
        global foo
        foo = False
        self._stop_event.set()
|
||||
@@ -0,0 +1,13 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
|
||||
class DoubanPipeline:
    """No-op item pipeline for the douban project."""

    def process_item(self, item, spider):
        """Hand ``item`` on unchanged; ``spider`` is not used."""
        passthrough = item
        return passthrough
|
||||
@@ -0,0 +1,119 @@
|
||||
# Scrapy settings for douban project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
BOT_NAME = "douban"
|
||||
|
||||
SPIDER_MODULES = ["douban.spiders"]
|
||||
NEWSPIDER_MODULE = "douban.spiders"
|
||||
LOG_LEVEL="WARNING"
|
||||
|
||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||
# Written as one logical line: the previous triple-quoted literal began with a
# newline, which became part of the User-Agent header value.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
)
|
||||
|
||||
USER_AGENT_LIST = ["Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36"
|
||||
]
|
||||
|
||||
PROXY_LIST=[
|
||||
{"ip_port":"119.96.179.83:16819","user_passwd":"7lvu0dnm"},
|
||||
# {"ip_port":"185.195.107.254:23445"}
|
||||
|
||||
]
|
||||
|
||||
# Obey robots.txt rules
|
||||
# ROBOTSTXT_OBEY = True
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
#CONCURRENT_REQUESTS = 32
|
||||
|
||||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
#DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
#COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
#TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
#DEFAULT_REQUEST_HEADERS = {
|
||||
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
# "Accept-Language": "en",
|
||||
#}
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
#SPIDER_MIDDLEWARES = {
|
||||
# "douban.middlewares.DoubanSpiderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# Enabled downloader middlewares and their order values. Scrapy calls
# process_request() in increasing order, so RandomProxy (541) sees each
# request before RandomUserAgent (542).
DOWNLOADER_MIDDLEWARES = {
|
||||
# "douban.middlewares.DoubanDownloaderMiddleware": 543,
|
||||
"douban.middlewares.RandomUserAgent": 542,
|
||||
"douban.middlewares.RandomProxy": 541,
|
||||
}
|
||||
|
||||
# Enable or disable extensions
|
||||
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||
#EXTENSIONS = {
|
||||
# "scrapy.extensions.telnet.TelnetConsole": None,
|
||||
#}
|
||||
|
||||
# Configure item pipelines
|
||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
#ITEM_PIPELINES = {
|
||||
# "douban.pipelines.DoubanPipeline": 300,
|
||||
#}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = "httpcache"
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
|
||||
|
||||
# Set settings whose default value is deprecated to a future-proof value
|
||||
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
|
||||
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
|
||||
FEED_EXPORT_ENCODING = "utf-8"
|
||||
@@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
||||
@@ -0,0 +1,28 @@
|
||||
import scrapy
|
||||
from douban.items import DoubanItem
|
||||
|
||||
class Db250Spider(scrapy.Spider):
    """Crawl the Douban Top 250 movie list, following the "next" link."""

    name = "db250"
    allowed_domains = ["douban.com"]
    start_urls = ["https://movie.douban.com/top250"]

    def parse(self, response):
        """Yield one DoubanItem per movie block, then the next-page request.

        Each ``<div class="info">`` on the page becomes one item with the
        ``name``, ``info``, ``score`` and ``desc`` fields; a field is None
        when its XPath matches nothing.
        """
        for el in response.xpath("//div[@class='info']"):
            item = DoubanItem()
            item['name'] = el.xpath("./div[1]/a/span[1]/text()").extract_first()
            item['info'] = el.xpath("./div[2]/p[1]/text()").extract_first()
            item['score'] = el.xpath("./div[2]/div/span[2]/text()").extract_first()
            # NOTE(review): unlike the sibling fields this path does not go
            # through ./div[2]; verify it matches the page structure.
            item['desc'] = el.xpath("./p[2]/span/text()").extract_first()
            yield item

        # The last page has no "next" anchor, which ends the crawl.
        url = response.xpath("//span[@class='next']/a/@href").extract_first()
        if url is not None:
            # The href is relative; resolve it against the current page URL.
            yield scrapy.Request(url=response.urljoin(url))
|
||||
@@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = douban.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = douban
|
||||
@@ -0,0 +1,10 @@
|
||||
import scrapy
|
||||
|
||||
|
||||
class Douban250Spider(scrapy.Spider):
    """Placeholder spider for douban.com; no extraction is implemented yet."""

    name = "douban250"
    allowed_domains = ["douban.com"]
    start_urls = ["http://douban.com/"]

    def parse(self, response):
        """Ignore the downloaded response and produce nothing."""
        return None
|
||||
@@ -0,0 +1,12 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class GitItem(scrapy.Item):
    """Item container for the git project; it declares no fields yet.

    Fields are added as class attributes, e.g. ``name = scrapy.Field()``.
    """
|
||||
@@ -0,0 +1,103 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
|
||||
class GitSpiderMiddleware:
    """Pass-through spider middleware.

    Every hook forwards what it receives unchanged; the only side effect is
    one log line when a spider is opened. Hooks that are not needed may be
    removed, in which case Scrapy behaves as if they left objects untouched.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware

    def process_spider_input(self, response, spider):
        """Let every response through to the spider (always returns None)."""
        return None

    def process_spider_output(self, response, result, spider):
        """Re-emit each request or item the spider produced, unchanged."""
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Take no action on exceptions raised by the spider (returns None)."""
        return None

    def process_start_requests(self, start_requests, spider):
        """Re-emit the spider's start requests, unchanged."""
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        message = "Spider opened: %s" % spider.name
        spider.logger.info(message)
|
||||
|
||||
|
||||
class GitDownloaderMiddleware:
    """Pass-through downloader middleware.

    Requests, responses and exceptions are all left untouched; the only
    side effect is one log line when a spider is opened.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware

    def process_request(self, request, spider):
        """Return None so Scrapy keeps processing the request normally.

        A hook may instead return a Response or Request object, or raise
        IgnoreRequest to hand over to the process_exception() chain.
        """
        return None

    def process_response(self, request, response, spider):
        """Hand the downloaded response back unchanged.

        A hook may instead return a Request object or raise IgnoreRequest.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Return None so other middlewares keep handling the exception.

        Returning a Response or Request object would stop the chain.
        """
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        message = "Spider opened: %s" % spider.name
        spider.logger.info(message)
|
||||
@@ -0,0 +1,13 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
|
||||
class GitPipeline:
    """Item pipeline that forwards every item untouched."""

    def process_item(self, item, spider):
        """Return *item* exactly as received; *spider* is not used."""
        unchanged = item
        return unchanged
|
||||
@@ -0,0 +1,95 @@
|
||||
# Scrapy settings for git project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
# Name of this Scrapy project/bot.
BOT_NAME = "git"
|
||||
|
||||
# Packages searched for spiders, and the package where new spiders are created.
SPIDER_MODULES = ["git.spiders"]
|
||||
NEWSPIDER_MODULE = "git.spiders"
|
||||
# Only emit log records at WARNING level or above.
LOG_LEVEL="WARNING"
|
||||
|
||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||
# NOTE(review): this value imitates a desktop Edge/Chrome browser rather than
# identifying the crawler.
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
|
||||
|
||||
# NOTE(review): presumably disables tldextract's on-disk cache — confirm
# against the tldextract documentation.
TLDEXTRACT_CACHE_DIR = False
|
||||
|
||||
# Obey robots.txt rules
|
||||
# ROBOTSTXT_OBEY = True
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
#CONCURRENT_REQUESTS = 32
|
||||
|
||||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
#DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
#COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
#TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
#DEFAULT_REQUEST_HEADERS = {
|
||||
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
# "Accept-Language": "en",
|
||||
#}
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
#SPIDER_MIDDLEWARES = {
|
||||
# "git.middlewares.GitSpiderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
#DOWNLOADER_MIDDLEWARES = {
|
||||
# "git.middlewares.GitDownloaderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable extensions
|
||||
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||
#EXTENSIONS = {
|
||||
# "scrapy.extensions.telnet.TelnetConsole": None,
|
||||
#}
|
||||
|
||||
# Configure item pipelines
|
||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
#ITEM_PIPELINES = {
|
||||
# "git.pipelines.GitPipeline": 300,
|
||||
#}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = "httpcache"
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
|
||||
|
||||
# Set settings whose default value is deprecated to a future-proof value
|
||||
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
|
||||
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
|
||||
FEED_EXPORT_ENCODING = "utf-8"
|
||||
@@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
||||
@@ -0,0 +1,41 @@
|
||||
import scrapy
|
||||
|
||||
# 使用FormRequest发送post请求
|
||||
|
||||
class Git2Spider(scrapy.Spider):
    """Log in to GitHub by POSTing the login form with ``FormRequest``.

    Flow: download the login page, read its hidden form fields, POST them
    with the credentials to ``/session``, then fetch a profile page and
    print a title from it to check the login.

    The account can be overridden with spider arguments, e.g.
    ``scrapy crawl git2 -a login=... -a password=...``.
    """

    name = "git2"
    allowed_domains = ["github.com"]
    start_urls = ["https://github.com/login"]

    def parse(self, response):
        """Build the login POST from the hidden inputs of the login page."""

        def hidden(field, default=None):
            # Value of the <input name=field> element, or *default* if absent.
            return response.xpath(
                f'//input[@name="{field}"]/@value'
            ).extract_first(default=default)

        token = hidden("authenticity_token")
        if token is None:
            # FormRequest cannot encode a None value; stop with a clear log
            # entry instead of failing inside the request constructor.
            self.logger.error("authenticity_token not found on %s", response.url)
            return

        # NOTE(review): real account credentials are committed in this file;
        # prefer passing them with -a login=... -a password=... and rotate
        # the exposed password.
        post_data = {
            'commit': 'Sign in',
            'authenticity_token': token,
            # Values carry no leading whitespace (a copy-paste artefact of
            # the original literals that would be sent verbatim).
            'login': getattr(self, 'login', '1415243231@qq.com'),
            'password': getattr(self, 'password', 'zy18742526670'),
            'webauthn-conditional': 'undefined',
            'javascript-support': 'true',
            'webauthn-support': 'supported',
            'webauthn-iuvpaa-support': 'supported',
            'return_to': 'https://github.com/login',
            # Prefer the values embedded in the page just downloaded; the
            # literals are only a fallback when the inputs are missing.
            'timestamp': hidden('timestamp', '1726731919711'),
            'timestamp_secret': hidden(
                'timestamp_secret',
                '51cd0dd93807a1fdb0095203af8d9f45d864a1f3ca83b1268f6f659624804398',
            ),
        }

        # Send the POST request to the session endpoint.
        yield scrapy.FormRequest(
            url="https://github.com/session",
            callback=self.after_login,
            formdata=post_data,
        )

    def after_login(self, response):
        """Request the profile page once the login POST has returned."""
        yield scrapy.Request('https://github.com/PandaltsGo', callback=self.check_log)

    def check_log(self, response):
        """Print the text of the element used to check the login."""
        title = response.xpath('//*[@id="581032515"]/span/text()').extract_first()
        print(title)
|
||||
@@ -0,0 +1,24 @@
|
||||
import scrapy
|
||||
|
||||
#直接携带cookie字典请求
|
||||
|
||||
class GithubSpider(scrapy.Spider):
    """Request a GitHub profile page with a cookie dict attached directly."""

    name = "github"
    allowed_domains = ["github.com"]
    start_urls = ["https://github.com/PandaltsGo"]

    # Set-Cookie attribute names that can trail a pasted cookie string; they
    # describe the cookie and must not be sent as cookies themselves.
    _COOKIE_ATTRIBUTES = frozenset(
        {"path", "domain", "expires", "max-age", "secure", "httponly", "samesite"}
    )

    def start_requests(self):
        """Yield the single start request with the session cookie attached."""
        url = self.start_urls[0]
        # NOTE(review): a session cookie is committed in source; it is
        # account-sensitive and short-lived, so consider loading it from
        # outside the repository.
        cookies = '''_gh_sess=5ZDPStPAYwzU0t3by%2FpHC4UL4Fs2AiwfHyrhWN6C0vsUEpjtEVW5bjJGNIc6Lc8UfftkYNh2EYJzpcTsFIaEjnn6TfigTP0tcBvenlfICTwnJKidXXqEFgWdo2ou5gCBj%2BZMr0qS%2Bbf437FjJm0nuv0kcZiEI17sqfWkgL9UrTnievN62FcXakNMSXmxGdal12MQTHcA9makPHYPqdAm9J9w2%2BKnJKZXBeHnHB9pvYq5IZYHN%2BhndueA6W5%2B3M5%2FgwMdi0dniIyLiW7kgsD1ntV%2BclB4YUaiSBSut6PwonD9GlEyJYef%2FRlITnwWKyoNKpaWZy9SzLuDrS1vWI5z46Na2DDCjU5TqKdFI2afUfIq0JhTzkvUk40SnIHRZ2hZrzJd7t8cDfCDQronozMZscurF1mIHnITDSnQjHp0FImtVw1lbiuiMHyQ%2Bw41V9GM0TiRmJgZIWZ5F4I5kI68W%2FgM%2F5hRNlRrF26EWVmYTSINIfnkuY5kFThjK04OwoMCVd0d3yCua5qf%2FWX3XXl4Jzoj09JfqovsdqqezdAjzju06oVy5xh2RJz6VjA0BBTrxbVgBB91ybZ61XqdbWxXnm9AI9y6Y7AyY%2FUZp4BemfigBVdSE2gpU3S3VbwrmcjEzoAM8hv%2Bp37ROkl45teQ6aRE4Yh2xF26MYaMY5ffzjrweqfjeqIPJZO4qyIxEPNs7OTmzMTp87w1FodO6ZTdy0PvX1JVuHfVlHQuQUzo6ClrKaNsL0Fld7BahIEIgi8AbHRJyLgS7jsw1xhbRQ4ZCcvMRS%2B%2BBd%2Bh6KSEV27lX1dCNogvPRjFah38bFcbj35tw4pOUqdBkj%2BtvMKmQVLPrBJ%2BAa6lTjJqZa4s3P6uB5Ac%2BIMF--RnCQu9IfXR19B%2B3g--ZjZab0obtWOBIHl5EF%2FWRQ%3D%3D; path=/; secure; HttpOnly;'''
        cookies = self._parse_cookies(cookies)

        yield scrapy.Request(
            url=url,
            cookies=cookies,
            callback=self.parse,
        )

    @classmethod
    def _parse_cookies(cls, raw):
        """Turn a ``;``-separated cookie string into a ``{name: value}`` dict.

        Whitespace around each pair is stripped, the pair is split on the
        first ``=`` only (so values containing ``=`` stay intact), and
        empty segments, bare flags and Set-Cookie attributes are skipped.
        """
        parsed = {}
        for part in raw.split(';'):
            key, sep, value = part.strip().partition('=')
            if not sep or not key or key.lower() in cls._COOKIE_ATTRIBUTES:
                continue
            parsed[key] = value
        return parsed

    def parse(self, response):
        """Print the text of the element used to check the login state."""
        title = response.xpath('//*[@id="581032515"]/span/text()').extract_first()
        print(title)
|
||||
@@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = git.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = git
|
||||
@@ -0,0 +1,17 @@
|
||||
import requests
|
||||
|
||||
|
||||
def get_ip_text(ip='117.169.187.31'):
    """Look up *ip* on ip138.com and return the tail of the result page.

    Args:
        ip: IPv4 address to query; defaults to the address that was
            previously hard-coded.

    Returns:
        The last 500 characters of the response body, or None when the
        request fails (an error message is printed in that case).
    """
    url = "http://www.ip138.com/ip.asp?ip="
    header = {'User-agent': 'Mozilla/5.0'}
    try:
        r = requests.get(url + ip, timeout=20, headers=header)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are expected here; anything else is a
        # programming error and should surface.
        print("出现错误")
        return None
    # Decode with the encoding detected from the body, not the header guess.
    r.encoding = r.apparent_encoding
    return r.text[-500:]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(get_ip_text())
|
||||
@@ -0,0 +1,29 @@
|
||||
import requests
|
||||
import time
|
||||
|
||||
#爬取100次网页所需时间
|
||||
def get_HTML_Text():
    """Fetch the module-level ``url``, retrying up to 100 times on failure.

    Returns:
        The HTTP status code of the first successful response, or None if
        all 100 attempts fail (a message is printed per failed attempt).
    """
    for _ in range(100):
        try:
            # A timeout keeps one stalled connection from hanging the run.
            r = requests.get(url, timeout=20)
            r.raise_for_status()
        except requests.RequestException:
            print("出现异常")
            continue
        r.encoding = r.apparent_encoding
        return r.status_code
    return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Time how long 100 successful fetches of `url` take.
    time1 = time.time()
    url = "http://www.baidu.com"
    u = 0  # number of failed fetches
    fetched = 0  # number of successful fetches
    # Each failure earns one extra attempt. The bound is re-checked on every
    # pass, since a range() built up front could not grow with `u`. Stop
    # after 100 failures so an unreachable site cannot loop forever.
    while fetched < 100 and u < 100:
        if get_HTML_Text() == 200:
            fetched += 1
        else:
            u += 1
    time2 = time.time()
    print(f"爬取100次所需要的时间为{time2-time1}秒")
|
||||
@@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = scrapy_practise.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = scrapy_practise
|
||||
@@ -0,0 +1,14 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class ScrapyPractiseItem(scrapy.Item):
    """Scraped record with three fields: ``name``, ``title`` and ``info``."""

    # One scrapy.Field() per value stored on the item.
    name = scrapy.Field()
    title = scrapy.Field()
    info = scrapy.Field()
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
|
||||
class ScrapyPractiseSpiderMiddleware:
    """Pass-through spider middleware.

    Every hook forwards what it receives unchanged; the only side effect is
    one log line when a spider is opened. Hooks that are not needed may be
    removed, in which case Scrapy behaves as if they left objects untouched.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware

    def process_spider_input(self, response, spider):
        """Let every response through to the spider (always returns None)."""
        return None

    def process_spider_output(self, response, result, spider):
        """Re-emit each request or item the spider produced, unchanged."""
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Take no action on exceptions raised by the spider (returns None)."""
        return None

    def process_start_requests(self, start_requests, spider):
        """Re-emit the spider's start requests, unchanged."""
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        message = "Spider opened: %s" % spider.name
        spider.logger.info(message)
|
||||
|
||||
|
||||
class ScrapyPractiseDownloaderMiddleware:
    """Pass-through downloader middleware.

    Requests, responses and exceptions are all left untouched; the only
    side effect is one log line when a spider is opened.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware

    def process_request(self, request, spider):
        """Return None so Scrapy keeps processing the request normally.

        A hook may instead return a Response or Request object, or raise
        IgnoreRequest to hand over to the process_exception() chain.
        """
        return None

    def process_response(self, request, response, spider):
        """Hand the downloaded response back unchanged.

        A hook may instead return a Request object or raise IgnoreRequest.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Return None so other middlewares keep handling the exception.

        Returning a Response or Request object would stop the chain.
        """
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        message = "Spider opened: %s" % spider.name
        spider.logger.info(message)
|
||||
@@ -0,0 +1,13 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
|
||||
class ScrapyPractisePipeline:
    """Item pipeline that forwards every item untouched."""

    def process_item(self, item, spider):
        """Return *item* exactly as received; *spider* is not used."""
        unchanged = item
        return unchanged
|
||||
@@ -0,0 +1,93 @@
|
||||
# Scrapy settings for scrapy_practise project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
BOT_NAME = "scrapy_practise"
|
||||
|
||||
SPIDER_MODULES = ["scrapy_practise.spiders"]
|
||||
NEWSPIDER_MODULE = "scrapy_practise.spiders"
|
||||
|
||||
|
||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||
#USER_AGENT = "scrapy_practise (+http://www.yourdomain.com)"
|
||||
|
||||
# Obey robots.txt rules
|
||||
ROBOTSTXT_OBEY = True
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
#CONCURRENT_REQUESTS = 32
|
||||
|
||||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
#DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
#COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
#TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
#DEFAULT_REQUEST_HEADERS = {
|
||||
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
# "Accept-Language": "en",
|
||||
#}
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
#SPIDER_MIDDLEWARES = {
|
||||
# "scrapy_practise.middlewares.ScrapyPractiseSpiderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
#DOWNLOADER_MIDDLEWARES = {
|
||||
# "scrapy_practise.middlewares.ScrapyPractiseDownloaderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable extensions
|
||||
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||
#EXTENSIONS = {
|
||||
# "scrapy.extensions.telnet.TelnetConsole": None,
|
||||
#}
|
||||
|
||||
# Configure item pipelines
|
||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
#ITEM_PIPELINES = {
|
||||
# "scrapy_practise.pipelines.ScrapyPractisePipeline": 300,
|
||||
#}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = "httpcache"
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
|
||||
|
||||
# Set settings whose default value is deprecated to a future-proof value
|
||||
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
|
||||
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
|
||||
FEED_EXPORT_ENCODING = "utf-8"
|
||||
@@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
||||
@@ -0,0 +1,36 @@
|
||||
import scrapy
|
||||
from scrapy_practise.items import ScrapyPractiseItem
|
||||
|
||||
class ItcastSpider(scrapy.Spider):
    """Scrape the teacher list from the itcast.cn teacher page."""

    name = "itcast"
    allowed_domains = ["itcast.cn"]
    start_urls = ["http://www.itcast.cn/channel/teacher.shtml"]

    def parse(self, response):
        """Return one ScrapyPractiseItem per ``<div class="li_txt">`` block.

        Each item carries the first text node of the block's ``h3`` (name),
        ``h4`` (title) and ``p`` (info) children. A field is None when its
        node is missing, so one incomplete block no longer aborts the page
        with an IndexError.
        """
        items = []

        for each in response.xpath("//div[@class='li_txt']"):
            # Wrap the extracted values in a ScrapyPractiseItem.
            item = ScrapyPractiseItem()
            # extract_first() returns the first match as a string, or None.
            item['name'] = each.xpath("h3/text()").extract_first()
            item['title'] = each.xpath("h4/text()").extract_first()
            item['info'] = each.xpath("p/text()").extract_first()
            items.append(item)

        # Hand the collected items back to Scrapy in one go.
        return items
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,150 @@
|
||||
info,name,title
|
||||
11年JAVA开发经验,9年技术团队管理经验,7年项目架构经验(系统架构师职称),在电子政务、企业办公、电子商务、社交网络、移动应用、互金等行业软件方面拥有丰富的项目实战经验。先后在飞利信、高伟达、成都融特、日立、人人贷担任部门经理、TeamLeader、技术经理、Senior Consultant、技术经理,负责全国政协门户、Wradio+、Chinaface、居然在线、个贷官网、人人U学、Crawlers、Babel大数据平台等项目研发。 ,罗老师,高级讲师
|
||||
擅长Java EE企业级应用,十余年项目管理经验,曾担任开发工程师,架构师等重要角色。主导多个大型项目的架构设计、管理等工作。在互联网项目领域具备丰富的经验,精通微服务架构,擅长解决高并发,亿级数据量等架构设计,拥有广泛的技术面与实践经验。,姜老师,高级讲师
|
||||
11年互联网项目研发经验及教学经验,曾担任过开发工程师,技术部经理等职务。精通Java技术栈、对SpringBoot、Redis、RabbitMQ、ElasticSearch、Docker、SpringCloud等有深入研究。专注于分布式、高可用、高性能、微服务架构的设计。,陈老师,高级讲师
|
||||
"具有多年开发经验,曾先后在重庆科普、微创、爱奇艺等多家软件公司担任软件开发工程师、项目经理等职务,精通Java EE、Struts2、Spring、Hibernate应用开发。",石老师,高级讲师
|
||||
6年软件开发与教学经验,曾在北京航天四创、中科软等单位担任研发经理职务,曾主持参与浙江省机要系统、国家电网系统、济宁市医疗平台、中国电信网络规划平台、人民日报社扶贫APP的设计开发工作。精通分布式、大数据、微服务、高并发、高可用等相关技术架构。 ,郭老师,高级讲师
|
||||
多年开发和教学经验,曾在三星(中国)参与过大型跨国项目担任开发工程师,授课风趣幽默,精通Java EE、数据库、MyBatis、Spring全家桶等企业级常用技术;分布式系统开发经验丰富,对SpringBoot、SpringCloud、Dubbo、ZooKeeper 、ES、Solr、Redis、MQ等分布式技术深有研究。 ,付老师,高级讲师
|
||||
10年软件开发和教学经验,武汉理工计算机和武汉大学经济学双学位,在Java和大数据开发领域有丰富的经验,曾参与铁路12306数据中台、用户画像、正晚点预测等多个系统的研发工作。 ,杨老师,高级讲师
|
||||
13年软件开发管理与教学经验;软件与大数据开发认证工程师,项目经理。曾开发管理过信诚人寿、美的、民生银行、青岛啤酒、联想、SGS e-learning、国家工商总局市场信用分类监管系统等。熟悉分布式、微服务等J2EE及大数据生态圈技术知识体系。 ,廖老师,高级讲师
|
||||
12年互联网项目研发经验及教学经验,曾担任过开发工程师、技术部经理等职务。参与国家电网系统、饿了么外卖平台、每日优鲜O2O平台的设计以及开发工作。精通Java技术栈、对SpringBoot、Redis、Rabbit MQ、ElasticSearch、Docker、Spring Cloud等有深入研究。专注于分布式、高可用、高性能、微服务架构的设计。 ,蒙老师,高级讲师
|
||||
6年软件开发经验,曾任职医惠科技开发工程师、阿里巴巴开发工程师岗位,担任过高级开发工程师、项目经理、技术经理等职位。对大型TOB项目如His系统中分布式服务、数字化架构、高并发、高可用、数据一致性等有丰富的项目经验和业务知识。 ,何老师,高级讲师
|
||||
"5年软件开发及教学经验,先后在多家IT公司担任Android、Java方向软件开发工程师,研发项目涉及,新闻,理财,社交,教育等方向对Android、Java SE、Java Web、MySQL、前端等技术深入研究。 ",杨老师,高级讲师
|
||||
13年的项目开发和教育培训经验,精通Java EE的主流开发框架、Oracle和MySQL等关系型数据库。曾在中科院遥感应用研究所、慧点科技、达利本斯等公司担任软件开发工程师、项目总监,带团队做过边防部队、人寿集团、平安集团等多个企业的大型项目,之后在互联网公司知果科技担任开发经理,完成知果果网的核心产品开发。 ,师老师,高级讲师
|
||||
北京航空航天大学 计算机毕业。10年技术开发与管理经验。曾在北大方正、知名游戏公司、互联网公司等担任研发工程师、技术经理。参与移动聊天工具企业版、新媒体营销平台、档案信息管理系统等大型项目研发工作。擅长总结和梳理知识点,深入的对每个技术点进行讲解,引导式教学,注重培养学员能力。 ,杨老师,高级讲师
|
||||
多年教学和开发经验对Java EE技术体系结构、Java Web原理有深刻理解。精通RPC、消息中间件、SSM、SpringCloud等技术,曾参与中国移动门户系统、中国移动集团客户对公平台、CRM系统、健康管理系统等。热衷于技术研究,善于用引导方式教学;授课思路清晰,注重学生基础知识掌握扎实,注重培养学生自主解决问题的能力。 ,于老师,高级讲师
|
||||
多年网络安全及密码学开发经验,服务器研发经验,曾任职国家互联网应急中心关口监测系统研发经理职位。精通Golang、C/C++程序设计,Go与区块链领域先驱者。,刘老师,高级讲师
|
||||
8年软件开发及教育相关工作; 有丰富的企业APP、网站、手游开发经验;精通大数据、Java等多种前沿技术,对Linux、MySQL、SSM框架底层、微服务架构 等有一定研究。喜欢钻研,乐于分享。学生遍及阿里、腾讯、苏宁等众多互联网企业。 ,单老师,高级讲师
|
||||
8年Java EE企业级开发经验,5年开发团队管理经验,曾任某大型国有企业项目负责人,对于构建高吞吐、低延迟、分布式架构有丰富经验。精通Java SE、Java Web、Spring、SpringMVC、MyBatis和微服务等主流技术栈。具备丰富的项目重构和性能调优实战经验。秉承“复杂问题简单化,抽象问题具体化”的教学理念,注重学员解决问题能力和个人综合能力的全方位培养。 ,李老师,高级讲师
|
||||
9年开发经验,3年授课经验。精通Oracle、DB2、MySQL等数据库使用,精通SSM、MQ、Security等框架使用,熟练Redis、SOA、微服等高可用架构的搭建。曾就职于联想信息产品(深圳)有限公司研发实验室,从事Think产品研发系统的开发与维护。工作积极认真与强烈的责任感、思维视野开阔。,叶老师,高级讲师
|
||||
"10年以上开发以及教育经验,参与过大型的电子商务购物平台、中国电信在线营业厅等项目的开发,在我友科技、杭州数云信息技术担任开发经理职位。曾参与项目:《NewBI》《我友商城》《中国电信套餐直通车》等。
|
||||
",张老师,高级讲师
|
||||
13年开发管理经验,曾先后就职于东亚银行、京东等知名企业,担任过高级开发工程师、项目经理、技术经理、架构师等职位,对电商、金融风控、反欺诈领域及互联网分布式服务、高并发、高可用、秒杀场景等有丰富项目经验和行业知识。精通JavaSE、JavaEE、SSM、Spring全家桶、分布式缓存、各种消息中间件等。对多线程、高并发的解决方案及各种开源技术有着深厚的研究。 ,李老师,高级讲师
|
||||
8年IT从业经验,先后担任高级Java软件工程师、项目组长等职务。具有扎实的Java基本功底和良好的编程规范,精通Java EE当下主流技术SSM,Spring,SpringMVC,Spring Boot,Spring Cloud,Spring Cloud Alibaba和NoSQL数据库;精通MySQL和Oracle数据库产品;多年分布式架构下的开发经验。具备扎实的技术功底、丰富的软件研发、项目管理经验及IT教学经验。幽默风趣,能充分调动学生学习的积极性与主动性,擅长以学员实际掌握情况灵活调整授课方案,以保证学员吸收最大化。 ,蒋老师,高级讲师
|
||||
11年软件开发及教学经验,曾先后在360、亚信联创、众信旅游担任研发工程师、技术经理岗位。擅长Java后台程序设计、微服务开发等,特别是在互联网领域有很强的开发经验。授课思路清晰,对学生认真负责,注重学生学习效果。研发以及录制大型新闻资讯项目《黑马头条》。 ,于老师,高级讲师
|
||||
7年教学和开发经验。精通Java EE技术体系,对Java SE、Java Web、MySQL、Redis、Spring、SpringMVC以及MyBatis等技术点有深刻理解,授课风格幽默风趣,通俗易懂,条理清晰。关注于学生的成长、极具亲和力! ,姜老师,高级讲师
|
||||
多年互联网项目与游戏项目研发和管理经验,曾带领公司团队研发电商、社交、实时视频等相关领域项目。涉及H5、Unity3D、小程序等不同平台。对Vue,React,微信小程序等都有研究并具备了丰富的项目架构与研发经验。 ,许老师,高级讲师
|
||||
"8年Java后台开发及教育工作经验,熟悉Java SE, Java EE, Spring,MyBatis, MySQL等常用框架,熟悉ELK技术栈,SpringCloud生态圈技术。熟练使用Vue.js, Node.js, Webpack等前端技术。曾在多家大型公司担任软件工程师,项目经理职务,涉及在线教育、证券交易、电商等不同领域。目前授课范围:SpringMVC, MyBatis, Lucene, Solr, SolrCloud, 乐优商城。 ",张老师,高级讲师
|
||||
10余年互联网行业开发经验及教学经验,资深架构师,精通Java、C、Python等开发语言。曾就职大型互联网公司从事电商架构相关的设计和Java开发平台以及基础中间件的设计研发工作;曾主导研发了支持高并发处理能力的API网关、高性能数据库连接池以及海量数据归档平台。授课诙谐幽默,讲解通俗易懂。 ,赵老师,高级讲师
|
||||
10年Java研发和教学经验。先后任职于东方邦信、明医众禾、开薪点、企家有道等大型互联网企业,担任高级Java研发、技术经理等职务。负责和主导过多个互联网项目,如东方汇、医德帮、薪人薪事,涉及互联网金融、PHC基层医疗生态、人资 SaaS等领域。精通Java EE基础、并发编程、Socket网络编程,对Spring全家桶、Netty、MyBatis、MySQL Oracle、Redis、SpringBoot、Dubbo、Spring Cloud & Alibaba 微服务体系、RabbitMQ、Docker、K8S等多种互联网前沿技术有深入研究,熟悉JS 、Jquery、Bootstrap、Seajs、Backbone、ElementUI等前端技术。专注细致、幽默风趣,善于结合生活化场景和实际企业场景传达知识点。 ,刘老师,高级讲师
|
||||
5年的Java软件开发经验,5年Java专业教学经验,曾参与设计与开发多个大型互联网项目,主要有质监抽检,青历商城,E贷网P2P,天艺引资评估系统等等。长期致力于Web应用的开发与设计。擅长Java EE核心技术,SSM、Dubbo、SpringBoot、SpringCloud等。 ,郭老师,高级讲师
|
||||
6年开发经验,2年教学经验。在大型互联网公司担任TL、架构师,参与设计并搭建大数据、物联网等大型项目。精通Java、SSM框架以及SpringCloud、Dubbo、MQ等微服务架构体系。 ,李老师,高级讲师
|
||||
曾任职于中国移动、平安保险。精通Java EE体系、MySQL、Android、鸿蒙等技术。主导研发《三天从0带你写一个Java小游戏》、《9天快速入门Java》、《Java从入门到起飞》、《鸿蒙应用开发》等多套课程。 ,张老师,高级讲师
|
||||
8年Java研发及教学经验,精通JavaEE技术体系,曾担任开发工程师、架构师等重要角色。参与于采帅令外卖系统、中国中铁集团道岔钢轨测量系统、中仕学社等互联网项目设计研发。曾主导过多个大型企业级项目的研发。对流行框架Spring,Redis、ElasticSearch、MQ中间件等有深入研究,熟悉以SpringCloud为核心的微服务技术体系,以及微服务在企业落地的最佳实践。授课幽默风趣,条理清晰,乐于分享,注重培养学生独立思考的能力。 ,申老师,高级讲师
|
||||
"5年的软件开发及教学经验,擅长Java EE企业级应用,参与多个大型项目的架构设计、管理、开发等工作。在互联网项目领域具备丰富的经验,掌握Java SE,Java Web、MySQL、Redis以及Spring、SpringMVC、MyBatis、SpringBoot、SpringCloud等分布式架构技术,拥有广泛的技术面与实践经验。 ",律老师,高级讲师
|
||||
多年互联网开发经验,深入理解Java虚拟机原理,以及参数优化,对SSM、SpringBoot、SpringCloud微服务等主流框架有深入的理解和使用,对实时流处理框架storm,并Kafka有多年使用经验。 ,刘老师,高级讲师
|
||||
15年研发经验,曾担任架构师、项目经理等职位,精通JavaEE技术栈,曾负责全国党员远程教育系统、云南学分银行系统、郑州地铁CCTV网管系统等项目的研发与管理工作。 ,苗老师,高级讲师
|
||||
"精通C#,Java语言开发,精通MySQL,Oracle,SQL Server数据库,SSM框架,JS,jQuery前段框架。先后在广东源恒科技有限公司、武汉付运通科技有限公司任职, 教学严谨,授课思路清晰,善于语言表达和总结行业经验,因材施教。 ",袁老师,高级讲师
|
||||
多年开发与教学经验,精通Java企业平台开发技术,熟练掌握并使用设计模式、常用开源框架、分布式架构及微服务架构,先后担任过工程师、项目经理等职位,曾参与过商城项目,主导过智慧医疗等项目研发,具备一定的项目管理经验,同时具有丰富的教学经验,授课认真负责,条理清晰。 ,李老师,高级讲师
|
||||
"10年Java开发及教学经验,曾在多家公司担任软件开发工程师,技术功底深厚,授课幽默风趣,通俗易懂。精通MySQL,Redis及Spring, MyBatis, SpringMVC, SpringBoot, SpringCloud, SpringData系列框架,并对Solr, ElasticSearch, Dubbo, MQ中间件技术有深入研究。 ",邓老师,高级讲师
|
||||
9年一线研发与教学经验,曾先后在中石油、久其软件等企业担任研发工程师,参与油气管道全生命周期建设、昆仑燃气等项目,并为国资委、兵工、航天科工等研发CI财报产品。精通Java SE、Java EE技术和SSH/SSM、SpringBoot等常用框架,对Dubbo、ZooKeeper、SpringCloud、MQ、Redis、MongoDB、MySQL等流行框架和技术有深入研究。授课逻辑清晰,通俗易懂,受到众多学生的认可和喜爱。 ,刘老师,高级讲师
|
||||
5年Java开发和项目管理经验,3年的授课经验。曾先后于北大医信、泰和诚任开发工程师、项目主管职位,精通Java语言,擅长Java EE技术栈,对MySQL数据库有深入的了解。课堂幽默风趣,善于用生活中的例子讲解Java中晦涩难懂的知识点。,翟老师,高级讲师
|
||||
多年开发和教学经验,曾在中星测控、中软国际担任开发工程师,有移动端、PC端、服务端软件等多类型软件的开发经验,授课不拘一格、深入浅出、风默有趣。精通Java开发,对JVM、数据库、Web规范、数据结构与算法以及主流应用框架均有深入研究。 ,高老师,高级讲师
|
||||
多年开发与教学经验,对Spring生态系统,消息中间件,数据库等技术有深入理解和研究。先后在陕西出版集团,中软、电讯盈科等担任过软件开发工程师职务,参与开发过书海小说网,延安英雄传后端等项目开发。 ,李老师,高级讲师
|
||||
具有8年软件开发与教学经验,具有丰富的开发经验,先后在四方精创、华侨城文旅科技、达普信科技等公司担任技术研发与项目运维等工作,先后经历过中国银行(香港)私人银行业务开发、华侨城文旅科技公司内部网站建设以及海关平台相关业务的需求研发。熟悉使用SSH、SSM、缓存Redis、MySQL、Oracle、数据报表等。 ,李老师,高级讲师
|
||||
从事Java EE开发及教学工作多年,先后在多家公司担任开发工程师,项目经理等职务。精通Redis及Spring、MyBatis、SpringMVC、SpringBoot、SpringCloud系列框架;拥有大数据项目开发经验,对Hadoop生态圈如Hive,Zookeeper,Hbase等技术有深入研究。 ,袁老师,高级讲师
|
||||
毕业于东北师范大学,五年以上开发经验,多年软件开发教育培训经验,精通Java SE、MySQL等编程开发语言和开发技术,熟练掌握各种数据结构和算法,精通Spring、SpringMVC、MyBatis等常用框架,熟练掌握微服务框架,授课风格幽默风趣,可以将知识化繁为简、深入浅出的教授给学员。 ,李老师,高级讲师
|
||||
5年软件研发与项目管理经验,精通Java开发语言以及MySQL、Oracle数据库,对Java流行框架有深入研究,曾在企业中参与主导旅游、活动竞赛等领域项目开发。在传智播客有三年教学经验,授课认真负责,注重学员独立分析能力。 ,王老师,高级讲师
|
||||
5年软件开发及教学经验,Android、Java方向软件开发工程师,研发项目涉及教育、新闻、娱乐直播等。对Android、Java SE、Java Web、MySQL、前端等技术深入研究。 ,郝老师,高级讲师
|
||||
15年研发及技术管理经验,历任Java研发、架构师、技术总监等职位。热衷于研究主流Java技术,擅长微服务、高并发、高可用、大数据处理平台的搭建及主要解决方案的落地实施。曾主持开发电商日志采集系统、电商数据开放平台、电商推荐和搜索分析系统、商家开放平台、物联网冷链监控平台等电商及物联网行业SaaS项目。 ,陈老师,高级讲师
|
||||
"985,211重点大学毕业,拥有10年的开发经验和架构设计经验,曾在几个大型公司担任系统架构师,精通系统架构,曾主持各种大型网站架构设计与开发,主持设计研发ODPS框架,SWORD框架,授课特点:幽默风趣,通俗易懂,条理清晰,层次分明。 ",张老师,高级讲师
|
||||
6年Java软件开发及相关教育工作,熟悉Java EE、Android开发,对主流框架有深入了解。曾参与过联通集成公司的4G系统和结算系统开发;主导过移动健康相关项目应用开发。传智授课3年,讲课风格诙谐幽默,倡导寓教于乐的授课方式。 ,张老师,高级讲师
|
||||
多年J2EE项目研发及管理经验,曾就职于多家大型互联网企业,有着多年的一线开发经验,涉及过互联网电商项目,移动管理系统等,企业ERP系统,手机App项目等。精通Java SE,Java Web,对Spring、MyBatis、SpringMVC、SpringBoot、Android、MySQL、Redis、Solr、Dubbo、FastDFS等有深入研究。,刘老师,高级讲师
|
||||
7年软件开发与教学经验,曾负责游戏大数据平台可视化、国家公开信息采集系统、企业电商架构构建等。精通JavaEE,SSM/SSH、Spring全家桶技术。对NoSQL、消息队列、服务集成、微服务、持续集成及各种开源技术有着深厚的研究。 ,年老师,高级讲师
|
||||
多年开发和教学经验,涉及到手机应用Web后台开发,精通Java SE、 Java EE、Android。曾多次参与开发国家版权局对应版权业务。 ,周老师,高级讲师
|
||||
6年一线研发经验,2年Java教学经验,先后负责国家电网、质检总局、广西海关等单位的多个大型系统研发,后期主做在线教育互联网项目。精通MVC框架、擅长分布式框架、微服务框架,授课有热情、思路清晰、深入浅出。主讲项目阶段及就业冲刺阶段课程。 ,陈老师,高级讲师
|
||||
"拥有5年的企业开发和教学经验,对于目前主流的SSM, SpingBoot, SpingCloud, SQL优化等常用框架和技术都有深入的理解。熟练掌握Redis缓存技术和缓存方案。对于常用的消息中间件RabbitMQ和ActiveMQ等以及定时任务等都有相关的实际开发经验。对于前端框架Vue.jsp以及其他前端基础都有所涉及。 ",黄老师,高级讲师
|
||||
"14年软件开发与教学经验,具有丰富的技术团队管理经验,精通Java
|
||||
EE(JSPServlet、JDBC、Struts2、Spring、Hibernate、MyBatis),以及 WebService、Ajax、Oracle、DB2、MySQL、NoSQL、分布式RPC框架Dubbo、SpringCloud、ZooKeeper、MQ、Redis等。曾先后参与江苏媒资系统、中信银行大客户对公平台、胜利油田办公自动化平台、武警部队信息化系统的研发与管理工作。",赵老师,高级讲师
|
||||
JAVA开发工程师。有着多年的软件开发经验,精通Java EE企业级应用技术,精通MySql、Oracle、DB2等主流关系型数据库,有着丰富的数据库开发经验。曾经参与过移动公司的实时人流监控、用户缴费日志实时分析等大数据项目,熟悉大数据相关开发。 ,徐老师,高级讲师
|
||||
10年软件研发、项目管理与教学经验,精通Java开发技术,曾先后担任过技术经理、技术主管、项目经理等职位。主导研发超过5个以上大型项目与平台。精通微服务架构,擅长解决高并发,亿级数据量等架构设计,拥有广泛的技术面与实践经验。 ,陈老师,高级讲师
|
||||
10年企业一线开发经验,在多家企业担任开发工程师,架构师,项目经理等职位,参与或主持航信-航意险,中彩网,网上商城等项目的开发。专注于JavaEE开发,深入研究Spring全家桶、MyBatis、Dobuo等流行框架,对分布式,微服务架构体系有独到的见解。 ,张老师,高级讲师
|
||||
"多年开发和教学经验,对Java EE,iOS,Android多平台的开发有丰富经验,熟练掌握SSM、Dubbo,Spring Security,SpringBoot等框架,能熟练操作Oracle,MySQL等关系型数据库, 授课幽默风趣,通俗易懂,关注学员消化吸收,注重培养学员独立分析能力及动手能力。 ",李老师,高级讲师
|
||||
12年工作经验,架构师,多年IT领域研发经验、深入的理解SSM、Redis、RocketMQ、Memcached、Quartz、Dubbo、SpringBoot、SpringCloud等框架,主要负责参与的项目《全国数字物价监测中心》、《上海浦发银行信用卡客户服务中心》、《某金融平台统一支付中心》等项目。 ,束老师,高级讲师
|
||||
"13年项目开发和教学经验,对SpringData、SpringCloud、Docker等技术有深入的研究,熟练掌握 MySQL、Redis,SSM框架、Dubbo、ZooKeeper等技术,热衷主流Java开源框架,曾参与中国电信计费系统、银行自动服务后台操作系统等。 ",唐老师,高级讲师
|
||||
"负责公司后台核心系统、资质审核系统、风控审核系统、标的审核系统的设计与搭建,对接存管银行的API,对接第三方银行机构。后台系统基于大数据的车辆对比系统、车辆实时信息联网查询、车辆电池信息以及前台基于百度地图的路径规划、动态围栏等功能。",刘老师,高级讲师
|
||||
"10年软件开发经验,曾在用友网络担任项目经理、项目总监等职务,先后主导过佛山美的集团互联网系统应用、白云国际机场商旅电商系统和深圳民太安公估保险移动互联网应用等。 精通Java EE、并发编程、SSM、SpringBoot。对CDN、NoSQL、消息队列、微服务、JVM性能调优、爬虫级各种开源框架等技术。
|
||||
",宋老师,高级讲师
|
||||
10年Java从业经验,曾负责安防视频监控系统、华为VMS语言邮箱系统、电信网络质量监控系统、溯源APP等。精通Java SE,Java EE,SSM/SSH,Spring全家桶。对NoSQL、消息队列、服务集成、流程引擎、微服务、持续集成及各种开源技术有着深厚的研究。 ,徐老师,高级讲师
|
||||
从事Java工作多年,有着丰富的开发和教学经验,Java,JSP,Servlet开发,精通SSM、Dubbo,Spring Security,Struts2,Hibernate等框架,能熟练操作Oracle,MySQL等关系型数据库。授课幽默风趣,通俗易懂。 ,杨老师,高级讲师
|
||||
从事十年大型互联网软件开发和教学工作,具有丰富的软件开发和项目管理经验,精通Java 、Swift、Python等语言,曾主导并参与多个大型项目,包括W3Mobile华为协同办公平台,迪士尼蓝牙点读笔等。具有丰富的教学经验,授课思路清晰,风趣幽默,通俗易懂。 ,张老师,高级讲师
|
||||
10多年IT从业经验,北大计算机毕业,曾在中科软从事系统分析与架构工作,精通OOM、PD、DDD建模;对计算机原理、体系结构、常用算法有深入研究;熟练分布式和微服务环境下的主流技术架构。 ,王老师,高级讲师
|
||||
"9年Java开发管理和教学,先后在STS(国外)、艾默生网络能源、华胜天成等担任过软件开发工程师、项目主管等职务。精通MySQL、Oracle等数据库以及市场流行的Web框架(SSM、Spring全家桶等)。参与完成的项目主要有:移动国漫项目、短信平台、流量平台、海外物流等。 ",王老师,高级讲师
|
||||
先后在宜信、顺丰、美菜担任研发工程师,具有近10年的Java实战研发经验,熟练掌握当今主流技术(MyBatis/iBatis、Spring、SpringMVC、Dubbo、Elasticsearch、SpringBoot、SpringCloud),曾参与研发基础医疗His、CRM、美菜搜索、顺丰优选等项目。 ,孔老师,高级讲师
|
||||
"曾担任过Android应用开发,Java企业级开发工程师,智能家居行业曾获取App软件专利,精通MyBatis,SpringMVC,Spring,Vue等当下主流的框架,以及其底层实现的原理,熟练使用MySQL,Oracle,Redis主流的数据库,有丰富的授课经验。 ",梅老师,高级讲师
|
||||
上海交大软件工程毕业,10年大型项目实战经验,精通C++、Java、JavaScript等多种主流开发语言。曾在多家500强企业任职,参与金融、物联网、通信等多个领域系统的设计和开发,对常用分布式技术和微服务架构均有深入研究。 ,夏老师,高级讲师
|
||||
曾经主导和参与过:中国银行网点规划系统、河南移动网上营业厅、郑州市科技局综合性门户网站、河南地税缴费系统等等项目。曾经参与策划和编写过多本书籍并出版发行,例如《Ajax+JSP巧学巧用》《Struts2框架开发从入门到精通》《EJB3从入门到精通》等等。 ,张老师,高级讲师
|
||||
拥有多年开发和教学经验,精通Java EE领域编程语言。 深入研究流行框架 SpringMVC、Spring、Hibernate、MyBatis、缓存系统Redis等。 教学擅长理论与实际相结合,对复杂简单化。 曾参与研发大型项目《万达慧云系统》、《建筑需求响应运营管理系统》等。 ,陆老师,高级讲师
|
||||
2015年加入黑马,精通Servlet、JSP、JavaMail、JDBC、JMS、WebService、Struts2、Spring、SpringMVC框架、MyBatis等技术;精通MySQL、Oracle数据库,Tomcat、JBoss等服务器的搭建和部署。在中软等企业从事大型Web项目的设计开发多年,曾任项目经理等职位。 ,周老师,高级讲师
|
||||
"多年软件开发及项目管理经验,精通Java、PHP、C++等编程语言;先后在北京K2地产、由米定制等企业担任开发及管理工作;精通Struts2、Spring、Hibernate、SpringMVC、MyBatis等常用框架;对Oracle,MySQL,Redis,MongoDB等数据库技术有深入研究。",王老师,高级讲师
|
||||
"拥有多年开发经验,先后在中科大洋、易宝支付等公司担任软件开发工程师等职务。热衷于研究主流Java开源框架、Web开发技术。擅长J2EE技术栈中的Spring、SpringMVC, MyBatis,SpringBoot等流行框架,以及对Tomcat, ElasticSearch, Oracle, MySQL,Redis等有深入研究。对待学生循循善诱,讲解知识深入浅出。",何老师,高级讲师
|
||||
"8年Java EE开发与教学经验,精通Java EE技术体系,对Java Web以及Spring、SpringMVC、Struts2、Hibernate、MyBatis等技术有深入的研究。对微服务架构SpringBoot、SpringData、SpringCloud、Docker等技术有系统的研究。熟练应用Vue.js,Node.js,Angular.js等前端框架。精通MySQL、Oracle、Redis、MongoDB等数据的搭建和优化等,曾参与研发黑龙江联通管理系统,国家电网项目,中国电建招标平台系统等开发与管理工作。 ",李老师,高级讲师
|
||||
"Java EE开发与教学多年,精通Java EE技术体系,对流程SSH,SSM以及潮流的SpringData、SpringCloud、Docker等技术有深入的研究。授课逻辑严谨、条理清晰,注重学生独立解决问题的能力,善于总结一类问题,启发式教学。 ",饶老师,高级讲师
|
||||
"从事软件开发工作多年,涉及.Net、HTML5、Unity3d、IOS等平台,曾参与乌龟猜猜,敲打乐敲你妹等跨平台游戏项目的开发,之后投身教育行业,授课过程中擅长使用图片、动画把知识点化繁为简,深受学员喜爱。",胡老师,高级讲师
|
||||
具备多年电商&互联网移动APP市场&物联网行业的软件开发经验,对主流MVVM技术框架(React、微信小程序、Vue),UI框架,模块化,原生JavaScript,Node.js,数据库,代码管理工具等有着丰富的实战运用经验和团队协作经验。曾带领团队完成企业级系统应用设计、搭建、开发、上线等一整套的流程。 ,王老师,高级讲师
|
||||
秉承务实、责任、创新、育人的价值观,用爱成就每一位学生。拥有多年互联网前端应用和软件开发经验,擅长React、Vue。曾就职于TalkingData、用友等公司,负责过多个大型数据分析和可视化平台、移动电商等项目架构和研发工作。,王老师,高级讲师
|
||||
6年+从业经验,精通HTML5、CSS3、JavaSript, 熟悉Vue、React、Angular、jQuery、BootStrap。 特别擅长微信小程序,首次研发和实施了完整的微信小程序商城项目。 教学细腻,能够深入浅出地剖析知识点。 ,苏老师,高级讲师
|
||||
擅长前端企业级项目,在前端领域有自己的发明专利,精通大前端各个方向技术,走在技术的前沿,授课风格幽默,课堂氛围活跃,有强烈的责任心和使命感,能将教学知识与技术应用场景相结合,做到让学生学有所成,学以致用。,潘老师,高级讲师
|
||||
毕业于211院校,曾就职于大型在线教育公司,主导通用组件库的研发,有丰富的项目开发经验和授课经验。擅长移动端开发以及前端主流框架Vue,React。上课风趣幽默,善于将晦涩难懂的术语用生活情境演绎;课后提炼重点,帮助学生高效学习。真正做到让学生知其然,并且知其所以然。 ,李老师,高级讲师
|
||||
毕业于巴黎第十三大学网络多媒体专业,旅法期间服务于艾姆家居,Cibox(法国),曾任微来中国前端负责人,主导澳蚁,趣拼标前端架构。精通JavaScript、Vue、Node.js、jQuery,熟悉 PHP、MySQL,性格乐观开朗,上课善于带动学生的学习激情。,梁老师,高级讲师
|
||||
开发全栈开发工程师,曾主导开发广铁运输、智能设备项目。精通Node.js、Webpack、Vue、React、微信小程序,热衷于技术分享,在B站上发布的微信小程序项目访问排名靠前。教学风格幽默、细腻、能快准狠的将知识传达给学生。 ,万老师,高级讲师
|
||||
多年软件开发经验,曾负责上海银行App架构搭建,主导组件化开发;曾负责51CTO主站重构,及直播平台搭建,及负责鸿蒙社区维护,熟练使用redis缓存及服务端渲染;擅长JavaScript、Vue.js技术栈。 ,赵老师,高级讲师
|
||||
拥有多年电商&互联网金融行业的软件开发经验,曾在招商基金、eims、TCL集团担任项目经理与技术负责人职位,对主流的MVVM框架、移动端开发框架、UI框架、SPA、构建工具、Serverless、代码版本管理软件、模块化、RxJS、Node.js、数据库等技术有丰富的开发和团队协同经验。 ,毛老师,高级讲师
|
||||
Web前端开发工程师,五年开发经验,两年团队管理经验,Vue资深用户。先后参加多个大型管理系统开发,负责前端技术架构,公共业务组件封装;精通前端技术体系,熟练使用CSS3、ES6、Vue、React等前端技术;授课清晰和蔼,擅长调用学员的思考能力,深受学员喜爱。 ,柴老师,高级讲师
|
||||
十年Java与Web前端项目研发与教学经验,曾担任系统架构师、前端主管等职位。曾主导和研发物联网、网银、团购、电商、金融等相关领域的项目。对Node.js、Vue、小程序、React、移动App、数据可视化等前端技术有深入研究并有丰富的项目架构和研发经验。授课条理清晰,循序渐进,深入浅出,认真耐心细致的将知识传授给每一位学员。 ,周老师,高级讲师
|
||||
八年团队研发经验,前后端各类技术均有涉猎,前后研发了多个平台级项目产品,曾担任某大型软件企业的前端架构师,负责公司平台级应用开发工具的设计和架构,精通OTO类移动开发业务,曾担任某大型餐饮软件公司前端负责人,负责公司移动端产品设计和技术架构的开发工作。,高老师,高级讲师
|
||||
Web前端工程师,多年前端开发和实践教学经验,精通JavaScript、Ajax、jQuery、Vue、React、小程序以及Node.js、PHP、MySQL等前后端技术。喜欢钻研,热爱源码封装,授课风趣幽默,由浅入深,广为好评! ,蒋老师,高级讲师
|
||||
"Web前端开发工程师,具有多年的Web开发经验和教学经验。精通HTML,CSS、JavaScript等,对于前端主流框架Vue、React等有深入研究。不但授课清晰, 而且指导超过千余名学生成功就业, 具有丰富的IT行业经验。授课风格幽默风趣,讲解课程细致深入浅出,授课过程中注重与学生的沟通,广受学生好评。 ",李老师,高级讲师
|
||||
"六年前端开发经验,曾担任核心技术团队前端负责人,对JavaScript, 小程序、Vue, Node等技术有深入的研发,擅长以项目、案例驱动教学,懂得如何以学生的角度去思考问题,亲和力强。曾参与华为云官网、华为云社区等多个大型项目的研发。 ",宋老师,高级讲师
|
||||
06年软件工程毕业,资深全栈讲师,从前端到后端架构有一套完整体系,入行前带领技术团队完成用户日增长量10W+的翼推送项目、天翼宽带、页游平台及分成系统架构和研发。讲课深入浅出,思路清晰到位,深受学生好评。,张老师,高级讲师
|
||||
多年开发经验,精通HTML5、CSS3、JavaScript、jQuery、Vue、React等前端技术,曾先后主导多个大型项目开发。授课认真负责、深入浅出,致力于让不同基础的学生日有所学、日有所长。 ,赵老师,高级讲师
|
||||
Apache Flink源码贡献者,专注大数据实时计算领域,ApacheCon Asia 2022亚洲峰会特邀讲师,张老师,高级讲师
|
||||
985计算机硕士,Apache Doris社区贡献者。 ,李老师,高级讲师
|
||||
985计算机硕士,多年深耕大数据、人工智能领域,ApacheCon Asia 2022亚洲峰会特邀讲师 ,赵老师,高级讲师
|
||||
Apache Flink源码贡献者,擅长大数据云平台技术领域。 ,曹老师,高级讲师
|
||||
Apache Pulsar社区贡献者,擅长数仓领域技术。,赵老师,高级讲师
|
||||
擅长数仓领域技术,擅长海量数据下的流式计算和离线数据处理。,闻老师,高级讲师
|
||||
10年开发和教学经验,擅长大数据实时计算,拥有互联网停车平台项目经验。 ,张老师,高级讲师
|
||||
Apache DolphinScheduler社区贡献者,擅长离线数仓领域。 ,孔老师,高级讲师
|
||||
大数据架构师,主导基于Flink的实时反欺诈风控、实时地铁故障预警等流式计算平台的设计和研发。 ,江老师,高级讲师
|
||||
擅长实时领域,拥有金融行业大数据应用架构及开发经验。 ,张老师,高级讲师
|
||||
北京大学计算机硕士,擅长大数据/人工智能领域技术。 ,原老师,高级讲师
|
||||
擅长数仓领域技术,拥有丰富的安全领域大数据行业经验。 ,谢老师,高级讲师
|
||||
大数据架构师,拥有丰富的技术攻坚经验。 ,史老师,高级讲师
|
||||
"北京化工大学工学硕士,多家互联网公司首席信息官,算法专家,具备多年机器学习,深度学习等人工智能相关算法的研发经验, 熟悉Python、Java等常用开发语言,对PyTorch,Tensorflow,PaddlePaddle等深度学习框架熟练使用,在自然语言处理应用方面具备多年的企业实践经验。 ",李老师,高级讲师
|
||||
"北京大学电子与通信工程硕士, 多年开发经验,对数据分析,大数据,机器学习,后端开发等技术有深入了解, 精通java Python等常用开发语言,在多家软件公司担任软件工程师, 项目经理, 有新浪微博,中新网新闻发布系统等多个大型项目经验。 ",原老师,高级讲师
|
||||
擅长人工智能领域技术,多年模式识别和数据挖掘开发经验,主导研发多项国家和省级科研项目,负责企业级信贷风控模型和智能医疗数据平台开发,专注ML/DL/PR/KG领域相关算法的应用,曾任职美团搜索部负责NER及Bert搜索排序优化对Hadoop、SparkTensorflow和PyTorch等大数据、人工智能技术有多年企业实践经验。 ,赵老师,高级讲师
|
||||
多年数据开发经验,曾参与过国云数据公司的数据中台开发,以及甲乙丙丁公司商品推荐系统开发。精通Python、MySQL编程语言,机器学习以及推荐相关算法,熟练应用Hadoop、Hive离线数仓等相关技术。曾担任过BI数据分析师,数据挖掘,推荐算法工程师,数据仓库工程师等数据以及算法相关岗位。 ,魏老师,高级讲师
|
||||
"负责公司后台核心系统、资质审核系统、风控审核系统、标的审核系统的设计与搭建,对接存管银行的API,对接第三方银行机构。后台系统基于大数据的车辆对比系统、车辆实时信息联网查询、车辆电池信息以及前台基于百度地图的路径规划、动态围栏等功能。 ",刘老师,高级讲师
|
||||
毕业于哈尔滨工程大学,擅长GIS行业工程实战,在图像分割、检测、追踪等方面有多年从业经历,参与多项国家级/部委级重大项目,具备丰富的工程落地经验。 ,姚老师,高级讲师
|
||||
曾就职于多家上市公司,并担任高级算法工程师、算法专家。研究领域包括NLP、目标检测、视频跟踪、大语言模型、多模态、模型推理加速等。,李老师,高级讲师
|
||||
6年开发经验,2年教学经验。在大型互联网公司担任TL、架构师,参与设计并搭建大数据、物联网等大型项目。精通Java、SSM框架以及SpringCloud、Dubbo、MQ等微服务架构体系。 ,李老师,高级讲师
|
||||
拥有10年设计经验,曾服务北京正邦品牌策划,百度用户体验中心,北京资海科技集团,龙采科技集团,优逸客科技教育集团,洛杉矶张大钦律师事务所等众多一线互联网设计与外包平台。担任视觉设计师。致力于研究视觉界面设计、品牌服务设计、人机交互等方向,项目经验丰富。,刘老师,高级讲师
|
||||
拥有7年的设计经验,视觉传达设计专业。精通photoshop、illusstrator,Axure等常用设计软件,熟练Html,CSS等前端技术。曾创办独立设计工作室。授课风格幽默,思路清晰。时常告诫学生莫忘设计初心,才能方得始终! ,闫老师,高级讲师
|
||||
拥有8年设计经验,艺术设计专业出身。涉及领域包括UI设计、网站设计、平面印刷广告、包装设计、手绘、VI设计等。曾为武汉理工孵化器、中国语料库等知名企业设计VI视觉识别系统,进行企业品牌推广。具备多年的网页与APP项目开发经验。 ,李老师,高级讲师
|
||||
从事设计行业7年,精通多种设计软件。曾在广告公司担任设计师及设计总监。服务企业有深圳海洋局、招商地产、香港珠宝、华润集团等。2012年投身教育事业,深受广大学员爱戴。,曾老师,高级讲师
|
||||
毕业于中央美术学院视觉传达专业,从事艺术设计类行业多年,曾任设计主管职位。主导进行7个品牌的提案,建立,维护,及后期升级的全案设计,服务于瑞幸,星巴克等30多个品牌。涉及品牌的线上视觉,线下电商,三维视觉,包装设计等,项目实战经验丰富。 ,张老师,高级讲师
|
||||
国内设计色彩搭配知名博主、Adobe认证讲师、十三年工作经验。擅长软件:Photoshop、Illustrator、CorelDRAW、Sketch、AxureRP、C4D、Principle等。服务过的部分知名客户:欧莱雅、万科地产、嘉顿食品、盛大游戏、宝马、阿里巴巴、新浪网、腾讯大粤网、福特汽车等。 ,高老师,高级讲师
|
||||
14年互联网设计经验,首批移动互联网UI设计师,知名WEB设计师,对视觉设计、平面设计、界面设计、体验设计、交互设计、动画制作、影视后期等多方面有研究。曾主导腾讯、小米、IBM、路虎汽车、湖南卫视、上海世博会等500强企业互联网项目。,谭老师,高级讲师
|
||||
10年Linux平台互联网开发经验,业界资深讲师。精通Linux内核开发、内核系统移植、ARM SOC体系结构设计、C/C++、Python、JavaScript、LISP、ARM/X86汇编等编程语言,全栈工程师对计算机原理从上到下融会贯通。,邢老师,高级讲师
|
||||
10年以上软件测试开发、质量管理、项目管理经验,熟练使用Java、Python开发语言,擅长功能测试、接口测试、自动化测试、性能测试等;具有丰富的教学经验,授课幽默风趣,能积极调动学员的积极性。,许老师,高级讲师
|
||||
8年软件测试及项目管理经验,曾任职于蔚来汽车研发部门,在大型电商公司负责测试平台开发,测试管理工作;参与测试的项目有互联网出行,电子商务等,拥有丰富的Java和Python语言经验,有性能测试、自动化测试及测试平台的开发经验。,张老师,高级讲师
|
||||
"从事软件开发和软件测试10年, 熟悉软件项目开发和软件测试流程;熟悉多种测试环境搭建,熟练操作数据库,熟悉Java、Python、H5语言,对Web端自动化测试、接口测试有着丰富经验积累,并有丰富企培、校企合作专业领域培训经验。",刘老师,高级讲师
|
||||
8年以上软件测试及项目管理经验,能熟练运用Python等编程语言,擅长功能测试,接口测试,及自动化测试,能够通过丰富的项目实战经验激发学生学习兴趣,培养学生独立思考和解决问题的能力。具有国家信息系统项目管理师认证、ACP认证。 ,朱老师,高级讲师
|
||||
7年互联网营销推广工作经验,对互联网+新营销有独到的见解和认知,曾为中国移动,中国电信,爱立信一线运营部进行营销培训,擅长以营销思维和锋利的创意解决问题。讲课风格循序渐进,让受训学员看得见、摸得着、体验得到的亲民教学,落到实处。 ,梁老师,高级讲师
|
||||
6年的互联网新媒体运营经验,对文字营销和视频营销有着独到的见解,为上百家企业做过新媒体营销策划方案。曾用2个月的时间孵化出0到200万粉丝的种草推荐类短视频账号,对热点事件极具敏感性。擅长讲故事,脱口秀,授课方式轻松幽默。 ,刘老师,高级讲师
|
||||
互联网行业从业十多年,曾任网站和电子杂志主编、市场营销主管、媒介主管、文案策划主管,文案策划、市场营销、网站运营和媒介监测经验丰富。曾主导腾讯游戏发布、腾讯动漫整体媒介营销、设计行业大赛策划和运营。,吴老师,高级讲师
|
||||
5年短视频直播实战经验,曾担任多家企业运营总监、操盘手。擅长流量整体规划操盘、0-1IP孵化及流量变现。孵化的蓝V账号曾在抖音星图地产赛道排名第一,授课风格幽默严谨,擅长用案例启发思考。 ,张老师,高级讲师
|
||||
致力于网站策划运营、网络营销推广SEM/SEO、淘宝SEO、QQ群优化、数据分析等媒体方面的研究与实践五年有余,经验丰富。在多家高热度网站拥有个人专栏,为众多互联网从业人员提供免费职业辅导和工作指导,拥有忠实粉丝圈,口碑在业界广为人知。,王老师,高级讲师
|
||||
6年新媒体短视频运营经验,创业做过新媒体代运营公司,有丰富的甲方、乙方、MCN机构等行业经验,在短视频以及公众号时代分别做过百万+、十万+等大小账号,合作过的企业有:招行、中行、平安、华润三九、达利、泰康、晨光、东鹏特饮等,擅长新媒体短视频ip孵化,直播带货等新媒体知识分享。 ,彭老师,高级讲师
|
||||
红粉笔联合创始人,曾在中山大学、河北经贸大学等二十余所高校讲授短视频、电商实训项目。具有多个领域商业变现经验,涵盖教育、娱乐、剧情、旅行、电商种草等领域。指导众多学员项目案例,学员零启动美食账号单个作品涨粉20W+、零启动新奇特特效账号首作品流量破100w+,单条带货视频变现8W+。,尹老师,高级讲师
|
||||
四年短视频和直播运营经验,创过业,北抖会创始人,哈尔滨工业大学毕业,曾任百度、苏宁AI架构师和AI产品经理,擅长知识类IP孵化和AI创作爆款短视频。运营过2个百万粉丝大号和3个同时在线过千人的直播间,创作过单条超一千万播放量和多条过百万播放量的爆款短视频,研发课程《AI制作爆款短视频》,抖音账号:野哥玩AI。,赵老师,高级讲师
|
||||
毕业于中国矿业大学新闻传播学,8年文旅媒体行业从业经验。从传统电视媒体转型新媒体短视频行业,深耕短视频及直播运营,擅长社交媒体平台机制及玩法,主导从0-1短视频矩阵及直播商业化变现。擅长以短视频及社群赋能上市旅企高质量发展,媒体营销实战经验丰富,操盘10+上市旅企的新媒体及社群运营工作,包括八达岭长城、天山天池、峨眉山、国际大巴扎、少林寺等5A、4A世界级旅游度假型景区。,宋老师,高级讲师
|
||||
中国开放原子开源基金会银牌讲师,广东省人力资源与社会保障厅师资培训基地人工智能、区块链技术专家讲师。广东省职业教育“双师型”名师工作室成员,厦门大学大数据实验室百家讲坛特邀讲师。多本畅销软件开发教材的编写人和审稿人,国家十三五教材审稿人,在教学方面有丰富的经验。 ,张老师,高级讲师
|
||||
开放原子开源基金会银牌讲师。具有多年常见指令集架构下的单片机外设、嵌入式操作系统下的开发经验。擅长机器视觉、深度学习结合各类传感器在智能机器人领域的应用。多年的教学经验,在基础理论和高等数学等方向的教学有深入的研究。 ,李老师,高级讲师
|
||||
"开放原子开源基金会银牌讲师,毕业于深圳大学,10年的软件开发,机器学习和人工智能算法研发经验。精通C/C++, Python等开发语言, 对智能机器人以及Pytorch有丰富的项目授课经验,对项目全流程开发有丰富的架构经验,对算法工程实践有深入的研究。授课风格风趣幽默, 知识讲解结构层次分明。 ",肖老师,高级讲师
|
||||
"传智教育高级讲师,拥有多年智能机器人、物联网行业经验。精通C++、C、Python、Java等多种编程语言,精通ROS机器人操作系统,对机器人开发有深入研究。 ",吴老师,高级讲师
|
||||
传智教育高级讲师,鸿蒙开发先行者。精通C、C++、Python、Java等多种编程语言,熟悉电子电路设计,拥有多年物联网行业经验,精通ROS机器人操作系统,对机器人开发有一定见解。教学方面能够激发学生的学习兴趣,帮助他们发掘自己的潜能,能够为学生提供专业的技术指导,帮助他们解决实际开发中遇到的问题。 ,肖老师,高级讲师
|
||||
中国开放原子开源基金会银牌讲师,擅长物联网协议、物联网设备的软硬件开发部署。精于通过编写优质C、C++代码,实现芯片功能。对智能硬件设计开发、机器人运动控制、传感器数据处理、人工智能算法有深入的理解和丰富的经验。 ,唐老师,高级讲师
|
||||
高级讲师,资深开发工程师,10多年研发经验和教学经验。精通C/C++/Python语言编程,对GTK+、Qt图形界面编程有深入研究,精通嵌入式、物联网开发,熟练自动化测试。拥有丰富的授课及培训技巧,讲课幽默生动有趣,深入浅出。,江老师,高级讲师
|
||||
精通C、C++、Python、Java等多种编程语言,擅长基于ROS下的工业互联网智能化开发,尤其在激光雕刻等工控机方向有较丰富的经验。熟悉物联网开发,多年教学经验,擅长启发学生思考,课堂风格轻松有趣。 ,覃老师,高级讲师
|
||||
|
@@ -0,0 +1,148 @@
|
||||
[
|
||||
{"name": "罗老师", "title": "高级讲师", "info": "11年JAVA开发经验,9年技术团队管理经验,7年项目架构经验(系统架构师职称),在电子政务、企业办公、电子商务、社交网络、移动应用、互金等行业软件方面拥有丰富的项目实战经验。先后在飞利信、高伟达、成都融特、日立、人人贷担任部门经理、TeamLeader、技术经理、Senior Consultant、技术经理,负责全国政协门户、Wradio+、Chinaface、居然在线、个贷官网、人人U学、Crawlers、Babel大数据平台等项目研发。\t\t\t\t\t\t"},
|
||||
{"name": "姜老师", "title": "高级讲师", "info": "擅长Java EE企业级应用,十余年项目管理经验,曾担任开发工程师,架构师等重要角色。主导多个大型项目的架构设计、管理等工作。在互联网项目领域具备丰富的经验,精通微服务架构,擅长解决高并发,亿级数据量等架构设计,拥有广泛的技术面与实践经验。"},
|
||||
{"name": "陈老师", "title": "高级讲师", "info": "11年互联网项目研发经验及教学经验,曾担任过开发工程师,技术部经理等职务。精通Java技术栈、对SpringBoot、Redis、RabbitMQ、ElasticSearch、Docker、SpringCloud等有深入研究。专注于分布式、高可用、高性能、微服务架构的设计。"},
|
||||
{"name": "石老师", "title": "高级讲师", "info": "具有多年开发经验,曾先后在重庆科普、微创、爱奇艺等多家软件公司担任软件开发工程师、项目经理等职务,精通Java EE、Struts2、Spring、Hibernate应用开发。"},
|
||||
{"name": "郭老师", "title": "高级讲师", "info": "6年软件开发与教学经验,曾在北京航天四创、中科软等单位担任研发经理职务,曾主持参与浙江省机要系统、国家电网系统、济宁市医疗平台、中国电信网络规划平台、人民日报社扶贫APP的设计开发工作。精通分布式、大数据、微服务、高并发、高可用等相关技术架构。\t\t\t\t\t\t"},
|
||||
{"name": "付老师", "title": "高级讲师", "info": "多年开发和教学经验,曾在三星(中国)参与过大型跨国项目担任开发工程师,授课风趣幽默,精通Java EE、数据库、MyBatis、Spring全家桶等企业级常用技术;分布式系统开发经验丰富,对SpringBoot、SpringCloud、Dubbo、ZooKeeper 、ES、Solr、Redis、MQ等分布式技术深有研究。\t\t\t\t\t\t"},
|
||||
{"name": "杨老师", "title": "高级讲师", "info": "10年软件开发和教学经验,武汉理工计算机和武汉大学经济学双学位,在Java和大数据开发领域有丰富的经验,曾参与铁路12306数据中台、用户画像、正晚点预测等多个系统的研发工作。\t\t\t\t\t\t\t"},
|
||||
{"name": "廖老师", "title": "高级讲师", "info": "13年软件开发管理与教学经验;软件与大数据开发认证工程师,项目经理。曾开发管理过信诚人寿、美的、民生银行、青岛啤酒、联想、SGS e-learning、国家工商总局市场信用分类监管系统等。熟悉分布式、微服务等J2EE及大数据生态圈技术知识体系。\t\t\t\t\t\t\t"},
|
||||
{"name": "蒙老师", "title": "高级讲师", "info": "12年互联网项目研发经验及教学经验,曾担任过开发工程师、技术部经理等职务。参与国家电网系统、饿了么外卖平台、每日优鲜O2O平台的设计以及开发工作。精通Java技术栈、对SpringBoot、Redis、Rabbit MQ、ElasticSearch、Docker、Spring Cloud等有深入研究。专注于分布式、高可用、高性能、微服务架构的设计。\t\t\t\t\t\t"},
|
||||
{"name": "何老师", "title": "高级讲师", "info": "6年软件开发经验,曾任职医惠科技开发工程师、阿里巴巴开发工程师岗位,担任过高级开发工程师、项目经理、技术经理等职位。对大型TOB项目如His系统中分布式服务、数字化架构、高并发、高可用、数据一致性等有丰富的项目经验和业务知识。\t\t\t\t\t\t"},
|
||||
{"name": "杨老师", "title": "高级讲师", "info": "5年软件开发及教学经验,先后在多家IT公司担任Android、Java方向软件开发工程师,研发项目涉及,新闻,理财,社交,教育等方向对Android、Java SE、Java Web、MySQL、前端等技术深入研究。\t\t\t\t\t\t"},
|
||||
{"name": "师老师", "title": "高级讲师", "info": "13年的项目开发和教育培训经验,精通Java EE的主流开发框架、Oracle和MySQL等关系型数据库。曾在中科院遥感应用研究所、慧点科技、达利本斯等公司担任软件开发工程师、项目总监,带团队做过边防部队、人寿集团、平安集团等多个企业的大型项目,之后在互联网公司知果科技担任开发经理,完成知果果网的核心产品开发。\t\t\t\t\t\t"},
|
||||
{"name": "杨老师", "title": "高级讲师", "info": "北京航空航天大学 计算机毕业。10年技术开发与管理经验。曾在北大方正、知名游戏公司、互联网公司等担任研发工程师、技术经理。参与移动聊天工具企业版、新媒体营销平台、档案信息管理系统等大型项目研发工作。擅长总结和梳理知识点,深入的对每个技术点进行讲解,引导式教学,注重培养学员能力。\t\t\t\t\t\t"},
|
||||
{"name": "于老师", "title": "高级讲师", "info": "多年教学和开发经验对Java EE技术体系结构、Java Web原理有深刻理解。精通RPC、消息中间件、SSM、SpringCloud等技术,曾参与中国移动门户系统、中国移动集团客户对公平台、CRM系统、健康管理系统等。热衷于技术研究,善于用引导方式教学;授课思路清晰,注重学生基础知识掌握扎实,注重培养学生自主解决问题的能力。\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "多年网络安全及密码学开发经验,服务器研发经验,曾任职国家互联网应急中心关口监测系统研发经理职位。精通Golang、C/C++程序设计,Go与区块链领域先驱者。"},
|
||||
{"name": "单老师", "title": "高级讲师", "info": "8年软件开发及教育相关工作; 有丰富的企业APP、网站、手游开发经验;精通大数据、Java等多种前沿技术,对Linux、MySQL、SSM框架底层、微服务架构 等有一定研究。喜欢钻研,乐于分享。学生遍及阿里、腾讯、苏宁等众多互联网企业。\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "8年Java EE企业级开发经验,5年开发团队管理经验,曾任某大型国有企业项目负责人,对于构建高吞吐、低延迟、分布式架构有丰富经验。精通Java SE、Java Web、Spring、SpringMVC、MyBatis和微服务等主流技术栈。具备丰富的项目重构和性能调优实战经验。秉承“复杂问题简单化,抽象问题具体化”的教学理念,注重学员解决问题能力和个人综合能力的全方位培养。\t\t\t\t\t\t"},
|
||||
{"name": "叶老师", "title": "高级讲师", "info": "9年开发经验,3年授课经验。精通Oracle、DB2、MySQL等数据库使用,精通SSM、MQ、Security等框架使用,熟练Redis、SOA、微服等高可用架构的搭建。曾就职于联想信息产品(深圳)有限公司研发实验室,从事Think产品研发系统的开发与维护。工作积极认真与强烈的责任感、思维视野开阔。"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "10年以上开发以及教育经验,参与过大型的电子商务购物平台、中国电信在线营业厅等项目的开发,在我友科技、杭州数云信息技术担任开发经理职位。曾参与项目:《NewBI》《我友商城》《中国电信套餐直通车》等。\r\n"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "13年开发管理经验,曾先后就职于东亚银行、京东等知名企业,担任过高级开发工程师、项目经理、技术经理、架构师等职位,对电商、金融风控、反欺诈领域及互联网分布式服务、高并发、高可用、秒杀场景等有丰富项目经验和行业知识。精通JavaSE、JavaEE、SSM、Spring全家桶、分布式缓存、各种消息中间件等。对多线程、高并发的解决方案及各种开源技术有着深厚的研究。\t\t\t\t\t\t"},
|
||||
{"name": "蒋老师", "title": "高级讲师", "info": "8年IT从业经验,先后担任高级Java软件工程师、项目组长等职务。具有扎实的Java基本功底和良好的编程规范,精通Java EE当下主流技术SSM,Spring,SpringMVC,Spring Boot,Spring Cloud,Spring Cloud Alibaba和NoSQL数据库;精通MySQL和Oracle数据库产品;多年分布式架构下的开发经验。具备扎实的技术功底、丰富的软件研发、项目管理经验及IT教学经验。幽默风趣,能充分调动学生学习的积极性与主动性,擅长以学员实际掌握情况灵活调整授课方案,以保证学员吸收最大化。\t\t\t\t\t\t"},
|
||||
{"name": "于老师", "title": "高级讲师", "info": "11年软件开发及教学经验,曾先后在360、亚信联创、众信旅游担任研发工程师、技术经理岗位。擅长Java后台程序设计、微服务开发等,特别是在互联网领域有很强的开发经验。授课思路清晰,对学生认真负责,注重学生学习效果。研发以及录制大型新闻资讯项目《黑马头条》。\t\t\t\t\t\t"},
|
||||
{"name": "姜老师", "title": "高级讲师", "info": "7年教学和开发经验。精通Java EE技术体系,对Java SE、Java Web、MySQL、Redis、Spring、SpringMVC以及MyBatis等技术点有深刻理解,授课风格幽默风趣,通俗易懂,条理清晰。关注于学生的成长、极具亲和力!\t\t\t\t\t\t"},
|
||||
{"name": "许老师", "title": "高级讲师", "info": "多年互联网项目与游戏项目研发和管理经验,曾带领公司团队研发电商、社交、实时视频等相关领域项目。涉及H5、Unity3D、小程序等不同平台。对Vue,React,微信小程序等都有研究并具备了丰富的项目架构与研发经验。\t\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "8年Java后台开发及教育工作经验,熟悉Java SE, Java EE, Spring,MyBatis, MySQL等常用框架,熟悉ELK技术栈,SpringCloud生态圈技术。熟练使用Vue.js, Node.js, Webpack等前端技术。曾在多家大型公司担任软件工程师,项目经理职务,涉及在线教育、证券交易、电商等不同领域。目前授课范围:SpringMVC, MyBatis, Lucene, Solr, SolrCloud, 乐优商城。\t\t\t\t\t"},
|
||||
{"name": "赵老师", "title": "高级讲师", "info": "10余年互联网行业开发经验及教学经验,资深架构师,精通Java、C、Python等开发语言。曾就职大型互联网公司从事电商架构相关的设计和Java开发平台以及基础中间件的设计研发工作;曾主导研发了支持高并发处理能力的API网关、高性能数据库连接池以及海量数据归档平台。授课诙谐幽默,讲解通俗易懂。\t\t\t\t\t\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "10年Java研发和教学经验。先后任职于东方邦信、明医众禾、开薪点、企家有道等大型互联网企业,担任高级Java研发、技术经理等职务。负责和主导过多个互联网项目,如东方汇、医德帮、薪人薪事,涉及互联网金融、PHC基层医疗生态、人资 SaaS等领域。精通Java EE基础、并发编程、Socket网络编程,对Spring全家桶、Netty、MyBatis、MySQL Oracle、Redis、SpringBoot、Dubbo、Spring Cloud & Alibaba 微服务体系、RabbitMQ、Docker、K8S等多种互联网前沿技术有深入研究,熟悉JS 、Jquery、Bootstrap、Seajs、Backbone、ElementUI等前端技术。专注细致、幽默风趣,善于结合生活化场景和实际企业场景传达知识点。\t\t\t\t\t\t"},
|
||||
{"name": "郭老师", "title": "高级讲师", "info": "5年的Java软件开发经验,5年Java专业教学经验,曾参与设计与开发多个大型互联网项目,主要有质监抽检,青历商城,E贷网P2P,天艺引资评估系统等等。长期致力于Web应用的开发与设计。擅长Java EE核心技术,SSM、Dubbo、SpringBoot、SpringCloud等。\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "6年开发经验,2年教学经验。在大型互联网公司担任TL、架构师,参与设计并搭建大数据、物联网等大型项目。精通Java、SSM框架以及SpringCloud、Dubbo、MQ等微服务架构体系。\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "曾任职于中国移动、平安保险。精通Java EE体系、MySQL、Android、鸿蒙等技术。主导研发《三天从0带你写一个Java小游戏》、《9天快速入门Java》、《Java从入门到起飞》、《鸿蒙应用开发》等多套课程。\t\t\t\t"},
|
||||
{"name": "申老师", "title": "高级讲师", "info": "8年Java研发及教学经验,精通JavaEE技术体系,曾担任开发工程师、架构师等重要角色。参与于采帅令外卖系统、中国中铁集团道岔钢轨测量系统、中仕学社等互联网项目设计研发。曾主导过多个大型企业级项目的研发。对流行框架Spring,Redis、ElasticSearch、MQ中间件等有深入研究,熟悉以SpringCloud为核心的微服务技术体系,以及微服务在企业落地的最佳实践。授课幽默风趣,条理清晰,乐于分享,注重培养学生独立思考的能力。\t\t\t\t\t"},
|
||||
{"name": "律老师", "title": "高级讲师", "info": "5年的软件开发及教学经验,擅长Java EE企业级应用,参与多个大型项目的架构设计、管理、开发等工作。在互联网项目领域具备丰富的经验,掌握Java SE,Java Web、MySQL、Redis以及Spring、SpringMVC、MyBatis、SpringBoot、SpringCloud等分布式架构技术,拥有广泛的技术面与实践经验。\t\t\t\t\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "多年互联网开发经验,深入理解Java虚拟机原理,以及参数优化,对SSM、SpringBoot、SpringCloud微服务等主流框架有深入的理解和使用,对实时流处理框架storm,并Kafka有多年使用经验。\t\t\t\t\t\t\t"},
|
||||
{"name": "苗老师", "title": "高级讲师", "info": "15年研发经验,曾担任架构师、项目经理等职位,精通JavaEE技术栈,曾负责全国党员远程教育系统、云南学分银行系统、郑州地铁CCTV网管系统等项目的研发与管理工作。\t\t\t\t\t\t\t"},
|
||||
{"name": "袁老师", "title": "高级讲师", "info": "精通C#,Java语言开发,精通MySQL,Oracle,SQL Server数据库,SSM框架,JS,jQuery前段框架。先后在广东源恒科技有限公司、武汉付运通科技有限公司任职, 教学严谨,授课思路清晰,善于语言表达和总结行业经验,因材施教。\t\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "多年开发与教学经验,精通Java企业平台开发技术,熟练掌握并使用设计模式、常用开源框架、分布式架构及微服务架构,先后担任过工程师、项目经理等职位,曾参与过商城项目,主导过智慧医疗等项目研发,具备一定的项目管理经验,同时具有丰富的教学经验,授课认真负责,条理清晰。\t\t\t\t\t\t"},
|
||||
{"name": "邓老师", "title": "高级讲师", "info": "10年Java开发及教学经验,曾在多家公司担任软件开发工程师,技术功底深厚,授课幽默风趣,通俗易懂。精通MySQL,Redis及Spring, MyBatis, SpringMVC, SpringBoot, SpringCloud, SpringData系列框架,并对Solr, ElasticSearch, Dubbo, MQ中间件技术有深入研究。\t\t\t\t\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "9年一线研发与教学经验,曾先后在中石油、久其软件等企业担任研发工程师,参与油气管道全生命周期建设、昆仑燃气等项目,并为国资委、兵工、航天科工等研发CI财报产品。精通Java SE、Java EE技术和SSH/SSM、SpringBoot等常用框架,对Dubbo、ZooKeeper、SpringCloud、MQ、Redis、MongoDB、MySQL等流行框架和技术有深入研究。授课逻辑清晰,通俗易懂,受到众多学生的认可和喜爱。\t\t\t\t\t\t\t"},
|
||||
{"name": "翟老师", "title": "高级讲师", "info": "5年Java开发和项目管理经验,3年的授课经验。曾先后于北大医信、泰和诚任开发工程师、项目主管职位,精通Java语言,擅长Java EE技术栈,对MySQL数据库有深入的了解。课堂幽默风趣,善于用生活中的例子讲解Java中晦涩难懂的知识点。"},
|
||||
{"name": "高老师", "title": "高级讲师", "info": "多年开发和教学经验,曾在中星测控、中软国际担任开发工程师,有移动端、PC端、服务端软件等多类型软件的开发经验,授课不拘一格、深入浅出、风默有趣。精通Java开发,对JVM、数据库、Web规范、数据结构与算法以及主流应用框架均有深入研究。\t\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "多年开发与教学经验,对Spring生态系统,消息中间件,数据库等技术有深入理解和研究。先后在陕西出版集团,中软、电讯盈科等担任过软件开发工程师职务,参与开发过书海小说网,延安英雄传后端等项目开发。\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "具有8年软件开发与教学经验,具有丰富的开发经验,先后在四方精创、华侨城文旅科技、达普信科技等公司担任技术研发与项目运维等工作,先后经历过中国银行(香港)私人银行业务开发、华侨城文旅科技公司内部网站建设以及海关平台相关业务的需求研发。熟悉使用SSH、SSM、缓存Redis、MySQL、Oracle、数据报表等。\t\t\t\t\t\t"},
|
||||
{"name": "袁老师", "title": "高级讲师", "info": "从事Java EE开发及教学工作多年,先后在多家公司担任开发工程师,项目经理等职务。精通Redis及Spring、MyBatis、SpringMVC、SpringBoot、SpringCloud系列框架;拥有大数据项目开发经验,对Hadoop生态圈如Hive,Zookeeper,Hbase等技术有深入研究。\t\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "毕业于东北师范大学,五年以上开发经验,多年软件开发教育培训经验,精通Java SE、MySQL等编程开发语言和开发技术,熟练掌握各种数据结构和算法,精通Spring、SpringMVC、MyBatis等常用框架,熟练掌握微服务框架,授课风格幽默风趣,可以将知识化繁为简、深入浅出的教授给学员。\t\t\t\t\t\t"},
|
||||
{"name": "王老师", "title": "高级讲师", "info": "5年软件研发与项目管理经验,精通Java开发语言以及MySQL、Oracle数据库,对Java流行框架有深入研究,曾在企业中参与主导旅游、活动竞赛等领域项目开发。在传智播客有三年教学经验,授课认真负责,注重学员独立分析能力。\t\t\t\t\t\t\t"},
|
||||
{"name": "郝老师", "title": "高级讲师", "info": "5年软件开发及教学经验,Android、Java方向软件开发工程师,研发项目涉及教育、新闻、娱乐直播等。对Android、Java SE、Java Web、MySQL、前端等技术深入研究。\t\t\t\t\t\t"},
|
||||
{"name": "陈老师", "title": "高级讲师", "info": "15年研发及技术管理经验,历任Java研发、架构师、技术总监等职位。热衷于研究主流Java技术,擅长微服务、高并发、高可用、大数据处理平台的搭建及主要解决方案的落地实施。曾主持开发电商日志采集系统、电商数据开放平台、电商推荐和搜索分析系统、商家开放平台、物联网冷链监控平台等电商及物联网行业SaaS项目。\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "985,211重点大学毕业,拥有10年的开发经验和架构设计经验,曾在几个大型公司担任系统架构师,精通系统架构,曾主持各种大型网站架构设计与开发,主持设计研发ODPS框架,SWORD框架,授课特点:幽默风趣,通俗易懂,条理清晰,层次分明。\t\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "6年Java软件开发及相关教育工作,熟悉Java EE、Android开发,对主流框架有深入了解。曾参与过联通集成公司的4G系统和结算系统开发;主导过移动健康相关项目应用开发。传智授课3年,讲课风格诙谐幽默,倡导寓教于乐的授课方式。\t\t\t\t\t\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "多年J2EE项目研发及管理经验,曾就职于多家大型互联网企业,有着多年的一线开发经验,涉及过互联网电商项目,移动管理系统等,企业ERP系统,手机App项目等。精通Java SE,Java Web,对Spring、MyBatis、SpringMVC、SpringBoot、Android、MySQL、Redis、Solr、Dubbo、FastDFS等有深入研究。"},
|
||||
{"name": "年老师", "title": "高级讲师", "info": "7年软件开发与教学经验,曾负责游戏大数据平台可视化、国家公开信息采集系统、企业电商架构构建等。精通JavaEE,SSM/SSH、Spring全家桶技术。对NoSQL、消息队列、服务集成、微服务、持续集成及各种开源技术有着深厚的研究。\t\t\t\t\t"},
|
||||
{"name": "周老师", "title": "高级讲师", "info": "多年开发和教学经验,涉及到手机应用Web后台开发,精通Java SE、 Java EE、Android。曾多次参与开发国家版权局对应版权业务。\t\t\t\t\t\t"},
|
||||
{"name": "陈老师", "title": "高级讲师", "info": "6年一线研发经验,2年Java教学经验,先后负责国家电网、质检总局、广西海关等单位的多个大型系统研发,后期主做在线教育互联网项目。精通MVC框架、擅长分布式框架、微服务框架,授课有热情、思路清晰、深入浅出。主讲项目阶段及就业冲刺阶段课程。\t\t\t\t\t\t\t"},
|
||||
{"name": "黄老师", "title": "高级讲师", "info": "拥有5年的企业开发和教学经验,对于目前主流的SSM, SpingBoot, SpingCloud, SQL优化等常用框架和技术都有深入的理解。熟练掌握Redis缓存技术和缓存方案。对于常用的消息中间件RabbitMQ和ActiveMQ等以及定时任务等都有相关的实际开发经验。对于前端框架Vue.jsp以及其他前端基础都有所涉及。\t\t\t\t\t\t"},
|
||||
{"name": "赵老师", "title": "高级讲师", "info": "14年软件开发与教学经验,具有丰富的技术团队管理经验,精通Java \r\n EE(JSPServlet、JDBC、Struts2、Spring、Hibernate、MyBatis),以及 WebService、Ajax、Oracle、DB2、MySQL、NoSQL、分布式RPC框架Dubbo、SpringCloud、ZooKeeper、MQ、Redis等。曾先后参与江苏媒资系统、中信银行大客户对公平台、胜利油田办公自动化平台、武警部队信息化系统的研发与管理工作。"},
|
||||
{"name": "徐老师", "title": "高级讲师", "info": "JAVA开发工程师。有着多年的软件开发经验,精通Java EE企业级应用技术,精通MySql、Oracle、DB2等主流关系型数据库,有着丰富的数据库开发经验。曾经参与过移动公司的实时人流监控、用户缴费日志实时分析等大数据项目,熟悉大数据相关开发。\t\t\t\t\t\t\t"},
|
||||
{"name": "陈老师", "title": "高级讲师", "info": "10年软件研发、项目管理与教学经验,精通Java开发技术,曾先后担任过技术经理、技术主管、项目经理等职位。主导研发超过5个以上大型项目与平台。精通微服务架构,擅长解决高并发,亿级数据量等架构设计,拥有广泛的技术面与实践经验。\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "10年企业一线开发经验,在多家企业担任开发工程师,架构师,项目经理等职位,参与或主持航信-航意险,中彩网,网上商城等项目的开发。专注于JavaEE开发,深入研究Spring全家桶、MyBatis、Dobuo等流行框架,对分布式,微服务架构体系有独到的见解。\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "多年开发和教学经验,对Java EE,iOS,Android多平台的开发有丰富经验,熟练掌握SSM、Dubbo,Spring Security,SpringBoot等框架,能熟练操作Oracle,MySQL等关系型数据库, 授课幽默风趣,通俗易懂,关注学员消化吸收,注重培养学员独立分析能力及动手能力。\t\t\t\t\t\t\t"},
|
||||
{"name": "束老师", "title": "高级讲师", "info": "12年工作经验,架构师,多年IT领域研发经验、深入的理解SSM、Redis、RocketMQ、Memcached、Quartz、Dubbo、SpringBoot、SpringCloud等框架,主要负责参与的项目《全国数字物价监测中心》、《上海浦发银行信用卡客户服务中心》、《某金融平台统一支付中心》等项目。\t\t\t\t\t\t"},
|
||||
{"name": "唐老师", "title": "高级讲师", "info": "13年项目开发和教学经验,对SpringData、SpringCloud、Docker等技术有深入的研究,熟练掌握 MySQL、Redis,SSM框架、Dubbo、ZooKeeper等技术,热衷主流Java开源框架,曾参与中国电信计费系统、银行自动服务后台操作系统等。\t\t\t\t\t\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "负责公司后台核心系统、资质审核系统、风控审核系统、标的审核系统的设计与搭建,对接存管银行的API,对接第三方银行机构。后台系统基于大数据的车辆对比系统、车辆实时信息联网查询、车辆电池信息以及前台基于百度地图的路径规划、动态围栏等功能。"},
|
||||
{"name": "宋老师", "title": "高级讲师", "info": "10年软件开发经验,曾在用友网络担任项目经理、项目总监等职务,先后主导过佛山美的集团互联网系统应用、白云国际机场商旅电商系统和深圳民太安公估保险移动互联网应用等。 精通Java EE、并发编程、SSM、SpringBoot。对CDN、NoSQL、消息队列、微服务、JVM性能调优、爬虫级各种开源框架等技术。\r\n\t\t\t\t\t\t\t"},
|
||||
{"name": "徐老师", "title": "高级讲师", "info": "10年Java从业经验,曾负责安防视频监控系统、华为VMS语言邮箱系统、电信网络质量监控系统、溯源APP等。精通Java SE,Java EE,SSM/SSH,Spring全家桶。对NoSQL、消息队列、服务集成、流程引擎、微服务、持续集成及各种开源技术有着深厚的研究。\t\t\t\t\t"},
|
||||
{"name": "杨老师", "title": "高级讲师", "info": "从事Java工作多年,有着丰富的开发和教学经验,Java,JSP,Servlet开发,精通SSM、Dubbo,Spring Security,Struts2,Hibernate等框架,能熟练操作Oracle,MySQL等关系型数据库。授课幽默风趣,通俗易懂。\t\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "从事十年大型互联网软件开发和教学工作,具有丰富的软件开发和项目管理经验,精通Java 、Swift、Python等语言,曾主导并参与多个大型项目,包括W3Mobile华为协同办公平台,迪士尼蓝牙点读笔等。具有丰富的教学经验,授课思路清晰,风趣幽默,通俗易懂。\t\t\t\t\t\t\t"},
|
||||
{"name": "王老师", "title": "高级讲师", "info": "10多年IT从业经验,北大计算机毕业,曾在中科软从事系统分析与架构工作,精通OOM、PD、DDD建模;对计算机原理、体系结构、常用算法有深入研究;熟练分布式和微服务环境下的主流技术架构。\t\t\t\t\t\t"},
|
||||
{"name": "王老师", "title": "高级讲师", "info": "9年Java开发管理和教学,先后在STS(国外)、艾默生网络能源、华胜天成等担任过软件开发工程师、项目主管等职务。精通MySQL、Oracle等数据库以及市场流行的Web框架(SSM、Spring全家桶等)。参与完成的项目主要有:移动国漫项目、短信平台、流量平台、海外物流等。\t\t\t\t\t\t\t"},
|
||||
{"name": "孔老师", "title": "高级讲师", "info": "先后在宜信、顺丰、美菜担任研发工程师,具有近10年的Java实战研发经验,熟练掌握当今主流技术(MyBatis/iBatis、Spring、SpringMVC、Dubbo、Elasticsearch、SpringBoot、SpringCloud),曾参与研发基础医疗His、CRM、美菜搜索、顺丰优选等项目。\t\t\t\t\t"},
|
||||
{"name": "梅老师", "title": "高级讲师", "info": "曾担任过Android应用开发,Java企业级开发工程师,智能家居行业曾获取App软件专利,精通MyBatis,SpringMVC,Spring,Vue等当下主流的框架,以及其底层实现的原理,熟练使用MySQL,Oracle,Redis主流的数据库,有丰富的授课经验。\t\t\t\t\t\t"},
|
||||
{"name": "夏老师", "title": "高级讲师", "info": "上海交大软件工程毕业,10年大型项目实战经验,精通C++、Java、JavaScript等多种主流开发语言。曾在多家500强企业任职,参与金融、物联网、通信等多个领域系统的设计和开发,对常用分布式技术和微服务架构均有深入研究。\t\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "曾经主导和参与过:中国银行网点规划系统、河南移动网上营业厅、郑州市科技局综合性门户网站、河南地税缴费系统等等项目。曾经参与策划和编写过多本书籍并出版发行,例如《Ajax+JSP巧学巧用》《Struts2框架开发从入门到精通》《EJB3从入门到精通》等等。\t\t\t\t\t\t\t"},
|
||||
{"name": "陆老师", "title": "高级讲师", "info": "拥有多年开发和教学经验,精通Java EE领域编程语言。 深入研究流行框架 SpringMVC、Spring、Hibernate、MyBatis、缓存系统Redis等。 教学擅长理论与实际相结合,对复杂简单化。 曾参与研发大型项目《万达慧云系统》、《建筑需求响应运营管理系统》等。\t\t\t"},
|
||||
{"name": "周老师", "title": "高级讲师", "info": "2015年加入黑马,精通Servlet、JSP、JavaMail、JDBC、JMS、WebService、Struts2、Spring、SpringMVC框架、MyBatis等技术;精通MySQL、Oracle数据库,Tomcat、JBoss等服务器的搭建和部署。在中软等企业从事大型Web项目的设计开发多年,曾任项目经理等职位。\t\t\t"},
|
||||
{"name": "王老师", "title": "高级讲师", "info": "多年软件开发及项目管理经验,精通Java、PHP、C++等编程语言;先后在北京K2地产、由米定制等企业担任开发及管理工作;精通Struts2、Spring、Hibernate、SpringMVC、MyBatis等常用框架;对Oracle,MySQL,Redis,MongoDB等数据库技术有深入研究。"},
|
||||
{"name": "何老师", "title": "高级讲师", "info": "拥有多年开发经验,先后在中科大洋、易宝支付等公司担任软件开发工程师等职务。热衷于研究主流Java开源框架、Web开发技术。擅长J2EE技术栈中的Spring、SpringMVC, MyBatis,SpringBoot等流行框架,以及对Tomcat, ElasticSearch, Oracle, MySQL,Redis等有深入研究。对待学生循循善诱,讲解知识深入浅出。"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "8年Java EE开发与教学经验,精通Java EE技术体系,对Java Web以及Spring、SpringMVC、Struts2、Hibernate、MyBatis等技术有深入的研究。对微服务架构SpringBoot、SpringData、SpringCloud、Docker等技术有系统的研究。熟练应用Vue.js,Node.js,Angular.js等前端框架。精通MySQL、Oracle、Redis、MongoDB等数据的搭建和优化等,曾参与研发黑龙江联通管理系统,国家电网项目,中国电建招标平台系统等开发与管理工作。\t\t\t\t\t\t\t"},
|
||||
{"name": "饶老师", "title": "高级讲师", "info": "Java EE开发与教学多年,精通Java EE技术体系,对流程SSH,SSM以及潮流的SpringData、SpringCloud、Docker等技术有深入的研究。授课逻辑严谨、条理清晰,注重学生独立解决问题的能力,善于总结一类问题,启发式教学。\t\t\t\t"},
|
||||
{"name": "胡老师", "title": "高级讲师", "info": "从事软件开发工作多年,涉及.Net、HTML5、Unity3d、IOS等平台,曾参与乌龟猜猜,敲打乐敲你妹等跨平台游戏项目的开发,之后投身教育行业,授课过程中擅长使用图片、动画把知识点化繁为简,深受学员喜爱。"},
|
||||
{"name": "王老师", "title": "高级讲师", "info": "具备多年电商&互联网移动APP市场&物联网行业的软件开发经验,对主流MVVM技术框架(React、微信小程序、Vue),UI框架,模块化,原生JavaScript,Node.js,数据库,代码管理工具等有着丰富的实战运用经验和团队协作经验。曾带领团队完成企业级系统应用设计、搭建、开发、上线等一整套的流程。\t\t\t\t\t"},
|
||||
{"name": "王老师", "title": "高级讲师", "info": "秉承务实、责任、创新、育人的价值观,用爱成就每一位学生。拥有多年互联网前端应用和软件开发经验,擅长React、Vue。曾就职于TalkingData、用友等公司,负责过多个大型数据分析和可视化平台、移动电商等项目架构和研发工作。"},
|
||||
{"name": "苏老师", "title": "高级讲师", "info": "6年+从业经验,精通HTML5、CSS3、JavaSript, 熟悉Vue、React、Angular、jQuery、BootStrap。 特别擅长微信小程序,首次研发和实施了完整的微信小程序商城项目。 教学细腻,能够深入浅出地剖析知识点。\t\t\t\t\t\t\t"},
|
||||
{"name": "潘老师", "title": "高级讲师", "info": "擅长前端企业级项目,在前端领域有自己的发明专利,精通大前端各个方向技术,走在技术的前沿,授课风格幽默,课堂氛围活跃,有强烈的责任心和使命感,能将教学知识与技术应用场景相结合,做到让学生学有所成,学以致用。"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "毕业于211院校,曾就职于大型在线教育公司,主导通用组件库的研发,有丰富的项目开发经验和授课经验。擅长移动端开发以及前端主流框架Vue,React。上课风趣幽默,善于将晦涩难懂的术语用生活情境演绎;课后提炼重点,帮助学生高效学习。真正做到让学生知其然,并且知其所以然。\t\t\t\t\t\t"},
|
||||
{"name": "梁老师", "title": "高级讲师", "info": "毕业于巴黎第十三大学网络多媒体专业,旅法期间服务于艾姆家居,Cibox(法国),曾任微来中国前端负责人,主导澳蚁,趣拼标前端架构。精通JavaScript、Vue、Node.js、jQuery,熟悉 PHP、MySQL,性格乐观开朗,上课善于带动学生的学习激情。"},
|
||||
{"name": "万老师", "title": "高级讲师", "info": "开发全栈开发工程师,曾主导开发广铁运输、智能设备项目。精通Node.js、Webpack、Vue、React、微信小程序,热衷于技术分享,在B站上发布的微信小程序项目访问排名靠前。教学风格幽默、细腻、能快准狠的将知识传达给学生。\t\t\t"},
|
||||
{"name": "赵老师", "title": "高级讲师", "info": "多年软件开发经验,曾负责上海银行App架构搭建,主导组件化开发;曾负责51CTO主站重构,及直播平台搭建,及负责鸿蒙社区维护,熟练使用redis缓存及服务端渲染;擅长JavaScript、Vue.js技术栈。\t\t\t\t"},
|
||||
{"name": "毛老师", "title": "高级讲师", "info": "拥有多年电商&互联网金融行业的软件开发经验,曾在招商基金、eims、TCL集团担任项目经理与技术负责人职位,对主流的MVVM框架、移动端开发框架、UI框架、SPA、构建工具、Serverless、代码版本管理软件、模块化、RxJS、Node.js、数据库等技术有丰富的开发和团队协同经验。\t\t\t\t\t\t\t\t\t\t\t\t"},
|
||||
{"name": "柴老师", "title": "高级讲师", "info": "Web前端开发工程师,五年开发经验,两年团队管理经验,Vue资深用户。先后参加多个大型管理系统开发,负责前端技术架构,公共业务组件封装;精通前端技术体系,熟练使用CSS3、ES6、Vue、React等前端技术;授课清晰和蔼,擅长调用学员的思考能力,深受学员喜爱。\t\t\t\t\t\t"},
|
||||
{"name": "周老师", "title": "高级讲师", "info": "十年Java与Web前端项目研发与教学经验,曾担任系统架构师、前端主管等职位。曾主导和研发物联网、网银、团购、电商、金融等相关领域的项目。对Node.js、Vue、小程序、React、移动App、数据可视化等前端技术有深入研究并有丰富的项目架构和研发经验。授课条理清晰,循序渐进,深入浅出,认真耐心细致的将知识传授给每一位学员。\t\t\t\t"},
|
||||
{"name": "高老师", "title": "高级讲师", "info": "八年团队研发经验,前后端各类技术均有涉猎,前后研发了多个平台级项目产品,曾担任某大型软件企业的前端架构师,负责公司平台级应用开发工具的设计和架构,精通OTO类移动开发业务,曾担任某大型餐饮软件公司前端负责人,负责公司移动端产品设计和技术架构的开发工作。"},
|
||||
{"name": "蒋老师", "title": "高级讲师", "info": "Web前端工程师,多年前端开发和实践教学经验,精通JavaScript、Ajax、jQuery、Vue、React、小程序以及Node.js、PHP、MySQL等前后端技术。喜欢钻研,热爱源码封装,授课风趣幽默,由浅入深,广为好评!\t\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "Web前端开发工程师,具有多年的Web开发经验和教学经验。精通HTML,CSS、JavaScript等,对于前端主流框架Vue、React等有深入研究。不但授课清晰, 而且指导超过千余名学生成功就业, 具有丰富的IT行业经验。授课风格幽默风趣,讲解课程细致深入浅出,授课过程中注重与学生的沟通,广受学生好评。\t\t\t\t\t"},
|
||||
{"name": "宋老师", "title": "高级讲师", "info": "六年前端开发经验,曾担任核心技术团队前端负责人,对JavaScript, 小程序、Vue, Node等技术有深入的研发,擅长以项目、案例驱动教学,懂得如何以学生的角度去思考问题,亲和力强。曾参与华为云官网、华为云社区等多个大型项目的研发。\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "06年软件工程毕业,资深全栈讲师,从前端到后端架构有一套完整体系,入行前带领技术团队完成用户日增长量10W+的翼推送项目、天翼宽带、页游平台及分成系统架构和研发。讲课深入浅出,思路清晰到位,深受学生好评。"},
|
||||
{"name": "赵老师", "title": "高级讲师", "info": "多年开发经验,精通HTML5、CSS3、JavaScript、jQuery、Vue、React等前端技术,曾先后主导多个大型项目开发。授课认真负责、深入浅出,致力于让不同基础的学生日有所学、日有所长。\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "Apache Flink源码贡献者,专注大数据实时计算领域,ApacheCon Asia 2022亚洲峰会特邀讲师"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "985计算机硕士,Apache Doris社区贡献者。\t\t\t\t\t"},
|
||||
{"name": "赵老师", "title": "高级讲师", "info": "985计算机硕士,多年深耕大数据、人工智能领域,ApacheCon Asia 2022亚洲峰会特邀讲师\t\t\t\t"},
|
||||
{"name": "曹老师", "title": "高级讲师", "info": "Apache Flink源码贡献者,擅长大数据云平台技术领域。\t\t"},
|
||||
{"name": "赵老师", "title": "高级讲师", "info": "Apache Pulsar社区贡献者,擅长数仓领域技术。"},
|
||||
{"name": "闻老师", "title": "高级讲师", "info": "擅长数仓领域技术,擅长海量数据下的流式计算和离线数据处理。"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "10年开发和教学经验,擅长大数据实时计算,拥有互联网停车平台项目经验。\t\t\t\t"},
|
||||
{"name": "孔老师", "title": "高级讲师", "info": "Apache DolphinScheduler社区贡献者,擅长离线数仓领域。\t\t\t\t"},
|
||||
{"name": "江老师", "title": "高级讲师", "info": "大数据架构师,主导基于Flink的实时反欺诈风控、实时地铁故障预警等流式计算平台的设计和研发。\t\t\t\t\t\t"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "擅长实时领域,拥有金融行业大数据应用架构及开发经验。\t\t\t\t"},
|
||||
{"name": "原老师", "title": "高级讲师", "info": "北京大学计算机硕士,擅长大数据/人工智能领域技术。\t\t"},
|
||||
{"name": "谢老师", "title": "高级讲师", "info": "擅长数仓领域技术,拥有丰富的安全领域大数据行业经验。\t"},
|
||||
{"name": "史老师", "title": "高级讲师", "info": "大数据架构师,拥有丰富的技术攻坚经验。\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "北京化工大学工学硕士,多家互联网公司首席信息官,算法专家,具备多年机器学习,深度学习等人工智能相关算法的研发经验, 熟悉Python、Java等常用开发语言,对PyTorch,Tensorflow,PaddlePaddle等深度学习框架熟练使用,在自然语言处理应用方面具备多年的企业实践经验。\t\t\t\t"},
|
||||
{"name": "原老师", "title": "高级讲师", "info": "北京大学电子与通信工程硕士, 多年开发经验,对数据分析,大数据,机器学习,后端开发等技术有深入了解, 精通java Python等常用开发语言,在多家软件公司担任软件工程师, 项目经理, 有新浪微博,中新网新闻发布系统等多个大型项目经验。\t\t\t\t\t\t"},
|
||||
{"name": "赵老师", "title": "高级讲师", "info": "擅长人工智能领域技术,多年模式识别和数据挖掘开发经验,主导研发多项国家和省级科研项目,负责企业级信贷风控模型和智能医疗数据平台开发,专注ML/DL/PR/KG领域相关算法的应用,曾任职美团搜索部负责NER及Bert搜索排序优化对Hadoop、SparkTensorflow和PyTorch等大数据、人工智能技术有多年企业实践经验。\t\t\t\t\t"},
|
||||
{"name": "魏老师", "title": "高级讲师", "info": "多年数据开发经验,曾参与过国云数据公司的数据中台开发,以及甲乙丙丁公司商品推荐系统开发。精通Python、MySQL编程语言,机器学习以及推荐相关算法,熟练应用Hadoop、Hive离线数仓等相关技术。曾担任过BI数据分析师,数据挖掘,推荐算法工程师,数据仓库工程师等数据以及算法相关岗位。\t\t\t\t\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "负责公司后台核心系统、资质审核系统、风控审核系统、标的审核系统的设计与搭建,对接存管银行的API,对接第三方银行机构。后台系统基于大数据的车辆对比系统、车辆实时信息联网查询、车辆电池信息以及前台基于百度地图的路径规划、动态围栏等功能。\t\t\t\t\t\t\t"},
|
||||
{"name": "姚老师", "title": "高级讲师", "info": "毕业于哈尔滨工程大学,擅长GIS行业工程实战,在图像分割、检测、追踪等方面有多年从业经历,参与多项国家级/部委级重大项目,具备丰富的工程落地经验。\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "曾就职于多家上市公司,并担任高级算法工程师、算法专家。研究领域包括NLP、目标检测、视频跟踪、大语言模型、多模态、模型推理加速等。"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "6年开发经验,2年教学经验。在大型互联网公司担任TL、架构师,参与设计并搭建大数据、物联网等大型项目。精通Java、SSM框架以及SpringCloud、Dubbo、MQ等微服务架构体系。\t\t\t\t\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "拥有10年设计经验,曾服务北京正邦品牌策划,百度用户体验中心,北京资海科技集团,龙采科技集团,优逸客科技教育集团,洛杉矶张大钦律师事务所等众多一线互联网设计与外包平台。担任视觉设计师。致力于研究视觉界面设计、品牌服务设计、人机交互等方向,项目经验丰富。"},
|
||||
{"name": "闫老师", "title": "高级讲师", "info": "拥有7年的设计经验,视觉传达设计专业。精通photoshop、illusstrator,Axure等常用设计软件,熟练Html,CSS等前端技术。曾创办独立设计工作室。授课风格幽默,思路清晰。时常告诫学生莫忘设计初心,才能方得始终!\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "拥有8年设计经验,艺术设计专业出身。涉及领域包括UI设计、网站设计、平面印刷广告、包装设计、手绘、VI设计等。曾为武汉理工孵化器、中国语料库等知名企业设计VI视觉识别系统,进行企业品牌推广。具备多年的网页与APP项目开发经验。\t\t\t\t\t"},
|
||||
{"name": "曾老师", "title": "高级讲师", "info": "从事设计行业7年,精通多种设计软件。曾在广告公司担任设计师及设计总监。服务企业有深圳海洋局、招商地产、香港珠宝、华润集团等。2012年投身教育事业,深受广大学员爱戴。"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "毕业于中央美术学院视觉传达专业,从事艺术设计类行业多年,曾任设计主管职位。主导进行7个品牌的提案,建立,维护,及后期升级的全案设计,服务于瑞幸,星巴克等30多个品牌。涉及品牌的线上视觉,线下电商,三维视觉,包装设计等,项目实战经验丰富。\t\t\t\t\t\t"},
|
||||
{"name": "高老师", "title": "高级讲师", "info": "国内设计色彩搭配知名博主、Adobe认证讲师、十三年工作经验。擅长软件:Photoshop、Illustrator、CorelDRAW、Sketch、AxureRP、C4D、Principle等。服务过的部分知名客户:欧莱雅、万科地产、嘉顿食品、盛大游戏、宝马、阿里巴巴、新浪网、腾讯大粤网、福特汽车等。\t\t\t\t\t\t"},
|
||||
{"name": "谭老师", "title": "高级讲师", "info": "14年互联网设计经验,首批移动互联网UI设计师,知名WEB设计师,对视觉设计、平面设计、界面设计、体验设计、交互设计、动画制作、影视后期等多方面有研究。曾主导腾讯、小米、IBM、路虎汽车、湖南卫视、上海世博会等500强企业互联网项目。"},
|
||||
{"name": "邢老师", "title": "高级讲师", "info": "10年Linux平台互联网开发经验,业界资深讲师。精通Linux内核开发、内核系统移植、ARM SOC体系结构设计、C/C++、Python、JavaScript、LISP、ARM/X86汇编等编程语言,全栈工程师对计算机原理从上到下融会贯通。"},
|
||||
{"name": "许老师", "title": "高级讲师", "info": "10年以上软件测试开发、质量管理、项目管理经验,熟练使用Java、Python开发语言,擅长功能测试、接口测试、自动化测试、性能测试等;具有丰富的教学经验,授课幽默风趣,能积极调动学员的积极性。"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "8年软件测试及项目管理经验,曾任职于蔚来汽车研发部门,在大型电商公司负责测试平台开发,测试管理工作;参与测试的项目有互联网出行,电子商务等,拥有丰富的Java和Python语言经验,有性能测试、自动化测试及测试平台的开发经验。"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "从事软件开发和软件测试10年, 熟悉软件项目开发和软件测试流程;熟悉多种测试环境搭建,熟练操作数据库,熟悉Java、Python、H5语言,对Web端自动化测试、接口测试有着丰富经验积累,并有丰富企培、校企合作专业领域培训经验。"},
|
||||
{"name": "朱老师", "title": "高级讲师", "info": "8年以上软件测试及项目管理经验,能熟练运用Python等编程语言,擅长功能测试,接口测试,及自动化测试,能够通过丰富的项目实战经验激发学生学习兴趣,培养学生独立思考和解决问题的能力。具有国家信息系统项目管理师认证、ACP认证。\t\t\t\t\t"},
|
||||
{"name": "梁老师", "title": "高级讲师", "info": "7年互联网营销推广工作经验,对互联网+新营销有独到的见解和认知,曾为中国移动,中国电信,爱立信一线运营部进行营销培训,擅长以营销思维和锋利的创意解决问题。讲课风格循序渐进,让受训学员看得见、摸得着、体验得到的亲民教学,落到实处。\t\t\t\t\t\t\t"},
|
||||
{"name": "刘老师", "title": "高级讲师", "info": "6年的互联网新媒体运营经验,对文字营销和视频营销有着独到的见解,为上百家企业做过新媒体营销策划方案。曾用2个月的时间孵化出0到200万粉丝的种草推荐类短视频账号,对热点事件极具敏感性。擅长讲故事,脱口秀,授课方式轻松幽默。\t\t\t\t\t\t"},
|
||||
{"name": "吴老师", "title": "高级讲师", "info": "互联网行业从业十多年,曾任网站和电子杂志主编、市场营销主管、媒介主管、文案策划主管,文案策划、市场营销、网站运营和媒介监测经验丰富。曾主导腾讯游戏发布、腾讯动漫整体媒介营销、设计行业大赛策划和运营。"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "5年短视频直播实战经验,曾担任多家企业运营总监、操盘手。擅长流量整体规划操盘、0-1IP孵化及流量变现。孵化的蓝V账号曾在抖音星图地产赛道排名第一,授课风格幽默严谨,擅长用案例启发思考。\t\t\t\t\t"},
|
||||
{"name": "王老师", "title": "高级讲师", "info": "致力于网站策划运营、网络营销推广SEM/SEO、淘宝SEO、QQ群优化、数据分析等媒体方面的研究与实践五年有余,经验丰富。在多家高热度网站拥有个人专栏,为众多互联网从业人员提供免费职业辅导和工作指导,拥有忠实粉丝圈,口碑在业界广为人知。"},
|
||||
{"name": "彭老师", "title": "高级讲师", "info": "6年新媒体短视频运营经验,创业做过新媒体代运营公司,有丰富的甲方、乙方、MCN机构等行业经验,在短视频以及公众号时代分别做过百万+、十万+等大小账号,合作过的企业有:招行、中行、平安、华润三九、达利、泰康、晨光、东鹏特饮等,擅长新媒体短视频ip孵化,直播带货等新媒体知识分享。\t\t\t\t\t"},
|
||||
{"name": "尹老师", "title": "高级讲师", "info": "红粉笔联合创始人,曾在中山大学、河北经贸大学等二十余所高校讲授短视频、电商实训项目。具有多个领域商业变现经验,涵盖教育、娱乐、剧情、旅行、电商种草等领域。指导众多学员项目案例,学员零启动美食账号单个作品涨粉20W+、零启动新奇特特效账号首作品流量破100w+,单条带货视频变现8W+。"},
|
||||
{"name": "赵老师", "title": "高级讲师", "info": "四年短视频和直播运营经验,创过业,北抖会创始人,哈尔滨工业大学毕业,曾任百度、苏宁AI架构师和AI产品经理,擅长知识类IP孵化和AI创作爆款短视频。运营过2个百万粉丝大号和3个同时在线过千人的直播间,创作过单条超一千万播放量和多条过百万播放量的爆款短视频,研发课程《AI制作爆款短视频》,抖音账号:野哥玩AI。"},
|
||||
{"name": "宋老师", "title": "高级讲师", "info": "毕业于中国矿业大学新闻传播学,8年文旅媒体行业从业经验。从传统电视媒体转型新媒体短视频行业,深耕短视频及直播运营,擅长社交媒体平台机制及玩法,主导从0-1短视频矩阵及直播商业化变现。擅长以短视频及社群赋能上市旅企高质量发展,媒体营销实战经验丰富,操盘10+上市旅企的新媒体及社群运营工作,包括八达岭长城、天山天池、峨眉山、国际大巴扎、少林寺等5A、4A世界级旅游度假型景区。"},
|
||||
{"name": "张老师", "title": "高级讲师", "info": "中国开放原子开源基金会银牌讲师,广东省人力资源与社会保障厅师资培训基地人工智能、区块链技术专家讲师。广东省职业教育“双师型”名师工作室成员,厦门大学大数据实验室百家讲坛特邀讲师。多本畅销软件开发教材的编写人和审稿人,国家十三五教材审稿人,在教学方面有丰富的经验。\t\t\t\t\t\t\t"},
|
||||
{"name": "李老师", "title": "高级讲师", "info": "开放原子开源基金会银牌讲师。具有多年常见指令集架构下的单片机外设、嵌入式操作系统下的开发经验。擅长机器视觉、深度学习结合各类传感器在智能机器人领域的应用。多年的教学经验,在基础理论和高等数学等方向的教学有深入的研究。\t\t\t\t\t\t\t"},
|
||||
{"name": "肖老师", "title": "高级讲师", "info": "开放原子开源基金会银牌讲师,毕业于深圳大学,10年的软件开发,机器学习和人工智能算法研发经验。精通C/C++, Python等开发语言, 对智能机器人以及Pytorch有丰富的项目授课经验,对项目全流程开发有丰富的架构经验,对算法工程实践有深入的研究。授课风格风趣幽默, 知识讲解结构层次分明。\t\t\t\t\t\t\t"},
|
||||
{"name": "吴老师", "title": "高级讲师", "info": "传智教育高级讲师,拥有多年智能机器人、物联网行业经验。精通C++、C、Python、Java等多种编程语言,精通ROS机器人操作系统,对机器人开发有深入研究。\t\t\t\t\t\t"},
|
||||
{"name": "肖老师", "title": "高级讲师", "info": "传智教育高级讲师,鸿蒙开发先行者。精通C、C++、Python、Java等多种编程语言,熟悉电子电路设计,拥有多年物联网行业经验,精通ROS机器人操作系统,对机器人开发有一定见解。教学方面能够激发学生的学习兴趣,帮助他们发掘自己的潜能,能够为学生提供专业的技术指导,帮助他们解决实际开发中遇到的问题。\t\t\t\t\t\t"},
|
||||
{"name": "唐老师", "title": "高级讲师", "info": "中国开放原子开源基金会银牌讲师,擅长物联网协议、物联网设备的软硬件开发部署。精于通过编写优质C、C++代码,实现芯片功能。对智能硬件设计开发、机器人运动控制、传感器数据处理、人工智能算法有深入的理解和丰富的经验。\t\t\t\t\t\t"},
|
||||
{"name": "江老师", "title": "高级讲师", "info": "高级讲师,资深开发工程师,10多年研发经验和教学经验。精通C/C++/Python语言编程,对GTK+、Qt图形界面编程有深入研究,精通嵌入式、物联网开发,熟练自动化测试。拥有丰富的授课及培训技巧,讲课幽默生动有趣,深入浅出。"},
|
||||
{"name": "覃老师", "title": "高级讲师", "info": "精通C、C++、Python、Java等多种编程语言,擅长基于ROS下的工业互联网智能化开发,尤其在激光雕刻等工控机方向有较丰富的经验。熟悉物联网开发,多年教学经验,擅长启发学生思考,课堂风格轻松有趣。\t\t\t\t\t"}
|
||||
]
|
||||
@@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = wangyizhaopin.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = wangyizhaopin
|
||||
@@ -0,0 +1,18 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class WangyizhaopinItem(scrapy.Item):
    """Item holding the fields collected for one scraped record.

    Each attribute declares one Scrapy field. The camelCase names are kept
    exactly as written because other code addresses them as item keys
    (presumably they mirror the keys of the scraped payload -- TODO confirm
    against the spider).
    """

    # Read by the project's pipelines, which write it to the CSV output.
    name = scrapy.Field()

    # NOTE(review): the meanings below are inferred from the field names
    # only -- confirm against the spider that fills them.
    postTypeFullName = scrapy.Field()   # presumably the posting category
    description = scrapy.Field()        # presumably the job description
    reqEducationName = scrapy.Field()   # presumably the required education
    reqWorkYearsName = scrapy.Field()   # presumably the required experience
    requirement = scrapy.Field()        # presumably the job requirements
    workPlaceNameList = scrapy.Field()  # presumably a list of work places
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
|
||||
class WangyizhaopinSpiderMiddleware:
    """Pass-through spider middleware for the wangyizhaopin project.

    Every hook hands its input on unchanged. Not all hooks need to be
    defined: when one is missing, Scrapy acts as if the spider middleware
    does not modify the passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and subscribe it to ``spider_opened``.

        Args:
            crawler: Object whose ``signals`` manager the new instance is
                connected to.

        Returns:
            The newly created middleware instance.
        """
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware

    def process_spider_input(self, response, spider):
        """Handle a response on its way through the middleware into the spider.

        The hook should return ``None`` or raise an exception; this
        implementation always returns ``None``.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Yield every element of ``result`` unchanged and in order.

        Called with the results returned from the spider after it has
        processed ``response``; must produce an iterable of Request or
        item objects.
        """
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Take no action when a spider or ``process_spider_input()`` raises.

        The hook should return ``None`` or an iterable of Request or item
        objects; this implementation returns ``None``.
        """
        return None

    def process_start_requests(self, start_requests, spider):
        """Yield the spider's start requests unchanged and in order.

        Works like ``process_spider_output()`` but has no response
        associated; must produce only requests (not items).
        """
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        message = "Spider opened: %s" % spider.name
        spider.logger.info(message)
|
||||
|
||||
|
||||
class WangyizhaopinDownloaderMiddleware:
    """Pass-through downloader middleware for the wangyizhaopin project.

    Every hook leaves requests and responses untouched. Not all hooks need
    to be defined: when one is missing, Scrapy acts as if the downloader
    middleware does not modify the passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and subscribe it to ``spider_opened``.

        Args:
            crawler: Object whose ``signals`` manager the new instance is
                connected to.

        Returns:
            The newly created middleware instance.
        """
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened,
            signal=signals.spider_opened,
        )
        return middleware

    def process_request(self, request, spider):
        """Let every request continue through the downloader unchanged.

        The hook must either return ``None`` (continue processing this
        request), return a Response object, return a Request object, or
        raise IgnoreRequest (which triggers the ``process_exception()``
        methods of installed downloader middleware). This implementation
        always returns ``None``.
        """
        return None

    def process_response(self, request, response, spider):
        """Return the downloaded ``response`` unchanged.

        The hook must either return a Response object, return a Request
        object, or raise IgnoreRequest.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Take no action when a download handler or ``process_request()`` raises.

        The hook must either return ``None`` (continue processing this
        exception) or return a Response or Request object (either stops
        the ``process_exception()`` chain). This implementation returns
        ``None``.
        """
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        message = "Spider opened: %s" % spider.name
        spider.logger.info(message)
|
||||
@@ -0,0 +1,46 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
|
||||
class WangyizhaopinPipeline:
    """Append the ``name`` of every item scraped by the ``wyzp`` spider to ``wyzp.csv``.

    The pipeline is enabled project-wide, so it checks ``spider.name`` itself.
    Items from any other spider pass through untouched and no file is opened
    for them.
    """

    # Output file handle; stays None unless the "wyzp" spider opened it, so
    # close_spider/process_item never touch an attribute that does not exist.
    f = None

    def open_spider(self, spider):
        """Open ``wyzp.csv`` in append mode when the ``wyzp`` spider starts."""
        if spider.name == "wyzp":
            print("数据爬取开始")
            self.f = open("wyzp.csv", mode="a", encoding="utf-8")

    def close_spider(self, spider):
        """Close the output file (if one was opened) when the spider finishes."""
        if spider.name == "wyzp":
            print("数据爬取结束")
        if self.f is not None:
            self.f.close()
            self.f = None

    def process_item(self, item, spider):
        """Write ``item['name']`` on its own line and return the item.

        The item is always returned so that later pipelines still receive it,
        whichever spider produced it.
        """
        if spider.name == "wyzp" and self.f is not None:
            self.f.write(f"{item['name']}\n")
        return item
|
||||
|
||||
|
||||
class WangyizhaopinPipeline1:
    """Append the ``name`` of every item scraped by the ``wyzp1`` spider to ``wyzp1.csv``.

    The pipeline is enabled project-wide, so it checks ``spider.name`` itself.
    Items from any other spider pass through untouched and no file is opened
    for them.
    """

    # Output file handle; stays None unless the "wyzp1" spider opened it, so
    # close_spider/process_item never touch an attribute that does not exist.
    f = None

    def open_spider(self, spider):
        """Open ``wyzp1.csv`` in append mode when the ``wyzp1`` spider starts."""
        if spider.name == "wyzp1":
            print("数据爬取开始")
            self.f = open("wyzp1.csv", mode="a", encoding="utf-8")

    def close_spider(self, spider):
        """Close the output file (if one was opened) when the spider finishes."""
        if spider.name == "wyzp1":
            print("数据爬取结束")
        if self.f is not None:
            self.f.close()
            self.f = None

    def process_item(self, item, spider):
        """Write ``item['name']`` on its own line and return the item.

        The item is always returned so that later pipelines still receive it,
        whichever spider produced it.
        """
        if spider.name == "wyzp1" and self.f is not None:
            self.f.write(f"{item['name']}\n")
        return item
|
||||
@@ -0,0 +1,95 @@
|
||||
# Scrapy settings for wangyizhaopin project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
# Project name used by Scrapy for this bot.
BOT_NAME = "wangyizhaopin"

# Package that holds the project's spiders (also used for newly generated ones).
SPIDER_MODULES = ["wangyizhaopin.spiders"]
NEWSPIDER_MODULE = "wangyizhaopin.spiders"
# Only WARNING and above are logged; the pipelines report progress with print().
LOG_LEVEL = "WARNING"

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = "wangyizhaopin (+http://www.yourdomain.com)"

# Obey robots.txt rules
ROBOTSTXT_OBEY = True
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
#CONCURRENT_REQUESTS = 32
|
||||
|
||||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
#DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
#COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
#TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
#DEFAULT_REQUEST_HEADERS = {
|
||||
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
# "Accept-Language": "en",
|
||||
#}
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
#SPIDER_MIDDLEWARES = {
|
||||
# "wangyizhaopin.middlewares.WangyizhaopinSpiderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
#DOWNLOADER_MIDDLEWARES = {
|
||||
# "wangyizhaopin.middlewares.WangyizhaopinDownloaderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable extensions
|
||||
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||
#EXTENSIONS = {
|
||||
# "scrapy.extensions.telnet.TelnetConsole": None,
|
||||
#}
|
||||
|
||||
# Configure item pipelines
|
||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
# Both pipelines are enabled for every spider in the project; each one checks
# spider.name itself ("wyzp" / "wyzp1") before writing to its own CSV file.
# Lower numbers run first.
ITEM_PIPELINES = {
    "wangyizhaopin.pipelines.WangyizhaopinPipeline": 300,
    "wangyizhaopin.pipelines.WangyizhaopinPipeline1": 301,
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = "httpcache"
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
|
||||
|
||||
# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
# Feed exports are written as UTF-8.
FEED_EXPORT_ENCODING = "utf-8"
|
||||
@@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
||||
@@ -0,0 +1,6 @@
|
||||
# import requests
|
||||
#
|
||||
# url = "https://hr.163.com/api/hr163/position/queryPage"
|
||||
#
|
||||
# a = requests.get(url)
|
||||
# print(a.text)
|
||||
@@ -0,0 +1,27 @@
|
||||
import scrapy
|
||||
from scrapy.linkextractors import LinkExtractor
|
||||
from scrapy.spiders import CrawlSpider, Rule
|
||||
|
||||
#无法传参,只能应用于数据在一个页面里传参
|
||||
class TencentCrawlSpider(CrawlSpider):
    """CrawlSpider that extracts job-detail links from the Tencent careers search page.

    Original author's note: a rule-based crawl cannot pass extra arguments to
    the callback, so it only fits cases where all the data lives on the page
    being parsed.
    """

    name = "tencent_crawl"
    allowed_domains = ["tencent.com"]
    # The stray trailing space that used to end this URL has been removed.
    start_urls = ["https://careers.tencent.com/search.html?pcid=40001"]

    # Link extraction rules. ``allow`` takes a regular expression, so the
    # literal "." and "?" in the URL must be escaped; unescaped, "l?" made the
    # "l" optional and the pattern could never match "jobdesc.html?postId=N".
    # ``follow`` is left at its default for this rule.
    rules = (
        # Detail-page links are handed to parse_item.
        Rule(
            LinkExtractor(
                allow=r"https://careers\.tencent\.com/jobdesc\.html\?postId=\d+"
            ),
            callback="parse_item",
        ),
    )

    def parse_item(self, response):
        """Print the URL of a job-detail page and return an (empty) item dict."""
        item = {}
        print(response.url)
        return item
|
||||
@@ -0,0 +1,57 @@
|
||||
import scrapy
|
||||
import json
|
||||
from wangyizhaopin.items import WangyizhaopinItem
|
||||
|
||||
#该项目无法翻页,需要scrapy结合selenium等模拟点击
|
||||
|
||||
class WyzpSpider(scrapy.Spider):
    """Fetch one page of job postings from the hr.163.com JSON API.

    A single POST request for page 1 (10 postings) is issued; each posting in
    the reply is copied into a ``WangyizhaopinItem``.
    """

    name = "wyzp"
    allowed_domains = ["hr.163.com"]
    start_urls = ["https://hr.163.com/job-list.html"]

    # Keys copied verbatim from every posting in the API reply into the item.
    _ITEM_FIELDS = (
        "name",
        "postTypeFullName",
        "description",
        "reqEducationName",
        "reqWorkYearsName",
        "requirement",
        "workPlaceNameList",
    )

    def start_requests(self):
        """Yield the JSON POST request for the first page of postings."""
        post_body = {"currentPage": 1, "pageSize": 10}
        url = "https://hr.163.com/api/hr163/position/queryPage"

        yield scrapy.Request(
            url=url,
            callback=self.parse_detail,
            body=json.dumps(post_body),
            method='POST',
            headers={'Content-Type': 'application/json;charset=UTF-8'},
        )

    def parse_detail(self, response):
        """Turn the JSON reply into items, one per entry of ``data.list``.

        A reply without a usable ``data.list`` (for example an error payload)
        is logged and skipped instead of raising KeyError/TypeError.
        """
        payload = response.json()
        container = payload.get('data') if isinstance(payload, dict) else None
        postings = container.get('list') if isinstance(container, dict) else None
        if not postings:
            self.logger.warning("No postings found in response from %s", response.url)
            return

        for posting in postings:
            info = WangyizhaopinItem()
            for field in self._ITEM_FIELDS:
                info[field] = posting.get(field)
            yield info
|
||||
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import scrapy
|
||||
import json
|
||||
from wangyizhaopin.items import WangyizhaopinItem
|
||||
|
||||
#该项目无法翻页,需要scrapy结合selenium等模拟点击
|
||||
|
||||
class WyzpSpider(scrapy.Spider):
    """Fetch one page of job postings from the hr.163.com JSON API (spider ``wyzp1``).

    A single POST request for page 1 (10 postings) is issued; each posting in
    the reply is copied into a ``WangyizhaopinItem``.
    """

    name = "wyzp1"
    allowed_domains = ["hr.163.com"]
    start_urls = ["https://hr.163.com/job-list.html"]

    # Keys copied verbatim from every posting in the API reply into the item.
    _ITEM_FIELDS = (
        "name",
        "postTypeFullName",
        "description",
        "reqEducationName",
        "reqWorkYearsName",
        "requirement",
        "workPlaceNameList",
    )

    def start_requests(self):
        """Yield the JSON POST request for the first page of postings."""
        post_body = {"currentPage": 1, "pageSize": 10}
        url = "https://hr.163.com/api/hr163/position/queryPage"

        yield scrapy.Request(
            url=url,
            callback=self.parse_detail,
            body=json.dumps(post_body),
            method='POST',
            headers={'Content-Type': 'application/json;charset=UTF-8'},
        )

    def parse_detail(self, response):
        """Turn the JSON reply into items, one per entry of ``data.list``.

        A reply without a usable ``data.list`` (for example an error payload)
        is logged and skipped instead of raising KeyError/TypeError.
        """
        payload = response.json()
        container = payload.get('data') if isinstance(payload, dict) else None
        postings = container.get('list') if isinstance(container, dict) else None
        if not postings:
            self.logger.warning("No postings found in response from %s", response.url)
            return

        for posting in postings:
            info = WangyizhaopinItem()
            for field in self._ITEM_FIELDS:
                info[field] = posting.get(field)
            yield info
|
||||
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
内容运营实习生(AI音乐)
|
||||
高级游戏测试工程师(仙侠单机)
|
||||
算法实习生(搜索LLM和多模态技术)
|
||||
资深VFX特效设计师(界面)-第五人格
|
||||
资深UI视觉设计师-七日世界
|
||||
资深动画设计师-风格化在研
|
||||
资深概念设计师(场景)(天下)
|
||||
资深UI视觉设计师-卡通风格
|
||||
概念设计专家(角色)-萤火突击
|
||||
资深级UI 视觉设计师(萤火突击)
|
||||
|
@@ -0,0 +1,93 @@
|
||||
'''
|
||||
输入大学排名url链接
|
||||
输出 排名、大学名称、总分
|
||||
库 requests bs4
|
||||
'''
|
||||
|
||||
import requests
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
def get_html_text(url, timeout=30):
    """Download ``url`` and return its decoded text, or ``""`` if the request fails.

    Args:
        url: address of the page to fetch.
        timeout: seconds to wait for the server before giving up, so a stalled
            connection cannot hang the script.

    Returns:
        The response body decoded with the detected (apparent) encoding, or an
        empty string on any network/HTTP error.
    """
    try:
        # The context manager releases the connection; the old ``r.close()``
        # sat after ``return`` and was never reached.
        with requests.get(url, timeout=timeout) as r:
            r.raise_for_status()
            r.encoding = r.apparent_encoding
            return r.text
    except requests.RequestException:
        return ""
|
||||
|
||||
|
||||
def fill_univ_list(ulist, html):
    """Parse the ranking table in ``html`` and append one row per university to ``ulist``.

    Each appended row is ``[rank, school cell text, total score]`` with
    surrounding whitespace stripped.

    Args:
        ulist: list that receives the rows (mutated in place).
        html: page source; when it contains no ``<tbody>`` (e.g. the download
            failed and returned ``""``) nothing is appended.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        return

    for tr in tbody.children:
        # Children also include bare strings/whitespace; only real tags are rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")
        if len(tds) < 5:
            continue  # malformed row: not enough cells
        # Per the sample row kept at the bottom of this file the cells are:
        # rank, school, province, type, total score, level score.
        # The total score is therefore cell 4 (cell 5 is the level score).
        ulist.append(
            [tds[0].text.strip(), tds[1].text.strip(), tds[4].text.strip()]
        )
|
||||
|
||||
|
||||
def print_univ_list(ulist, num):
    """Print a header line followed by at most ``num`` rows of ``ulist``.

    Args:
        ulist: rows shaped ``[rank, school, score]``; the school text is cut
            to its first four characters for display.
        num: maximum number of rows to print. If fewer rows are available only
            those are printed (previously this raised IndexError).
    """
    row_format = "{:^20}\t{:^20}\t{:^20}"
    print(row_format.format("排名", "学校", "总分"))
    # max() keeps a negative ``num`` printing nothing, as range(num) did.
    for u in ulist[:max(num, 0)]:
        print(row_format.format(u[0], u[1][0:4], u[2]))
|
||||
# print("suv" + str(num))
|
||||
|
||||
|
||||
def main():
    """Download the 2020 ranking page, parse it and print the first 20 rows."""
    ranking_url = "https://www.shanghairanking.cn/rankings/bcur/202011"
    rows = []
    page_source = get_html_text(ranking_url)
    fill_univ_list(rows, page_source)
    print_univ_list(rows, 20)


main()
|
||||
|
||||
# url = "https://www.shanghairanking.cn/rankings/bcur/202011"
|
||||
# header = {
|
||||
# "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.52"}
|
||||
#
|
||||
# try:
|
||||
# r = requests.get(url, headers=header)
|
||||
# r.raise_for_status()
|
||||
# r.encoding = r.apparent_encoding
|
||||
# except:
|
||||
# print("出现异常")
|
||||
#
|
||||
# demo = r.text
|
||||
# soup = BeautifulSoup(demo, "html.parser")
|
||||
# ulists=[]
|
||||
# for tr in soup.find("tbody").children:
|
||||
# if isinstance(tr, bs4.element.Tag): # 判断类型是不是标签类型
|
||||
# tds = tr("td")
|
||||
# ulists.append(tds[0].string, tds[1].string, tds[2].string)
|
||||
#
|
||||
#
|
||||
# print("{:^10}\t{:^6}\t{:^10}".format("排名", "学校", "总分"))
|
||||
# for i in range(ulists):
|
||||
# u = ulist[i]
|
||||
# print("{:^10}\t{:^6}\t{:^10}".format(u[0],u[1],u[2]))
|
||||
|
||||
'''
|
||||
<tr data-v-3fe7d390><td data-v-3fe7d390><div class="ranking top2" data-v-3fe7d390>
|
||||
2
|
||||
</div></td><td class="align-left" data-v-3fe7d390><div class="univname-container" data-v-3fe7d390><div class="logo" data-v-3fe7d390><img alt="北京大学" onerror='this.src="/images/blank.svg"' src="https://www.shanghairanking.cn/_uni/logo/86350223.png" class="univ-logo" data-v-3fe7d390></div> <div class="univname" data-v-3fe7d390><div data-v-b80b4d60 data-v-3fe7d390><div class="tooltip" data-v-b80b4d60><div class="link-container" data-v-b80b4d60><a href="/institution/peking-university" class="name-cn" data-v-b80b4d60>北京大学 </a> <div class="collection" style="display:none" data-v-b80b4d60><img src="/_nuxt/img/uncollection.5e124aa.svg" alt data-v-b80b4d60></div></div> <!----></div></div> <div data-v-f9104fdc data-v-3fe7d390><div class="tooltip" data-v-f9104fdc><div class="link-container" data-v-f9104fdc><a href="/institution/peking-university" class="name-en" data-v-f9104fdc>Peking University </a></div> <!----></div></div> <p class="tags" data-v-3fe7d390>双一流/985/211</p> <!----> <!----> <!----></div></div></td><td data-v-3fe7d390>
|
||||
北京
|
||||
<!----></td><td data-v-3fe7d390>
|
||||
综合
|
||||
<!----></td><td data-v-3fe7d390>
|
||||
746.7
|
||||
</td><td data-v-3fe7d390>
|
||||
36.1
|
||||
</td></tr>
|
||||
'''
|
||||
@@ -0,0 +1,55 @@
|
||||
# 扒百度小说西游记
|
||||
# https://dushu.baidu.com/api/pc/getCatalog?data={"book_id":"4306063500"}
|
||||
# 章节url
|
||||
# https://dushu.baidu.com/api/pc/getChapterContent?data={"book_id":"4306063500","cid":"4306063500|1569782244","need_bookinfo":1}
|
||||
# 内容url
|
||||
|
||||
import requests
|
||||
import asyncio
|
||||
import aiohttp
|
||||
import json
|
||||
import aiofiles
|
||||
|
||||
'''
|
||||
1.同步操作:访问getCatalog拿到所有章节cid和名称
|
||||
2.异步操作,访问getChapterContent 下载所有文章内容
|
||||
'''
|
||||
|
||||
|
||||
async def getCatalog(url, book_id=None):
    """Fetch the chapter catalogue and download every chapter concurrently.

    The catalogue is fetched with one blocking ``requests`` call (nothing else
    is running on the event loop yet); one ``aiodownload`` task is then
    created per chapter.

    Args:
        url: catalogue endpoint, already carrying the ``data={...}`` query.
        book_id: id of the book. Defaults to the module-level ``b_id`` set by
            the script entry point, which keeps existing calls working.
    """
    if book_id is None:
        book_id = b_id

    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.35",
        "Referer": "https://dushu.baidu.com/pc/detail?gid=4306063500"}
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    dic = r.json()

    titles = []
    tasks = []
    for item in dic['data']['novel']['items']:  # one entry per chapter: cid + title
        titles.append(item['title'])
        tasks.append(
            asyncio.create_task(aiodownload(item['cid'], book_id, item['title']))
        )

    if not tasks:
        # asyncio.wait() raises ValueError on an empty collection.
        return

    # gather() hands back each task's exception, so a failed chapter is
    # reported instead of being silently dropped; the others still complete.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for title, outcome in zip(titles, results):
        if isinstance(outcome, Exception):
            print(f"{title} download failed: {outcome!r}")
|
||||
|
||||
|
||||
async def aiodownload(cid, b_id, title):
    """Download one chapter and save its text to ``./novel/<title>.txt``.

    Args:
        cid: chapter id taken from the catalogue.
        b_id: id of the book the chapter belongs to.
        title: chapter title, used as the output file name.
    """
    import os  # local import: only needed to create the output directory

    # The endpoint expects book_id = book id and cid = "<book id>|<chapter id>"
    # (see the sample URL at the top of this file); book_id used to be filled
    # with the chapter id by mistake.
    data = {
        "book_id": f"{b_id}",
        "cid": f"{b_id}|{cid}",
        "need_bookinfo": 1
    }
    data = json.dumps(data)
    url = f"https://dushu.baidu.com/api/pc/getChapterContent?data={data}"

    async with aiohttp.ClientSession() as session:
        async with session.get(url) as r:
            dic = await r.json()

    os.makedirs('./novel', exist_ok=True)
    # The builtin open() is not an async context manager; aiofiles.open is.
    # An explicit encoding keeps the Chinese text portable across platforms.
    async with aiofiles.open(f'./novel/{title}.txt', 'w', encoding='utf-8') as f:
        await f.write(dic['data']['novel']['content'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
    b_id = '4306063500'
    # Literal braces inside an f-string are written doubled ({{ and }}).
    url = f'https://dushu.baidu.com/api/pc/getCatalog?data={{"book_id":"{b_id}"}}'
    asyncio.run(getCatalog(url))
|
||||
@@ -0,0 +1,49 @@
|
||||
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
|
||||
import requests
|
||||
from lxml import etree
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
import csv
|
||||
|
||||
"""
|
||||
1.如何提取单个页面的数据
|
||||
2.上线程池,多个页面同时抓取
|
||||
"""
|
||||
|
||||
|
||||
# One price record inside the JSON reply. Every key is followed by ":" and an
# optionally quoted value; only the bare value is captured. Previously the ":"
# and the quotes ended up inside the prodName/avgPrice/pubDate captures.
_PRICE_RECORD = re.compile(
    r'"id":\s*"?(?P<id>[^",]*)"?,.*?'
    r'"prodName":\s*"?(?P<prodName>[^",]*)"?,.*?'
    r'"avgPrice":\s*"?(?P<avgPrice>[^",]*)"?,.*?'
    r'"pubDate":\s*"?(?P<pubDate>[^",]*)"?,',
    re.S,
)


def download_one_page(data):
    """Fetch one page of price data and append its records to ``data.csv``.

    Each record becomes one CSV row: id, prodName, avgPrice, pubDate.

    Args:
        data: form fields sent with the request (the caller passes ``limit``
            and ``current``, i.e. page size and page number).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.35",
        "Referer": "http://www.xinfadi.com.cn/priceDetail.html"
    }
    url = "http://www.xinfadi.com.cn/getPriceData.html"
    # NOTE(review): form fields are sent with GET here, as before — confirm
    # whether the endpoint actually expects POST.
    try:
        r = requests.get(url, headers=headers, data=data, timeout=30)
        r.raise_for_status()
    except requests.RequestException as exc:
        # Report and skip this page rather than losing the error in a worker.
        print(f"{data}failed: {exc!r}")
        return
    r.encoding = "utf-8"

    rows = [list(m.groupdict().values()) for m in _PRICE_RECORD.finditer(r.text)]

    # ``with`` closes the file even if writing fails; newline='' is what the
    # csv module requires to avoid blank lines on Windows.
    # NOTE(review): several workers may append to data.csv at once; rows are
    # written in a single writerows() call, but no lock is taken.
    with open('data.csv', mode='a', encoding='utf-8', newline='') as f:
        csv.writer(f).writerows(rows)
    print(f"{data}over!")
|
||||
|
||||
# html = etree.HTML(r.text)
|
||||
# table = html.xpath('/html/body/div[2]/div/div/div/div[4]/div[1]/div/table/tbody')
|
||||
# trs = table.xpath('./tr/td/text()')[1:]
|
||||
# print(trs)
|
||||
# print(table)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Fetch pages 1..199 with up to 50 worker threads.
    with ThreadPoolExecutor(50) as t:
        for i in range(1, 200):
            # Pass the callable and its argument separately. Writing
            # ``t.submit(download_one_page(...))`` ran every download in the
            # main thread and submitted its return value (None) to the pool.
            t.submit(download_one_page, {"limit": "20", "current": f"{i}"})
|
||||
@@ -0,0 +1,14 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class CaipiaoItem(scrapy.Item):
    """Item with the three fields the caipiao spider fills in for each table row."""

    # Text of the row's first cell (presumably the draw/issue number).
    qihao = scrapy.Field()
    # List of texts of the ".chartBall01" cells; pipelines join it with "_".
    red_ball = scrapy.Field()
    # Text of the first ".chartBall02" cell.
    blue_ball = scrapy.Field()
|
||||
@@ -0,0 +1,103 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
|
||||
class CaipiaoSpiderMiddleware:
    """Pass-through spider middleware generated for the caipiao project.

    Every hook forwards what it receives without modification.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened, signal=signals.spider_opened
        )
        return middleware

    def process_spider_input(self, response, spider):
        """Accept every response on its way into the spider by returning ``None``."""
        return None

    def process_spider_output(self, response, result, spider):
        """Yield each request/item the spider produced for ``response``, unchanged."""
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Return ``None``: the exception is not handled here."""
        return None

    def process_start_requests(self, start_requests, spider):
        """Yield the spider's start requests unchanged (requests only, no items)."""
        yield from start_requests

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        spider.logger.info("Spider opened: %s", spider.name)
|
||||
|
||||
|
||||
class CaipiaoDownloaderMiddleware:
    """Pass-through downloader middleware generated for the caipiao project.

    None of the hooks modifies the objects it receives: requests continue,
    responses are returned as-is and exceptions are left for other
    middlewares to handle.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and connect it to the ``spider_opened`` signal."""
        middleware = cls()
        crawler.signals.connect(
            middleware.spider_opened, signal=signals.spider_opened
        )
        return middleware

    def process_request(self, request, spider):
        """Return ``None`` so that processing of ``request`` simply continues.

        A Response or Request object could be returned instead, or
        IgnoreRequest raised; this implementation does none of those.
        """
        return None

    def process_response(self, request, response, spider):
        """Return the downloaded ``response`` unchanged."""
        return response

    def process_exception(self, request, exception, spider):
        """Return ``None`` so the exception keeps propagating through the chain."""
        return None

    def spider_opened(self, spider):
        """Log the name of the spider that was just opened."""
        spider.logger.info("Spider opened: %s", spider.name)
|
||||
@@ -0,0 +1,64 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
import pymysql
|
||||
from caipiao.settings import MYSQL
|
||||
|
||||
class CaipiaoPipeline:
    """Append every scraped draw to a CSV file.

    The file is opened once when the spider starts, written to for each item,
    and closed when the spider finishes.
    """

    def open_spider(self, spider):
        """Announce the start and open the output file in append mode."""
        print("开始")
        self.f = open("./双色球.csv", mode='a', encoding='utf-8')

    def close_spider(self, spider):
        """Announce the end and close the output file."""
        print("结束")
        self.f.close()

    def process_item(self, item, spider):
        """Write one ``qihao,red_balls,blue_ball`` line and return the item.

        The red balls are joined with ``_`` so they occupy a single column.
        """
        reds = '_'.join(item['red_ball'])
        line = f"{item['qihao']},{reds},{item['blue_ball']}\n"
        self.f.write(line)
        return item
|
||||
|
||||
|
||||
class CaipiaoMySqlPipeline:
    """Insert every scraped draw into the MySQL table ``caipiao``.

    Connection parameters come from the ``MYSQL`` dict in the project settings.
    """

    # Connection opened by open_spider(); None means no connection is available.
    conn = None

    def open_spider(self, spider):
        """Open the database connection when the spider starts."""
        self.conn = pymysql.connect(
            host=MYSQL['host'],
            port=MYSQL['port'],
            user=MYSQL['user'],
            password=MYSQL['password'],
            database=MYSQL['database']
        )

    def close_spider(self, spider):
        """Close the database connection, if one was opened."""
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def process_item(self, item, spider):
        """Insert ``item`` as one row and return it.

        On any failure the transaction is rolled back and the error is logged
        through ``spider.logger``; the item is still returned so later
        pipelines receive it.
        """
        # Bound before the try so ``finally`` can test it even when
        # conn.cursor() itself raises (previously an UnboundLocalError).
        cursor = None
        try:
            cursor = self.conn.cursor()
            sql = "insert into caipiao(qihao,red_ball,blue_ball) values(%s,%s,%s)"
            cursor.execute(sql, (item['qihao'], '_'.join(item['red_ball']), item['blue_ball']))
            self.conn.commit()
        except Exception:
            spider.logger.exception("Failed to store item in MySQL: %r", item)
            if self.conn is not None:
                self.conn.rollback()
        finally:
            if cursor is not None:
                cursor.close()

        return item
|
||||
@@ -0,0 +1,102 @@
|
||||
# Scrapy settings for caipiao project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
import pymysql
|
||||
import os

# Project name used by Scrapy for this bot.
BOT_NAME = "caipiao"

# Package that holds the project's spiders (also used for newly generated ones).
SPIDER_MODULES = ["caipiao.spiders"]
NEWSPIDER_MODULE = "caipiao.spiders"

# Only WARNING and above are logged.
LOG_LEVEL = "WARNING"

# MySQL connection parameters read by caipiao.pipelines.CaipiaoMySqlPipeline.
# Each value can be overridden through a CAIPIAO_MYSQL_* environment variable
# so real credentials need not be committed; the defaults are the values that
# were previously hard-coded here.
MYSQL = {
    "host": os.environ.get("CAIPIAO_MYSQL_HOST", "localhost"),
    "port": int(os.environ.get("CAIPIAO_MYSQL_PORT", "3306")),
    "user": os.environ.get("CAIPIAO_MYSQL_USER", "root"),
    "password": os.environ.get("CAIPIAO_MYSQL_PASSWORD", "123123"),
    "database": os.environ.get("CAIPIAO_MYSQL_DATABASE", "spider"),
}

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = "caipiao (+http://www.yourdomain.com)"

# Obey robots.txt rules
ROBOTSTXT_OBEY = True
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
#CONCURRENT_REQUESTS = 32
|
||||
|
||||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
#DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
#COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
#TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
#DEFAULT_REQUEST_HEADERS = {
|
||||
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
# "Accept-Language": "en",
|
||||
#}
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
#SPIDER_MIDDLEWARES = {
|
||||
# "caipiao.middlewares.CaipiaoSpiderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
#DOWNLOADER_MIDDLEWARES = {
|
||||
# "caipiao.middlewares.CaipiaoDownloaderMiddleware": 543,
|
||||
#}
|
||||
|
||||
# Enable or disable extensions
|
||||
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||
#EXTENSIONS = {
|
||||
# "scrapy.extensions.telnet.TelnetConsole": None,
|
||||
#}
|
||||
|
||||
# Configure item pipelines
|
||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
# Items go through pipelines from the lowest number to the highest. Distinct
# values make the order explicit: the CSV writer first, then the MySQL writer.
ITEM_PIPELINES = {
    "caipiao.pipelines.CaipiaoPipeline": 300,
    "caipiao.pipelines.CaipiaoMySqlPipeline": 301,
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = "httpcache"
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = "scrapy.extensions.httpcache.FilesystemCacheStorage"
|
||||
|
||||
# Set settings whose default value is deprecated to a future-proof value
REQUEST_FINGERPRINTER_IMPLEMENTATION = "2.7"
TWISTED_REACTOR = "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
# Feed exports are written as UTF-8.
FEED_EXPORT_ENCODING = "utf-8"
|
||||
@@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
||||
@@ -0,0 +1,35 @@
|
||||
import scrapy
|
||||
from caipiao.items import CaipiaoItem
|
||||
|
||||
class A500Spider(scrapy.Spider):
    """Scrape draw rows from the chart table on datachart.500.com."""

    name = "500"
    allowed_domains = ["500.com"]
    start_urls = ["https://datachart.500.com/ssq/"]

    def parse(self, response):
        """Yield one ``CaipiaoItem`` per table row that contains red-ball cells.

        Rows without ``.chartBall01`` cells, or without text in their first
        cell, are skipped.
        """
        for tr in response.xpath("//tbody[@id='tdata']/tr"):
            red_ball = tr.css(".chartBall01::text").extract()
            if not red_ball:
                continue
            blue_ball = tr.css(".chartBall02::text").extract_first()
            # extract_first() returns None when the cell has no text, which
            # made the chained .strip() raise AttributeError; default to "".
            qihao = tr.xpath("./td[1]/text()").extract_first(default="").strip()
            if not qihao:
                continue

            cai = CaipiaoItem()  # used like a dict
            cai['qihao'] = qihao
            cai['red_ball'] = red_ball
            cai['blue_ball'] = blue_ball
            print(cai)

            yield cai
|
||||
# dic ={
|
||||
# 'qihao':qihao,
|
||||
# 'red_ball':red_ball,
|
||||
# 'blue_ball':blue_ball
|
||||
# }
|
||||
# print(dic)
|
||||
# yield dic
|
||||
@@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = caipiao.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = caipiao
|
||||
@@ -0,0 +1,30 @@
|
||||
24004,9_13_17_18_21_27,4
|
||||
24005,5_6_20_23_25_32,3
|
||||
24006,7_18_20_21_26_32,5
|
||||
24007,4_5_6_7_20_22,15
|
||||
24008,8_20_21_23_27_30,13
|
||||
24009,1_4_7_10_17_23,14
|
||||
24010,11_16_20_21_23_24,4
|
||||
24011,8_12_16_20_27_31,6
|
||||
24012,1_3_7_18_22_28,15
|
||||
24013,1_7_10_14_21_25,7
|
||||
24014,1_2_10_22_24_25,13
|
||||
24015,3_8_17_18_20_30,15
|
||||
24016,7_12_20_24_32_33,4
|
||||
24017,4_17_19_20_25_32,4
|
||||
24018,6_15_17_24_28_29,16
|
||||
24019,5_11_12_16_17_20,8
|
||||
24020,3_8_12_14_17_33,8
|
||||
24021,4_6_16_17_23_24,11
|
||||
24022,2_9_11_14_18_26,6
|
||||
24023,1_10_22_25_28_32,10
|
||||
24024,3_7_21_24_26_30,10
|
||||
24025,8_15_21_22_25_33,13
|
||||
24026,4_7_18_19_20_25,6
|
||||
24027,2_6_13_27_28_32,13
|
||||
24028,3_7_8_11_18_19,5
|
||||
24029,12_18_23_25_28_33,4
|
||||
24030,1_8_22_25_29_33,10
|
||||
24031,9_10_13_25_30_32,2
|
||||
24032,1_3_4_11_12_21,16
|
||||
24033,6_10_11_18_20_32,5
|
||||
|
@@ -0,0 +1,39 @@
|
||||
import requests
|
||||
import re
|
||||
import csv
|
||||
|
||||
# Download the Douban Top 250 landing page; its HTML is parsed further down.
url = "https://movie.douban.com/top250"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 Edg/107.0.1418.26'}
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
except requests.RequestException as exc:
    # Nothing can be parsed without a response, so stop here instead of
    # failing a few lines later with AttributeError on ``None.text``.
    print("异常")
    raise SystemExit(1) from exc

# print(r.text)
page_context = r.text
|
||||
|
||||
obj = re.compile(r'<li>.*?<span class="title">(?P<name>.*?)</span>.*?'
|
||||
r'<p class="">.*?<br>(?P<year>.*?) .*?'
|
||||
r'<span class="rating_num" property="v:average">(?P<score>.*?)</span>.*?'
|
||||
r'<span>(?P<review>.*?)人评价</span>.*?</li>'
|
||||
, re.S) # (?P<name>.*?)分组组名
|
||||
result = obj.finditer(page_context)

# newline='' is what the csv module asks for (otherwise blank rows can appear
# on Windows); UTF-8 is set explicitly so the Chinese titles are written the
# same way regardless of the platform's default encoding. The ``with`` block
# closes the file even if writing a row fails.
with open('data.csv', mode='w', newline='', encoding='utf-8') as f:
    csvwriter = csv.writer(f)
    for i in result:
        dic = i.groupdict()
        dic['year'] = dic['year'].strip()  # drop the whitespace around the year
        csvwriter.writerow(dic.values())  # one row per matched movie
print("over!")

# Single fields can also be read straight from a match object, e.g.
# i.group("name"), i.group("year").strip(), i.group("score"), i.group("review").

r.close()
|
||||
@@ -0,0 +1,31 @@
|
||||
# 拿页面源码
|
||||
# 提取数据
|
||||
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
||||
url = 'https://www.zbj.com/search/service/?kw=sass&r=2'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 '
                  'Safari/537.36 Edg/107.0.1418.35'}
try:
    # A timeout keeps the script from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=30)
    r.raise_for_status()
    r.encoding = r.apparent_encoding
except requests.RequestException as exc:
    # Without a response there is nothing to parse; exit instead of failing
    # below with AttributeError on ``None.text``.
    print('异常')
    raise SystemExit(1) from exc
# print(r.text)

# Parse the page source into an element tree; the response is no longer
# needed once its text has been handed to lxml.
html = etree.HTML(r.text)
r.close()

# Select the container element(s) that hold the service-provider entries.
divs = html.xpath('//*[@id="__layout"]/div/div[3]/div/div[3]/div[4]')

for i in divs:
    # Both queries are relative to the current container and return lists.
    price = i.xpath('./div[1]/div[1]/div/div[2]/div[1]/span/text()')
    print(price)
    title = i.xpath('./div/div/div/div[2]/div[2]/a[1]/text()')
    print(title)
|
||||
@@ -0,0 +1,37 @@
|
||||
import requests
|
||||
import os
|
||||
|
||||
#
|
||||
# # 京东页面商品爬取案例
|
||||
# def get_JD_text():
|
||||
# kv = {'User-agent': 'Mozilla/5.0'}
|
||||
# try:
|
||||
# r = requests.get(url,headers=kv)
|
||||
# r.raise_for_status()
|
||||
# r.encoding = r.apparent_encoding
|
||||
# return r.text
|
||||
# except:
|
||||
# print("产生异常")
|
||||
#
|
||||
#
|
||||
# if __name__ == "__main__":
|
||||
# url = "https://item.jd.com/12842874.html"
|
||||
# print(get_JD_text())
|
||||
|
||||
|
||||
# Download the resource at ``url`` and store it below ``root`` unless a file
# with the target name already exists.
url = 'https://player.youku.com/embed/XNTY2MTQ0MDgw'
root = "D://program//"
path = root + url.split('/')[-1]+'.mp4'
try:
    # makedirs creates missing parent directories as well and, with
    # exist_ok=True, is a no-op when the directory is already there.
    os.makedirs(root, exist_ok=True)
    if not os.path.exists(path):  # only download when the file is missing
        r = requests.get(url, timeout=30)
        # Do not save an HTTP error page under the target file name.
        r.raise_for_status()
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(path, 'wb') as f:
            f.write(r.content)
        print("文件保存成功")
    else:
        print("文件已存在")
except (requests.RequestException, OSError):
    # Network failures and filesystem errors are reported; anything else
    # (e.g. KeyboardInterrupt, programming errors) is allowed to propagate.
    print("出现异常")
|
||||
@@ -0,0 +1,28 @@
|
||||
# import requests
|
||||
#
|
||||
# r = requests.get("http://www.baidu.com")
|
||||
# var = r.status_code # 检查状态码,是200则成功
|
||||
# print(var)
|
||||
#
|
||||
# r.encoding = 'utf-8'
|
||||
# n = r.text
|
||||
# print(n)
|
||||
|
||||
# 通用代码框架
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def getHTMLText():
    """Fetch the page at the module-level ``url`` and return its decoded text.

    ``url`` is not a parameter: it is looked up as a global at call time, so
    it must be assigned before this function is called.

    Returns:
        The response body as ``str``, or ``None`` when the request fails
        (a notice is printed in that case).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # turn 4xx/5xx responses into exceptions
        # Decode with the encoding detected from the body rather than the
        # one announced in the headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures are handled here; KeyboardInterrupt and
        # programming errors such as an undefined ``url`` propagate.
        print("出现异常")
        return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # getHTMLText() takes no arguments and reads this module-level name.
    url = "http://www.baidu.com"
    page = getHTMLText()
    print(page)
|
||||
@@ -0,0 +1,31 @@
|
||||
# 17k小说网
|
||||
# 登陆 -》得到cookies
|
||||
# 带着cookies去请求到url 获得内容
|
||||
|
||||
# 两个操作需要连一起
|
||||
# 使用session 进行请求, - 》一连串的请求,过程中cookies不会丢失
|
||||
|
||||
import requests
|
||||
|
||||
# 会话
|
||||
import os

# A session keeps the cookies it receives, so the login below carries over to
# the follow-up request.
session = requests.session()

# Account secrets are read from the environment so that no login name,
# password or session cookie is stored in the repository.
login_name = os.environ.get("LOGIN_17K_NAME")
password = os.environ.get("LOGIN_17K_PASSWORD")
if not login_name or not password:
    raise SystemExit("Set LOGIN_17K_NAME and LOGIN_17K_PASSWORD before running this script.")

data = {
    "loginName": login_name, "password": password
}  # field names taken from the login request payload

# Log in
url = r'https://passport.17k.com/ck/user/login'
resp = session.post(url, data=data)
print(resp.text)
print(resp.json())
# json(): parses the body, so values can be looked up by key
# text: the raw body as a string, which cannot be indexed by key
print(resp.cookies)

# Fetch the bookshelf data using the cookies held by the session
resp1 = session.get('https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919')
print(resp1.json())

# Alternative: send a cookie string copied from a logged-in browser session.
# It is supplied through COOKIE_17K and this step is skipped when unset.
cookie = os.environ.get("COOKIE_17K")
if cookie:
    resp2 = requests.get('https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919', headers={
        "Cookie": cookie
    })
    print(resp2.text)
|
||||
@@ -0,0 +1,16 @@
|
||||
import requests
|
||||
|
||||
|
||||
def get_HTML_Text():
    """Fetch the page at the module-level ``url`` and return its decoded text.

    ``url`` is not a parameter: it is looked up as a global at call time, so
    it must be assigned before this function is called.

    Returns:
        The response body as ``str``, or ``None`` when the request fails
        (a notice is printed in that case).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # turn 4xx/5xx responses into exceptions
        # Decode with the encoding detected from the body rather than the
        # one announced in the headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures are handled here; KeyboardInterrupt and
        # programming errors such as an undefined ``url`` propagate.
        print("异常")
        return None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # get_HTML_Text() takes no arguments and reads this module-level name.
    url = "https://www.jd.com/robots.txt"
    robots_text = get_HTML_Text()
    print(robots_text)
|
||||
@@ -0,0 +1,18 @@
|
||||
skips:
|
||||
- B101
|
||||
- B105
|
||||
- B301
|
||||
- B303
|
||||
- B306
|
||||
- B307
|
||||
- B311
|
||||
- B320
|
||||
- B321
|
||||
- B324
|
||||
- B403
|
||||
- B404
|
||||
- B406
|
||||
- B410
|
||||
- B503
|
||||
- B603
|
||||
- B605
|
||||
@@ -0,0 +1,35 @@
|
||||
[bumpversion]
|
||||
current_version = 0.9.1
|
||||
commit = False
|
||||
tag = False
|
||||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>\w+))?
|
||||
serialize =
|
||||
{major}.{minor}.{patch}-{release}
|
||||
{major}.{minor}.{patch}
|
||||
|
||||
[bumpversion:part:release]
|
||||
optional_value = placeholder
|
||||
values =
|
||||
a1
|
||||
b1
|
||||
rc1
|
||||
placeholder
|
||||
|
||||
[bumpversion:file:VERSION]
|
||||
search = {current_version}
|
||||
replace = {new_version}
|
||||
|
||||
[bumpversion:file:src/scrapy_redis/__init__.py]
|
||||
search = __version__ = "{current_version}"
|
||||
replace = __version__ = "{new_version}"
|
||||
|
||||
[bumpversion:file:.cookiecutterrc]
|
||||
search = version: {current_version}
|
||||
replace = version: {new_version}
|
||||
|
||||
[bumpversion:file:HISTORY.rst]
|
||||
search = .. bumpversion marker
|
||||
replace = .. bumpversion marker
|
||||
|
||||
{new_version} ({now:%Y-%m-%d})
|
||||
------------------
|
||||
@@ -0,0 +1,19 @@
|
||||
# Generated by cookiepatcher, a small shim around cookiecutter (pip install cookiepatcher)
|
||||
|
||||
cookiecutter:
|
||||
email: rolando at rmax.io
|
||||
full_name: Rolando Espinoza
|
||||
github_username: rolando
|
||||
project_name: Scrapy-Redis
|
||||
project_package: scrapy_redis
|
||||
project_short_description: Redis-based components for Scrapy.
|
||||
project_slug: scrapy-redis
|
||||
pypi_username: rolando
|
||||
use_codecov: y
|
||||
use_cython: n
|
||||
use_landscape: y
|
||||
use_pypi_deployment_with_travis: n
|
||||
use_pytest: y
|
||||
use_requiresio: y
|
||||
version: 0.9.1
|
||||
year: 2011-2022
|
||||
@@ -0,0 +1,25 @@
|
||||
[paths]
|
||||
source =
|
||||
src
|
||||
|
||||
[run]
|
||||
omit = setup.py
|
||||
branch = true
|
||||
source =
|
||||
scrapy_redis
|
||||
tests
|
||||
parallel = true
|
||||
|
||||
[report]
|
||||
show_missing = true
|
||||
precision = 2
|
||||
omit = */__init__.py
|
||||
exclude_lines =
|
||||
pragma: no cover
|
||||
def __repr__
|
||||
if self.debug:
|
||||
if settings.DEBUG
|
||||
raise AssertionError
|
||||
raise NotImplementedError
|
||||
if 0:
|
||||
if __name__ == .__main__.:
|
||||
@@ -0,0 +1,46 @@
|
||||
*.py[cod]
|
||||
*.swp
|
||||
*~
|
||||
|
||||
.ropeproject
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Packages
|
||||
*.egg
|
||||
*.egg-info
|
||||
dist
|
||||
build
|
||||
eggs
|
||||
parts
|
||||
bin
|
||||
var
|
||||
sdist
|
||||
develop-eggs
|
||||
.installed.cfg
|
||||
lib
|
||||
lib64
|
||||
__pycache__
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
.coverage
|
||||
.tox
|
||||
nosetests.xml
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
|
||||
# Mr Developer
|
||||
.mr.developer.cfg
|
||||
.project
|
||||
.pydevproject
|
||||
|
||||
# JetBrains PyCharm IDE
|
||||
/.idea/
|
||||
|
||||
.venv
|
||||
.tags
|
||||
@@ -0,0 +1,21 @@
|
||||
# http://editorconfig.org
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
|
||||
[*.bat]
|
||||
indent_style = tab
|
||||
end_of_line = crlf
|
||||
|
||||
[LICENSE]
|
||||
insert_final_newline = false
|
||||
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
@@ -0,0 +1,12 @@
|
||||
|
||||
[flake8]
|
||||
|
||||
max-line-length = 119
|
||||
ignore =
|
||||
W503
|
||||
P102
|
||||
P103
|
||||
|
||||
exclude =
|
||||
tests/test_spiders.py E731
|
||||
docs/conf.py E265
|
||||
@@ -0,0 +1,3 @@
|
||||
# GitHub syntax highlighting
|
||||
pixi.lock linguist-language=YAML
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
# Description
|
||||
|
||||
Please describe your problem/feature request/bug
|
||||
|
||||
# Steps to Reproduce
|
||||
|
||||
Please offer the steps to reproduce your problem/bug
|
||||
|
||||
# Error log
|
||||
|
||||
Please provide the error message or a screenshot for better understanding.
|
||||
@@ -0,0 +1,25 @@
|
||||
# Description
|
||||
|
||||
Please include a summary of the changes and the related issue. Please also include relevant motivation and context. List any dependencies that are required for this change.
|
||||
|
||||
Fixes #(issue)
|
||||
|
||||
# How Has This Been Tested?
|
||||
|
||||
Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration
|
||||
- [ ] pytest
|
||||
- [ ] Other test (please specify)
|
||||
|
||||
# Test Configuration:
|
||||
- OS version:
|
||||
- Necessary Libraries (optional):
|
||||
|
||||
# Checklist:
|
||||
- [ ] My code follows the style guidelines of this project
|
||||
- [ ] I have performed a self-review of my code
|
||||
- [ ] I have commented my code, particularly in hard-to-understand areas
|
||||
- [ ] I have made corresponding changes to the documentation
|
||||
- [ ] My changes generate no new warnings
|
||||
- [ ] I have added tests that prove my fix is effective or that my feature works
|
||||
- [ ] New and existing unit tests pass locally with my changes
|
||||
- [ ] Any dependent changes have been merged and published in downstream modules
|
||||
@@ -0,0 +1,31 @@
|
||||
# This is GitHub Action for cross platform building
|
||||
name: build
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
branches: [master]
|
||||
|
||||
jobs:
|
||||
builds:
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
python-version: ["3.12"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run build
|
||||
env:
|
||||
TOXENV: build
|
||||
run: |
|
||||
pip install -r requirements-tests.txt
|
||||
tox
|
||||
@@ -0,0 +1,41 @@
|
||||
# This is GitHub Action for linting and security check
|
||||
name: check
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
branches: [master]
|
||||
|
||||
concurrency:
|
||||
group: ${{github.workflow}}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
checks:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.12"]
|
||||
env: [security, flake8]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run check
|
||||
env:
|
||||
TOXENV: ${{ matrix.env }}
|
||||
run: |
|
||||
pip install -r requirements-tests.txt
|
||||
tox
|
||||
|
||||
pre-commit:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: pre-commit/action@v3.0.0
|
||||
@@ -0,0 +1,30 @@
|
||||
# This is a GitHub Action for building the documentation
|
||||
name: docs
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
branches: [master]
|
||||
|
||||
jobs:
|
||||
builds:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.12"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Build docs
|
||||
env:
|
||||
TOXENV: docs
|
||||
run: |
|
||||
pip install -r requirements-tests.txt
|
||||
tox
|
||||
@@ -0,0 +1,43 @@
|
||||
# This is GitHub Action for tests
|
||||
name: test
|
||||
on:
|
||||
push:
|
||||
branches: [master]
|
||||
pull_request:
|
||||
branches: [master]
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: ["3.12"]
|
||||
|
||||
services:
|
||||
redis:
|
||||
image: redis
|
||||
options: >-
|
||||
--health-cmd "redis-cli ping"
|
||||
--health-interval 10s
|
||||
--health-timeout 5s
|
||||
--health-retries 5
|
||||
|
||||
container: python:${{ matrix.python-version }}
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
|
||||
- name: Run pytest
|
||||
env:
|
||||
REDIS_HOST: redis
|
||||
TOXENV: pytest
|
||||
TOX_TESTENV_PASSENV: REDIS_HOST
|
||||
run: |
|
||||
pip install -r requirements-tests.txt
|
||||
tox
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user