mirror of
https://github.com/xishandong/crawlProject.git
synced 2024-11-25 16:34:42 +08:00
第一次上传
This commit is contained in:
commit
2c9d84a149
94
README.md
Normal file
94
README.md
Normal file
@ -0,0 +1,94 @@
|
||||
# 爬虫项目实战
|
||||
|
||||
## 说明
|
||||
|
||||
所有项目均为作者练手分享项目,如遇侵权请联系删除,仅作学习分享,不能进行任何商业活动。
|
||||
|
||||
由于程序完成的时间问题,部分项目可能无法复用。
|
||||
|
||||
练习笔记见note.txt
|
||||
|
||||
此项目将持续更新
|
||||
|
||||
## 基础篇
|
||||
|
||||
### request篇
|
||||
|
||||
1. 第一个爬虫程序,百度网页
|
||||
2. 初始反爬-ua
|
||||
3. 认识post请求-- 百度翻译
|
||||
4. 豆瓣电影
|
||||
5. 肯德基位置查询
|
||||
|
||||
### 解析html以及正则篇
|
||||
|
||||
1. 获取fakeua -- lxml解析
|
||||
2. 4k图片爬取 -- lxml以及解决编码错误问题
|
||||
3. 58 -- lxml以及分页爬取
|
||||
4. bs案例
|
||||
5. bs基础
|
||||
6. xpath解析
|
||||
7. xpath基础
|
||||
8. 正则练习
|
||||
9. 正则基础
|
||||
10. 简历爬取
|
||||
|
||||
### selenium
|
||||
|
||||
1. 12306模拟登录
|
||||
2. damai网
|
||||
3. 基础自动操作
|
||||
4. 模拟登陆
|
||||
5. 动作链和iframe处理
|
||||
6. 无头浏览器和反检测
|
||||
|
||||
### scrapy篇
|
||||
|
||||
1. bossjob一级页面爬取
|
||||
2. 双色球
|
||||
3. 图片
|
||||
4. 阳光政策
|
||||
5. yi车数据爬取 -- 带有js逆向,不过是入门级,以及大批量json数据解析
|
||||
6. 校花网
|
||||
7. 网易新闻
|
||||
8. 17k小说爬取
|
||||
|
||||
### 高性能异步爬虫
|
||||
|
||||
1. 认识flask
|
||||
2. meinv图片批量爬取
|
||||
3. 明星图片爬取
|
||||
4. 多任务协程
|
||||
5. 线程池基础
|
||||
6. 线程池应用
|
||||
|
||||
### 综合案例
|
||||
|
||||
1. 某视频网站 --> m3u8视频下载,解决带密钥以及不带密钥情况,m3u8入门级别以及多线程下载
|
||||
2. ins爬虫,对于页面参数提取以及解析json文件
|
||||
3. 语音爬虫,利用网络将文本转为语音,支持中英韩三国语言
|
||||
4. 验证码相关 -- 某诗文网登录以及图片验证码解决 --- ddddocr
|
||||
|
||||
## 进阶篇
|
||||
|
||||
### js逆向专题
|
||||
|
||||
***
|
||||
|
||||
#### 请求头或响应数据加密
|
||||
|
||||
1. 某天气网站---> 动态js 动态key 动态参数 反debug
|
||||
2. 某足球网站 --> 请求体多重加密,加密位置难定位
|
||||
3. youdao翻译
|
||||
4. fjs公共交易 --> 混淆参数加密
|
||||
5. wangyiyun音乐 --> 实现全站数据爬取
|
||||
6. 娱乐指数 --> 基础入门
|
||||
|
||||
#### 环境检测
|
||||
|
||||
|
||||
|
||||
#### wasm加密
|
||||
|
||||
1. 某航空 --> wasm操作内容实现加密解密 阿里系v2检测 阿里系v3检测(待解决)
|
||||
|
239
js/wasm/air/Flight.py
Normal file
239
js/wasm/air/Flight.py
Normal file
@ -0,0 +1,239 @@
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from urllib.parse import quote
|
||||
|
||||
import execjs
|
||||
import prettytable as pt
|
||||
import requests
|
||||
|
||||
|
||||
class CEAir:
    """Query one-way flights on the China Eastern mobile site (m.ceair.com).

    Request and response bodies are encrypted/decrypted by the ``encrypt`` /
    ``decrypto`` functions of ``./demo.js`` (run through ``execjs``).  When
    the server answers with a challenge page instead of JSON, the cookies are
    refreshed via :meth:`cookie_update` and the request is retried.
    """

    # Compiled ./demo.js context, created lazily by _js() and then shared.
    _js_ctx = None

    def __init__(self):
        """Create the HTTP session with the default headers and seed cookies."""
        self.headers = {
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Origin': 'https://m.ceair.com',
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Content-Type': 'application/json',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
        }
        # Hard-coded seed cookies; cookie_update() adds/overwrites entries.
        self.cookies = {
            '_xid': 'CVUbYl9lz3HFU2na2mZviTHeQM%2BaLh%2FhZbEQ2Axq1MA%3D',
            '_fmdata': 'uD5xda4HKJuu34L%2BVFA7yz9OQ7lR4yI6hmuL2aRyRiYEBkNHudAH0OBn7047MefSAP4CBQbxadfirurKjXlEhA%3D%3D',
            'acw_tc': 'ac11000116822592388965513e00ce89a852f5d79237d70f416ffeb9d66973'
        }
        self.session = requests.Session()
        self.flag = 1

    @classmethod
    def _js(cls):
        """Return the compiled ``./demo.js`` context, reading the file once.

        The file is opened in a ``with`` block so the handle is always closed
        (the previous code re-opened it on every call and never closed it).
        """
        if cls._js_ctx is None:
            with open('./demo.js', 'r', encoding='utf-8') as fp:
                cls._js_ctx = execjs.compile(fp.read())
        return cls._js_ctx

    def ajax_request(self, *, url, json_data, max_retries=5) -> dict:
        """POST ``json_data`` to ``url`` and return the decrypted JSON reply.

        :param url: API endpoint
        :param json_data: request body (already encrypted by the caller)
        :param max_retries: how many times the request is re-sent after a
            non-JSON (challenge) response before giving up
        :return: the decoded JSON document
        :raises RuntimeError: if every attempt is answered with a non-JSON body
        :raises KeyError: if the JSON reply has no ``res`` field
        """
        for _ in range(max_retries + 1):
            resp = self.session.post(url=url, json=json_data, headers=self.headers, cookies=self.cookies)
            try:
                # Only the JSON parse is guarded: a failure here means the
                # server sent a challenge page.  requests' JSONDecodeError is
                # a ValueError subclass.
                envelope = resp.json()
            except ValueError:
                # Without a declared charset requests decodes text/html as
                # ISO-8859-1, which garbles the (UTF-8) challenge page that
                # acw_sc_v3 saves to disk.
                if 'charset' not in resp.headers.get('Content-Type', '').lower():
                    resp.encoding = 'utf-8'
                # Handle the acw_sc challenge, then retry the same request.
                self.cookie_update(resp.text)
                print('cookies更新完成')
                continue
            plain = self._js().call('decrypto', envelope['res'])
            return json.loads(plain)
        raise RuntimeError(
            'no JSON response from %s after %d attempts' % (url, max_retries + 1))

    def get_flight(self, arr, dep, date):
        """Look up and print the flights for one route and date.

        :param arr: arrival city name
        :param dep: departure city name
        :param date: departure date
        :return: None; the flight table (or a "nothing found" message) is printed
        """
        query = {
            "tripType": 0, "depCode": self.get_city_code(dep), "arrCode": self.get_city_code(arr),
            "dt": "1", "at": "1",
            "depN": dep, "arrN": arr,
            "flightDate": date, "carryChd": False, "carryInf": False, "productType": "CASH", "curIndex": 0
        }
        # The search page URL (with the query embedded) is sent as Referer.
        referer = ('https://m.ceair.com/mapp/reserve/flightList?newParam='
                   + quote(json.dumps(query, separators=(',', ':'))))
        self.headers.update({
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'M-CEAIR-ENCRYPTED': 'true',
            'Origin': 'https://m.ceair.com',
            'Pragma': 'no-cache',
            'Referer': referer,
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
            'X-CEAIR-OS': 'M',
            'app_token_key': '',
            'sec-ch-ua': '"Chromium";v="112", "Google Chrome";v="112", "Not:A-Brand";v="99"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'transactionId': '05202304231034048094',
        })
        payload = {
            "currentQueryType": "FLIGHT_LIST", "currentSegIndex": 0, "carryChd": False, "carryInf": False,
            "productCodes": [],
            "selectedRoutes": [], "productType": "CASH",
            "routes": [{"arrCode": query['arrCode'], "depCode": query['depCode'], "flightDate": query['flightDate'],
                        "arrCodeType": query['at'], "depCodeType": query['dt'], "depCityName": query['depN'],
                        "arrCityName": query['arrN'], "segIndex": 0}],
            "tripType": "OW", "cabinGrade": "", "memberLabel": "", "salesChannel": "7701", "moduleX": "mShopping",
            "os": "M",
            "appVersion": "99.0.0", "transactionId": "05202304231034048094"
        }
        # The API expects the compact JSON body encrypted under the "req" key.
        encrypted = self._js().call('encrypt', json.dumps(payload, separators=(',', ':')))
        resp = self.ajax_request(url='https://m.ceair.com/m-base/sale/shopping', json_data={'req': encrypted})
        # A missing or empty "data" member is treated as "no flights".
        flights = (resp.get('data') or {}).get('flights')
        if flights:
            self.print_flights(list(self.process_json(flights)))
        else:
            print('没有这一天的航班信息!!或者输入了国家')

    def cookie_update(self, html):
        """Refresh the anti-bot cookies from a challenge page.

        If the page contains both an ``arg1='…'`` token and a
        ``setCookie("name", … x)`` call, the named cookie is computed with
        :meth:`acw_sc_v2`; otherwise the page is handled as a slider
        challenge by :meth:`acw_sc_v3`.

        :param html: source of the challenge page
        :return: None
        """
        arg1 = re.compile(r'.*?arg1=\'(.*?)\';').findall(html)
        name = re.compile(r'.*?setCookie\(\"(.*?)\".*?,.*?x\)').findall(html)
        if name and arg1:
            print('====开始处理acw_sc_v2====')
            self.cookies[name[0]] = self.acw_sc_v2(arg1[0])
            print('acw_sc_v2 =', self.cookies[name[0]])
            print('====结束处理acw_sc_v2====')
        else:
            acw_tc, acw_sc_v3 = self.acw_sc_v3(html)
            self.cookies['acw_tc'] = acw_tc
            self.cookies['acw_sc__v3'] = acw_sc_v3

    @staticmethod
    def acw_sc_v2(arg):
        """Compute the acw_sc_v2 cookie value.

        :param arg: the ``arg1`` token taken from the challenge page
        :return: the value produced by ``getCookie`` in ``./demo.js``
        """
        return CEAir._js().call('getCookie', arg)

    @staticmethod
    def acw_sc_v3(html):
        """Handle the slider challenge manually.

        The page is written to ``./slide.html`` and the two cookie values are
        then read from standard input.

        :param html: source of the slider page
        :return: tuple ``(acw_tc, acw_sc__v3)`` as typed by the user
        """
        print('====开始处理滑块====')
        with open('./slide.html', 'w', encoding='utf-8') as fp:
            fp.write(html)
        acw_tc = input('acw_tc: ')
        acw_sc__v3: str = input('acw_sc__v3: ')
        print('====结束处理滑块====')
        return acw_tc, acw_sc__v3

    @staticmethod
    def process_json(flights):
        """Yield one flat, printable dict per flight record.

        The three nested lists (``transferStopInfos``, ``flightServices``,
        ``fares``) are each joined into a newline-separated string.  Falsy
        entries are skipped in all three lists and a ``None`` list is treated
        as empty.

        :param flights: iterable of flight dicts from the shopping API
        :return: generator of dicts whose key order matches the table columns
        """
        for item in flights:
            stops = '\n'.join(
                info['typeText'] + ',' + info['cityName'] + ',' + info['stopTime']
                for info in (item['transferStopInfos'] or ()) if info)
            services = '\n'.join(
                f"{info['flightNoGroup']}, {info['meal']}"
                for info in (item['flightServices'] or ()) if info)
            fares = '\n'.join(
                f'{info["baseCabinCodeText"]} : {info["salePrice"]}'
                for info in (item['fares'] or ()) if info)
            yield {
                'flightNoGroup': item['flightNoGroup'],
                'depDate': item['depTime'] + ' ' + item['depDate'] + ' ' + item['depWeek'],
                'arrDate': item['arrTime'] + ' ' + item['arrDate'] + ' ' + item['arrWeek'],
                'depAirportName': item['depCityName'] + item['depAirportName'] + item['depTerminal'],
                'arrAirportName': item['arrCityName'] + item['arrAirportName'] + item['arrTerminal'],
                'transferStopInfos': stops,
                'flightServices': services,
                'fares': fares
            }

    @staticmethod
    def print_flights(items):
        """Print the rows produced by :meth:`process_json` as a table.

        :param items: iterable of dicts; values are used in insertion order
        """
        tb = pt.PrettyTable()
        tb.field_names = ['航班号', '出发时间', '到达时间', '出发机场', '到达机场', '中转信息', '是否含餐食', '价格']
        tb.align = 'c'
        # Cell padding width.
        tb.padding_width = 12
        for item in items:
            tb.add_row(list(item.values()))
        print(tb)

    @staticmethod
    def get_city_code(city_name):
        """Resolve a city name to its city code.

        The lookup is retried every two seconds for as long as the request
        fails or the reply cannot be parsed.

        :param city_name: city name to search for
        :return: the ``CityCode`` of the first match, or None if the API
            returned no city
        """
        json_data = {
            'methodName': 'searchairport',
            'IsHighLight': True,
            'Keyword': city_name,
            'comeFrom': 'CEAIR_M',
            'CountryType': None,
            'salesChannel': '7701',
            'moduleX': 'mShopping',
            'os': 'M',
            'appVersion': '99.0.0',
            'transactionId': '0520230421151319554',
        }
        headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Content-Type': 'application/json',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
        }
        while True:
            try:
                # The timeout turns a stalled connection into a retry.
                resp = requests.post('https://m.ceair.com/m-base/sale/getBasicData', headers=headers,
                                     json=json_data, timeout=10)
                cities = resp.json()['data']['Data']['CityList']
                code = cities[0]['CityCode'] if cities else None
            except (requests.RequestException, ValueError, LookupError, TypeError):
                # Network failure or unexpected reply: wait, then try again.
                # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit
                # are not swallowed.
                time.sleep(2)
                continue
            if code is not None:
                print(city_name + '---' + code)
            return code
|
||||
|
||||
|
||||
def check_value(value):
    """Return Python source, meant for ``exec``, that validates one input.

    :param value: the text the user typed
    :return: an empty string (a no-op for ``exec``) when ``value`` is truthy;
        otherwise code that prints an error message and exits with status 1.

    The function used to return ``None`` for valid input, which makes
    ``exec(check_value(x))`` raise ``TypeError``; it now always returns a
    string.
    """
    if value:
        return ''
    return 'import sys\nprint("您的输入有误!退出程序...")\nsys.exit(1)'
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Interactive loop: ask for arrival city, departure city and date (in
    # that order) and print the matching flights; repeats until interrupted.
    # Input validation through exec(check_value(...)) is not enabled.
    client = CEAir()
    prompts = (
        ('arr', '输入到达城市: '),
        ('dep', '输入出发城市: '),
        ('date', '输入出发时间: '),
    )
    while True:
        answers = {key: input(text) for key, text in prompts}
        client.get_flight(**answers)
|
10602
js/wasm/air/ddd.js
Normal file
10602
js/wasm/air/ddd.js
Normal file
File diff suppressed because one or more lines are too long
191
js/wasm/air/demo.js
Normal file
191
js/wasm/air/demo.js
Normal file
@ -0,0 +1,191 @@
|
||||
const Module = require('./ddd.js')
|
||||
|
||||
var res = 'yX67CAY2RrMSJ1TxneWBANxXBK5wL6Rvk2bDRa+DKUspUee9v69x1s3TH1rv8tP4cJl5aQImurVCCHIQM4kz9xljjBV922sd1uSCv9xjnAKFy0lD9G2yVHb3kXpNLrB0eu7ITdyjo0Dxa0STnuTci4Bhci2TJ5dpsC20pGvgCeravpF32+Y+HnKZHjQG9/4fuCyODs6yYY42eAo7RP78abnihlRmIxWVFbMTb/6SYtqEycGkUtWyB1bTrXl+lpMXlfOMllQj7nUwNaS3DKNL+c2FPxVrcpbwdHtqm2icFIJA3UFFOUn6VrucV2oLNgbWMr2rKK75fSQu+nvLIoVCbN0hU4U2ccgAXVS2p7ZrUVa3smdoTBj+AUVBWgTBl70du36fCmHPdjUrpoI4v0+sLtkOTrGhGKs8LX6Tu8Gd9mNAcLYpdZmgA2ndgtTlF7Up8lYJBsFKLrKurO0EVWqcWQ4rZSoesJ0bwXmIzikqq04xcTM61oe7VAKBd33KZXh0FJsP/rRMwtRFXuvvvcnfAGAhvfRWh+pzSdPULo3s0mtTbiKM5P3rabCzimEAGCFbKg8cCfiO+PakuqZlkxWj+MdAWV58xfFksTKSxdhzNeQ6WzKn4g/gZzxJ0mM2nrxQ'
|
||||
|
||||
var input = '{"currentQueryType":"FLIGHT_LIST","currentSegIndex":0,"carryChd":false,"carryInf":false,"productCodes":[],"selectedRoutes":[],"productType":"CASH","routes":[{"arrCode":"BJS","depCode":"SHA","flightDate":"20230424","arrCodeType":"1","depCodeType":"1","depCityName":"上海","arrCityName":"北京","segIndex":0}],"tripType":"OW","cabinGrade":"","memberLabel":"","salesChannel":"7701","moduleX":"mShopping","os":"M","appVersion":"99.0.0","transactionId":"0520230421133633605"}'
|
||||
|
||||
var arg1 = '1BD9F59BC7D673529D64C4F4285144E6AA4B3127';
|
||||
|
||||
var k = [121, 96, 7, 103, 57, 95, 61, 124, 121, 96, 7, 103, 57, 95, 61, 124]
|
||||
/**
 * Compute the cookie value for a challenge token.
 *
 * The characters of `arg1` are first reordered according to a fixed
 * permutation table, then the result is XOR-ed byte by byte (two hex digits
 * at a time) with a fixed hex mask.
 *
 * Unlike the previous version this neither adds methods to String.prototype
 * nor assigns to an undeclared global (`arg2`).
 *
 * @param {string} arg1 hex token taken from the challenge page
 * @returns {string} lower-case hex string
 */
function getCookie(arg1) {
    var mask = '3000176000856006061501533003690027800375';
    // order[j] is the 1-based position in arg1 of the j-th output character.
    var order = [0xf, 0x23, 0x1d, 0x18, 0x21, 0x10, 0x1, 0x26, 0xa, 0x9, 0x13, 0x1f, 0x28, 0x1b, 0x16, 0x17, 0x19, 0xd, 0x6, 0xb, 0x27, 0x12, 0x14, 0x8, 0xe, 0x15, 0x20, 0x1a, 0x2, 0x1e, 0x7, 0x4, 0x11, 0x5, 0x3, 0x1c, 0x22, 0x25, 0xc, 0x24];

    // Step 1: reorder. Positions beyond the end of arg1 contribute nothing.
    var shuffled = '';
    for (var j = 0; j < order.length; j++) {
        var ch = arg1[order[j] - 1];
        if (ch !== undefined) {
            shuffled += ch;
        }
    }

    // Step 2: XOR with the mask, two hex digits at a time, zero-padded.
    var result = '';
    for (var i = 0; i < shuffled.length && i < mask.length; i += 2) {
        var left = parseInt(shuffled.slice(i, i + 2), 16);
        var right = parseInt(mask.slice(i, i + 2), 16);
        var hex = (left ^ right).toString(16);
        result += hex.length === 1 ? '0' + hex : hex;
    }
    return result;
}
|
||||
/**
 * Decrypt a base64 response body with the module-level key `k`.
 *
 * @param {string} data base64 ciphertext
 * @returns {string} the decrypted JSON text (returned as a string, not parsed)
 * @throws {SyntaxError} if the decrypted text is not valid JSON
 */
function decrypto(data) {
    var plain = wbsk_AES_cbc_decrypt_base64(data, k);
    // Parse only to validate; the result used to be stored in an undeclared
    // global variable (`a`), which is no longer done.
    JSON.parse(plain);
    return plain;
}
|
||||
/**
 * Encrypt a request body with the module-level key `k`.
 *
 * @param {string} data plain text to encrypt
 * @returns {string} base64 ciphertext
 */
function encrypt(data) {
    var cipherText = wbsk_AES_cbc_encrypt_base64(data, k);
    return cipherText;
}
|
||||
|
||||
// Sample calls executed whenever this file is evaluated: they run
// decrypto/encrypt/getCookie on the sample values declared at the top of the
// file and print the results. The wbsk_* helpers they rely on are function
// declarations further down, so they are already available here (hoisting).
// NOTE(review): presumably these also run each time the Python side
// evaluates this file through execjs — confirm the extra work is intended.
console.log(decrypto(res));
console.log(encrypt(input))
console.log(getCookie(arg1))
|
||||
|
||||
/**
 * Decode base64 ciphertext, decrypt it and return the plain text.
 *
 * @param {string} input base64 ciphertext
 * @param {number[]} iv byte array forwarded to wbsk_AES_cbc_decrypt
 * @returns {string} decrypted text
 */
function wbsk_AES_cbc_decrypt_base64(input, iv) {
    var cipherBytes = base64ToArrayBuffer(input);
    var plainBytes = wbsk_AES_cbc_decrypt(cipherBytes, cipherBytes.length, iv, iv.length);
    return byteToString(plainBytes);
}
|
||||
|
||||
/**
 * Call the wasm export `wbsk_AES_cbc_decrypt` and copy its output out of the
 * module heap.
 *
 * @param {ArrayLike<number>} input ciphertext bytes
 * @param {number} inlen number of input bytes
 * @param {ArrayLike<number>} iv byte array passed as the fifth argument
 * @param {number} ivlen length of `iv`
 * @returns {number[]} output bytes as read from HEAP8 (signed values),
 *     truncated to the length the wasm function reported
 */
function wbsk_AES_cbc_decrypt(input, inlen, iv, ivlen) {
    var len = inlen;
    var outadd = Module._malloc(len);
    var lenadd = Module._malloc(4);
    try {
        // In/out parameter: buffer capacity on entry, bytes written on return.
        Module.HEAP32[lenadd / 4] = len;

        var CBCDecrypt = Module.cwrap('wbsk_AES_cbc_decrypt', 'number', ['array', 'number', 'number', 'number', 'array', 'number'])
        CBCDecrypt(new Uint8Array(input), inlen, outadd, lenadd, new Uint8Array(iv), ivlen);

        // Heap views are taken only after the call.
        // NOTE(review): presumably the module may replace its HEAP* views if
        // wasm memory grows during the call — confirm against ddd.js.
        var olen = Module.HEAP32[lenadd / 4];
        var output = Module.HEAP8.subarray(outadd, outadd + len);
        // Copy into a plain array before the buffers are released.
        return Array.prototype.slice.call(output, 0, olen);
    } finally {
        // Release both buffers even if the wasm call throws.
        Module._free(outadd);
        Module._free(lenadd);
    }
}
|
||||
|
||||
/**
 * Convert text to bytes, encrypt it and return the result as base64.
 *
 * @param {string} input plain text
 * @param {number[]} iv byte array forwarded to wbsk_AES_cbc_encrypt
 * @returns {string} base64 ciphertext
 */
function wbsk_AES_cbc_encrypt_base64(input, iv) {
    var plainBytes = stringToByte(input);
    var cipherBytes = wbsk_AES_cbc_encrypt(plainBytes, plainBytes.length, iv, iv.length);
    return arrayBufferToBase64(cipherBytes);
}
|
||||
|
||||
/**
 * Call the wasm export `wbsk_AES_cbc_encrypt` and copy its output out of the
 * module heap.
 *
 * @param {ArrayLike<number>} input plain-text bytes
 * @param {number} inlen number of input bytes
 * @param {ArrayLike<number>} iv byte array passed as the fifth argument
 * @param {number} ivlen length of `iv`
 * @returns {number[]} output bytes as read from HEAP8 (signed values),
 *     truncated to the length the wasm function reported
 */
function wbsk_AES_cbc_encrypt(input, inlen, iv, ivlen) {
    // Output capacity: the input length rounded up to the next multiple of
    // 16 (a full extra 16 bytes when inlen is already a multiple of 16).
    var len = (Math.floor(inlen / 16) + 1) * 16;
    var outadd = Module._malloc(len);
    var lenadd = Module._malloc(4);
    try {
        // In/out parameter: buffer capacity on entry, bytes written on return.
        Module.HEAP32[lenadd / 4] = len;

        var CBCEncrypt = Module.cwrap('wbsk_AES_cbc_encrypt', 'number', ['array', 'number', 'number', 'number', 'array', 'number'])
        CBCEncrypt(new Uint8Array(input), inlen, outadd, lenadd, new Uint8Array(iv), ivlen);

        // Heap views are taken only after the call.
        // NOTE(review): presumably the module may replace its HEAP* views if
        // wasm memory grows during the call — confirm against ddd.js.
        var olen = Module.HEAP32[lenadd / 4];
        var output = Module.HEAP8.subarray(outadd, outadd + len);
        // Copy into a plain array before the buffers are released.
        return Array.prototype.slice.call(output, 0, olen);
    } finally {
        // Release both buffers even if the wasm call throws.
        Module._free(outadd);
        Module._free(lenadd);
    }
}
|
||||
|
||||
/**
 * Encode a string as UTF-8.
 *
 * Iterates by code point (codePointAt) rather than by UTF-16 code unit, so
 * characters outside the BMP become one 4-byte sequence. With charCodeAt the
 * 4-byte branch could never be reached and each surrogate half was encoded
 * as its own 3-byte sequence.
 *
 * @param {string} str text to encode
 * @returns {number[]} UTF-8 bytes (0-255)
 */
function stringToByte(str) {
    var bytes = [];
    for (var i = 0; i < str.length; i++) {
        var c = str.codePointAt(i);
        if (c >= 0x010000) {
            bytes.push(((c >> 18) & 0x07) | 0xF0);
            bytes.push(((c >> 12) & 0x3F) | 0x80);
            bytes.push(((c >> 6) & 0x3F) | 0x80);
            bytes.push((c & 0x3F) | 0x80);
            // The code point used two UTF-16 units; skip the low surrogate.
            i++;
        } else if (c >= 0x000800) {
            bytes.push(((c >> 12) & 0x0F) | 0xE0);
            bytes.push(((c >> 6) & 0x3F) | 0x80);
            bytes.push((c & 0x3F) | 0x80);
        } else if (c >= 0x000080) {
            bytes.push(((c >> 6) & 0x1F) | 0xC0);
            bytes.push((c & 0x3F) | 0x80);
        } else {
            bytes.push(c & 0xFF);
        }
    }
    return bytes;
}
|
||||
|
||||
/**
 * Decode UTF-8 bytes into a string.
 *
 * Fixes the payload extraction of the lead byte: a lead byte with n leading
 * 1-bits carries its payload after those n bits and the following 0, i.e.
 * from bit index n + 1. The old offset (7 - n) was only right for 3-byte
 * sequences, so 2-byte characters from U+0200 upwards and all 4-byte
 * characters decoded to the wrong value.
 *
 * @param {ArrayLike<number>|string} arr bytes (signed or unsigned); a string
 *     is returned unchanged
 * @returns {string} decoded text
 */
function byteToString(arr) {
    if (typeof arr === 'string') {
        return arr;
    }
    var str = '';
    for (var i = 0; i < arr.length; i++) {
        // "& 0xff" maps signed bytes (as read from HEAP8) to 0-255.
        var bits = (arr[i] & 0xff).toString(2);
        var lead = bits.match(/^1+?(?=0)/);
        if (lead && bits.length == 8) {
            var bytesLength = lead[0].length;
            var store = bits.slice(bytesLength + 1);
            // Every continuation byte contributes its low six bits.
            for (var st = 1; st < bytesLength; st++) {
                store += (arr[st + i] & 0xff).toString(2).slice(2);
            }
            var codePoint = parseInt(store, 2);
            // fromCodePoint also covers characters beyond U+FFFF; values it
            // would reject (malformed input) become U+FFFD instead of
            // throwing.
            str += (codePoint >= 0 && codePoint <= 0x10FFFF)
                ? String.fromCodePoint(codePoint)
                : '\uFFFD';
            i += bytesLength - 1;
        } else {
            str += String.fromCharCode(arr[i]);
        }
    }
    return str;
}
|
||||
|
||||
/**
 * Base64-encode a sequence of bytes.
 *
 * @param {ArrayLike<number>|ArrayBuffer} buffer bytes to encode; values are
 *     converted through Uint8Array, so signed bytes wrap to 0-255
 * @returns {string} base64 text
 */
function arrayBufferToBase64(buffer) {
    var view = new Uint8Array(buffer);
    var chars = [];
    for (var idx = 0; idx < view.byteLength; idx++) {
        chars.push(String.fromCharCode(view[idx]));
    }
    return btoa(chars.join(''));
}
|
||||
|
||||
/**
 * Decode base64 text into bytes.
 *
 * @param {string} base64 base64 text
 * @returns {Uint8Array} the decoded bytes
 */
function base64ToArrayBuffer(base64) {
    var decoded = atob(base64);
    var out = new Uint8Array(decoded.length);
    var pos = 0;
    while (pos < decoded.length) {
        out[pos] = decoded.charCodeAt(pos);
        pos += 1;
    }
    return out;
}
|
332
js/wasm/air/slide.html
Normal file
332
js/wasm/air/slide.html
Normal file
@ -0,0 +1,332 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>滑动验证页面</title>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<style type="text/css">
|
||||
html, body, div, span, p{
|
||||
margin:0;
|
||||
padding:0;
|
||||
border:0;
|
||||
outline:0;
|
||||
font-size:100%;
|
||||
vertical-align:baseline;
|
||||
background:transparent;
|
||||
}
|
||||
body{
|
||||
background: #fff;
|
||||
}
|
||||
</style>
|
||||
<script type="text/javascript">
|
||||
if (window.console === undefined){
|
||||
console = {};
|
||||
console.log = function(){};
|
||||
};
|
||||
|
||||
window._waf_is_mobile = false;
|
||||
|
||||
(function(a) {
|
||||
if (/(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows ce|xda|xiino/i.test(a) || /1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i.test(a.substr(0, 4))){
|
||||
window._waf_is_mobile = true;
|
||||
}
|
||||
})(navigator.userAgent || navigator.vendor || window.opera);
|
||||
</script>
|
||||
<!-- æµ·å¤é¡µé¢å è½½æ¤js -->
|
||||
<!-- <script type="text/javascript" charset="utf-8" src="//aeis.alicdn.com/sd/ncpc/nc.js?t=2015052012"></script> -->
|
||||
</head>
|
||||
<body>
|
||||
<div id="PC" style="display: none">
|
||||
|
||||
<div class="contentbg">
|
||||
<div class="content">
|
||||
<div class="left"></div>
|
||||
<div class="right">
|
||||
<h1>访问验证</h1>
|
||||
<p>别离开,为了更好的访问体验,请滑动滑块进行验证,通过后即可继续访问网页</p>
|
||||
<div id="nocaptcha" class="nc-container"></div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div id="_umfp" style="display:inline;width:1px;height:1px;overflow:hidden"></div>
|
||||
</div>
|
||||
<div id="H5" style="display: none;">
|
||||
<div class="waf-nc-h5-mask"></div>
|
||||
<div id="WAF_NC_H5_WRAPPER" class="waf-nc-h5-wrapper">
|
||||
<div class="waf-nc-h5-panel">
|
||||
<img class="waf-nc-h5-icon" src="//img.alicdn.com/imgextra/i1/O1CN01L12MaQ1ZwfYKk7Yrc_!!6000000003259-2-tps-900-594.png" alt="" height="132" width="200">
|
||||
<div class="waf-nc-h5-description">为了更好的访问体验,请滑动滑块进行验证</div>
|
||||
</div>
|
||||
<div id="h5_nocaptcha" class="nc-container" data-nc-idx="1"></div>
|
||||
</div>
|
||||
</div>
|
||||
<div style="margin-left:20px" id="traceid">TraceID: 76b20fe716897324658686923e7d50</div>
|
||||
</div>
|
||||
</body>
|
||||
<script type="text/javascript">
|
||||
|
||||
var requestInfo = {
|
||||
type: 'POST', // 'GET' å 'POST'
|
||||
url: 'https://m.ceair.com/m-base/sale/shopping', // 'https://www.taobao.com/detail'
|
||||
args: '',
|
||||
data: '{"req": "yX67CAY2RrMSJ1TxneWBANxXBK5wL6Rvk2bDRa DKUspUee9v69x1s3TH1rv8tP4cJl5aQImurVCCHIQM4kz9xljjBV922sd1uSCv9xjnAKFy0lD9G2yVHb3kXpNLrB0eu7ITdyjo0Dxa0STnuTci4Bhci2TJ5dpsC20pGvgCeravpF32 Y HnKZHjQG9/4fuCyODs6yYY42eAo7RP78abnihlRmIxWVFbMTb/6SYtqEycGkUtWyB1bTrXl lpMXlfOMllQj7nUwNaS3DKNL b25tZGFQATiHpx X 5//zSrvAGMKDhqWPiGGOpETy2hjTWVdCSg30GiXLi1JVHPgb8XwWNx7P8vSQwRR5yWsOwOS4VlTS48Bn7uCRs25XhSkkMcPUzMh4CNScCvQsK4b1ecjup7j7S/sk8iGwSQjmXoVplQthl/CT35Rrd0LEHtn6xESx436EYQJzafDdweVi7CbFlFr2tzykvHlNsj/iYxC2R6PuGQYhcTGkOXQ36MT1oeSdMHuDYDicdwUQiTmCUhxg2vAHvtSYmOXfnLESxBoKefeLXy0IDmAiHrK8riamcKKMCxQkj3S29GKKM/a/kH3nu8t7KlM55EaP/B7cExqbwS2WfAesDUlxbCaivh5sar3KLxlYEeS2rs4ltFKQ=="}', //a=1&b=2&c=3...
|
||||
token: '1032574b-c24f-4eff-aba3-cf9f3fd317fc',
|
||||
refer: 'ejy+YRfrjq63pIq12Exo1rDW6sI=',
|
||||
headers: {"Content-Type":"application/json"},
|
||||
};
|
||||
|
||||
|
||||
function insertScripts(){
|
||||
var script = document.createElement('script');
|
||||
var time = new Date();
|
||||
var head = document.head || document.getElementsByTagName('head')[0];
|
||||
script.src = '//g.alicdn.com/AWSC/AWSC/awsc.js?t=' + (time.getFullYear()+(time.getMonth()+1)+time.getDate()+time.getHours());
|
||||
if ("onload" in script) {
|
||||
script.onload = function(){
|
||||
initNC();
|
||||
}
|
||||
} else {
|
||||
script.onreadystatechange = function() {
|
||||
if (/loaded|complete/.test(script.readyState)) {
|
||||
initNC();
|
||||
}
|
||||
};
|
||||
}
|
||||
head.appendChild(script);
|
||||
//å è½½åèªçcss
|
||||
var style = document.createElement('style');
|
||||
style.type = "text/css";
|
||||
var css = '';
|
||||
if (_waf_is_mobile){
|
||||
css = '#waf_nc_h5_block{position:fixed;_position:absolute;width:100%;height:100%;top:0;bottom:0;left:0;z-index:9999}.waf-nc-h5-mask{background:#777;opacity:.5;filter:alpha(opacity=50);width:100%;height:100%}.waf-nc-h5-wrapper{width:94%;position:absolute;top:20%;left:50%;margin-top:-20%;margin-left:-47%;padding:5% 1%;background:#fff;border-radius:3px;box-sizing:border-box}.waf-nc-h5-panel{width:100%;text-align:center}.waf-nc-h5-icon{margin:0 auto}.waf-nc-h5-description{margin-top:40px;font-size:14px;color:#595959}#traceid{text-align:center;margin-top:500px!important;font-size:12px;color:#999}.nc-container{margin-top:30px}.nc_bg{background:#fff3ea!important}.btn_slide{background:#ff6a00!important;border:0!important;color:#fff!important;width:48px!important;height:48px!important;font-size:30px;font-weight:900!important;line-height:48px!important}.btn_ok{background:#ff6a00!important;border:0!important;color:#fff!important;width:48px!important;height:48px!important;font-size:30px;font-weight:900!important;line-height:48px!important}.nc_scale{background:rgba(241,241,242,1)!important;height:48px!important}.nc-lang-cnt{height:48px!important;margin-left:10px!important;line-height:48px!important;font-size:14px!important}.nc_1_nocaptcha{width:300px!important;height:48px!important;margin:auto!important;left:0!important;right:0!important}'
|
||||
}else{
|
||||
css = '.head{position:relative;height:70px;padding-left:25px;border-bottom:1px solid #ebecec}.content{width:1000px;min-height:250px;margin-top:164px!important;margin-left:auto;margin-right:auto}.left{width:300px;height:198px;float:left;background:url(https://img.alicdn.com/imgextra/i1/O1CN01L12MaQ1ZwfYKk7Yrc_!!6000000003259-2-tps-900-594.png) no-repeat;background-size:cover}.right{margin-left:250px;padding-left:140px}.contentbg{width:100%;min-height:250px}.right p{font-size:14px;color:#333;line-height:25px;height:25px;text-align:left}#nocaptcha{margin-top:40px;margin-left:5px;width:300px!important}#nocaptcha .nc-lang-cnt{color:#fff}#nocaptcha .clickCaptcha_text .nc-lang-cnt{color:#333}#traceid{margin-left:250px!important;padding-left:140px!important;font-size:14px;color:rgba(153,153,153,1)}.nc_bg{background:#fff3ea!important}.btn_slide{background:#ff6a00!important;border:0!important;color:#fff!important;width:48px!important;height:48px!important;font-size:30px;font-weight:900!important;line-height:48px!important}.btn_ok{background:#ff6a00!important;border:0!important;color:#fff!important;width:48px!important;height:48px!important;font-size:30px;font-weight:900!important;line-height:48px!important}.nc_scale{background:rgba(241,241,242,1)!important;height:48px!important}.nc-lang-cnt{height:48px!important;margin-left:10px!important;line-height:48px!important;font-size:14px!important}.nc_1_nocaptcha{width:300px!important;height:48px!important}'
|
||||
}
|
||||
try {
|
||||
style.appendChild(document.createTextNode(css));
|
||||
}catch(e){
|
||||
style.styleSheet.cssText = css;
|
||||
}
|
||||
var head = document.head || document.getElementsByTagName('head')[0];
|
||||
head.appendChild(style);
|
||||
}
|
||||
|
||||
insertScripts();
|
||||
|
||||
function parseURL(url) {
|
||||
|
||||
var search_index = url.indexOf('?'),
|
||||
hash_index = url.indexOf('#');
|
||||
|
||||
var base, search, hash;
|
||||
|
||||
try{
|
||||
if (search_index < 0 || (hash_index > -1 && search_index > hash_index)){
|
||||
if (hash_index < 0){
|
||||
base = url;
|
||||
search = '';
|
||||
hash = '';
|
||||
}else{
|
||||
base = url.slice(0, hash_index);
|
||||
search = '';
|
||||
hash = url.slice(hash_index, url.length);
|
||||
}
|
||||
|
||||
}else{
|
||||
if (hash_index < 0){
|
||||
base = url.slice(0, search_index);
|
||||
search = url.slice(search_index, url.length);
|
||||
hash = '';
|
||||
}else{
|
||||
base = url.slice(0, search_index);
|
||||
search = url.slice(search_index, hash_index);
|
||||
hash = url.slice(hash_index, url.length);
|
||||
}
|
||||
}
|
||||
}catch(e){
|
||||
base = url;
|
||||
search = '';
|
||||
hash = '';
|
||||
}
|
||||
|
||||
|
||||
return {
|
||||
base: base,
|
||||
search: search,
|
||||
hash: hash,
|
||||
original: url
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
function parseQuery(qstr) {
|
||||
if (qstr.charAt(0) != '?') {
|
||||
return {};
|
||||
}
|
||||
var query = {};
|
||||
var a = qstr.substr(1).split('&');
|
||||
for (var i = 0; i < a.length; i++) {
|
||||
var b = a[i].split('=');
|
||||
console.log(decodeURIComponent(b[0]))
|
||||
if (decodeURIComponent(b[0]) !== 'u_asec'){
|
||||
query[decodeURIComponent(b[0])] = decodeURIComponent(b[1] || '');
|
||||
}
|
||||
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
function addQuery(query, data) {
|
||||
var qdata = parseQuery(query);
|
||||
var rt = '?';
|
||||
for (var i in data) {
|
||||
qdata[i] = data[i];
|
||||
}
|
||||
for (var i in qdata) {
|
||||
rt += encodeURIComponent(i) + '=' + encodeURIComponent(qdata[i]) + '&';
|
||||
}
|
||||
rt = rt.substr(0 , rt.length - 1);
|
||||
return rt;
|
||||
}
|
||||
|
||||
function combineUrl(parsedUrl) {
|
||||
return parsedUrl.base + parsedUrl.search + parsedUrl.hash;
|
||||
|
||||
}
|
||||
|
||||
function parseFormQuery(qstr) {
|
||||
if (qstr.length === 0 || qstr.indexOf('=') < 0){
|
||||
return [];
|
||||
}
|
||||
|
||||
var formItems = [];
|
||||
var a = qstr.split('&');
|
||||
for (var i = 0; i < a.length; i++) {
|
||||
var b = a[i].split('=');
|
||||
var str = '<input type="hidden" name="' + b[0] + '" value="' + b[1] + '" />'
|
||||
formItems.push(str);
|
||||
}
|
||||
return formItems;
|
||||
}
|
||||
|
||||
function reform(data) {
|
||||
var form = document.createElement('form');
|
||||
var parsedUrl = parseURL(requestInfo.url);
|
||||
parsedUrl.search = addQuery(parsedUrl.search,data)
|
||||
var newUrl = combineUrl(parsedUrl);
|
||||
form.action = newUrl;
|
||||
form.method = "POST";
|
||||
form.innerHTML = parseFormQuery(requestInfo.data).join('');
|
||||
document.body.appendChild(form);
|
||||
form.submit();
|
||||
// document.body.appendChild(form);
|
||||
}
|
||||
|
||||
var NC_Opt = {
|
||||
renderTo: "nocaptcha",//渲æå°DOM ID
|
||||
appkey: "CF_APP_WAF", // åºç¨æ è¯
|
||||
scene: "register",
|
||||
trans: {"key1": "code100", "user": "default"},
|
||||
token: requestInfo.token,//umid token
|
||||
language: "cn",//è¯è¨å
ï¼é»è®¤ä¸æ
|
||||
isEnabled: true,
|
||||
times: 3,
|
||||
success: function (data) {
|
||||
document.getElementById("nc_1_n1z").innerHTML='â'
|
||||
document.getElementsByClassName('nc-lang-cnt')[0].innerHTML='<b style="margin-left:60px;-webkit-text-fill-color:#FF6A00 !important">éªè¯æåï¼</b>'
|
||||
|
||||
if (requestInfo.type === 'GET'){
|
||||
var d = {
|
||||
u_atoken: data.token,
|
||||
u_asession: data.sessionId,
|
||||
u_asig: data.sig,
|
||||
u_aref: requestInfo.refer
|
||||
};
|
||||
// location.href = requestInfo.url + addQuery(requestInfo.data, d);
|
||||
var parsedUrl = parseURL(requestInfo.url);
|
||||
parsedUrl.search = addQuery(parsedUrl.search,d)
|
||||
// location.href = combineUrl(parsedUrl);
|
||||
location.replace(combineUrl(parsedUrl));
|
||||
}else{
|
||||
var d = {
|
||||
u_atoken: data.token,
|
||||
u_asession: data.sessionId,
|
||||
u_asig: data.sig,
|
||||
u_aref: requestInfo.refer
|
||||
};
|
||||
reform(d);
|
||||
}
|
||||
},
|
||||
// æ»å¨éªè¯å¤±è´¥æ¶è§¦å该åè°åæ°ã
|
||||
fail: function (failCode) {
|
||||
window.console && console.log(s);
|
||||
},
|
||||
// éªè¯ç å è½½åºç°å¼å¸¸æ¶è§¦å该åè°åæ°ã
|
||||
error: function (errorCode) {
|
||||
window.console && console.log(errorCode)
|
||||
}
|
||||
};
|
||||
var NC_h5_Opt = {
|
||||
renderTo: "h5_nocaptcha",//渲æå°DOM ID
|
||||
appkey: "CF_APP_WAF", // åºç¨æ è¯
|
||||
scene: "register_h5",
|
||||
trans: {"key1": "code200", "user": "default"},
|
||||
token: requestInfo.token,//umid token
|
||||
language: "cn",//è¯è¨å
ï¼é»è®¤ä¸æ
|
||||
isEnabled: true,
|
||||
times: 3,
|
||||
success: function (data) {
|
||||
document.getElementById("nc_1_n1z").innerHTML='â'
|
||||
document.getElementsByClassName('nc-lang-cnt')[0].innerHTML='<b style="margin-left:60px;-webkit-text-fill-color:#FF6A00 !important">éªè¯æåï¼</b>'
|
||||
if (data.token === undefined) data.token = requestInfo.token;
|
||||
if (requestInfo.type === 'GET'){
|
||||
var d = {
|
||||
u_atoken: data.token,
|
||||
u_asession: data.sessionId,
|
||||
u_asig: data.sig,
|
||||
u_aref: requestInfo.refer
|
||||
};
|
||||
// location.href = requestInfo.url + addQuery(requestInfo.data, d);
|
||||
var parsedUrl = parseURL(requestInfo.url);
|
||||
parsedUrl.search = addQuery(parsedUrl.search,d)
|
||||
// location.href = combineUrl(parsedUrl);
|
||||
location.replace(combineUrl(parsedUrl));
|
||||
}else{
|
||||
var d = {
|
||||
u_atoken: data.token,
|
||||
u_asession: data.sessionId,
|
||||
u_asig: data.sig,
|
||||
u_aref: requestInfo.refer
|
||||
};
|
||||
reform(d);
|
||||
}
|
||||
},
|
||||
// æ»å¨éªè¯å¤±è´¥æ¶è§¦å该åè°åæ°ã
|
||||
fail: function (failCode) {
|
||||
window.console && console.log(s);
|
||||
},
|
||||
// éªè¯ç å è½½åºç°å¼å¸¸æ¶è§¦å该åè°åæ°ã
|
||||
error: function (errorCode) {
|
||||
window.console && console.log(errorCode)
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Show the captcha container and initialise the "nc" AWSC module.
 *
 * When window._waf_is_mobile is truthy the 'H5' element is shown and the
 * module is initialised with NC_h5_Opt; otherwise the 'PC' element is shown
 * and NC_Opt is used. The resulting instance is stored in window.nc.
 */
function initNC() {
    var mobile = window._waf_is_mobile;

    document.getElementById(mobile ? 'H5' : 'PC').style.display = 'block';

    AWSC.use("nc", function (state, module) {
        window.nc = module.init(mobile ? NC_h5_Opt : NC_Opt);
    });
}
|
||||
</script>
|
||||
</html>
|
||||
|
18
js/请求头请求体加密/fjs/demo.js
Normal file
18
js/请求头请求体加密/fjs/demo.js
Normal file
File diff suppressed because one or more lines are too long
48
js/请求头请求体加密/fjs/fjs.py
Normal file
48
js/请求头请求体加密/fjs/fjs.py
Normal file
@ -0,0 +1,48 @@
|
||||
import requests
|
||||
import execjs
|
||||
def _load_js(path):
    """Read a JavaScript file and return it as a compiled execjs context."""
    with open(path, 'r', encoding='utf-8') as js_file:
        return execjs.compile(js_file.read())


# Compile each script once instead of re-reading it for every call, and close
# the file handles deterministically.
sign_ctx = _load_js('./sign.js')
decrypt_ctx = _load_js('./demo.js')

pageNum = 1
# Controls how many pages are requested.
while pageNum < 2:
    # Timestamp and request signature are produced by sign.js.
    ts = int(sign_ctx.call('ts'))
    json_data = {
        'pageNo': pageNum,
        'pageSize': 40,
        'total': 5770,
        'AREACODE': '',
        'M_PROJECT_TYPE': '',
        'KIND': 'GCJS',
        'GGTYPE': '1',
        'PROTYPE': '',
        'timeType': '6',
        'BeginTime': '2022-07-18 00:00:00',
        'EndTime': '2023-01-18 23:59:59',
        'createTime': [],
        'ts': ts,
    }
    sign = str(sign_ctx.call('sign', json_data))
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json;charset=UTF-8',
        'Origin': 'https://ggzyfw.fujian.gov.cn',
        'Referer': 'https://ggzyfw.fujian.gov.cn/business/list/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
        'portal-sign': sign,
        'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    # Send the request; fail loudly on an HTTP error instead of on a
    # confusing JSON / KeyError further down.
    http_response = requests.post('https://ggzyfw.fujian.gov.cn/FwPortalApi/Trade/TradeInfo',
                                  headers=headers, json=json_data)
    http_response.raise_for_status()
    response = http_response.json()
    data = response['Data']

    # Decrypt the payload with the `decrypt` function from demo.js.
    ctx = decrypt_ctx.call('decrypt', data)
    print(ctx)
    pageNum += 1
|
58
js/请求头请求体加密/fjs/sign.js
Normal file
58
js/请求头请求体加密/fjs/sign.js
Normal file
@ -0,0 +1,58 @@
|
||||
const Crypto = require('crypto')
|
||||
|
||||
var d = "3637CB36B2E54A72A7002978D0506CDF"
|
||||
|
||||
/**
 * Compute the request signature for a parameter object.
 *
 * Properties whose value is "" or undefined are deleted from `t` in place;
 * the signature is s(d + l(t)) where `d` is the module-level prefix string.
 */
function sign(t) {
    for (var key in t) {
        if (t[key] === "" || t[key] === void 0) {
            delete t[key];
        }
    }
    var material = d + l(t);
    return s(material);
}
|
||||
// Hash helper used by sign(): delegates to md5().
function s(e) {
    var digest = md5(e);
    return digest;
}
|
||||
|
||||
/**
 * Serialise an object into "key1value1key2value2...".
 *
 * Keys are ordered with the comparator `u`; undefined values are skipped;
 * object and array values are written as JSON, everything else is
 * concatenated as-is.
 */
function l(t) {
    var orderedKeys = Object.keys(t).sort(u);
    var out = "";
    orderedKeys.forEach(function (key) {
        var value = t[key];
        if (value === void 0) {
            return;
        }
        var structured = value && value instanceof Object || value instanceof Array;
        if (structured) {
            out += key + JSON.stringify(value);
        } else {
            out += key + value;
        }
    });
    return out;
}
|
||||
|
||||
// 创建标准md5算法
|
||||
// Standard MD5: returns the lowercase hex digest of `text`.
function md5(text) {
    var hasher = Crypto.createHash('md5');
    hasher.update(text);
    return hasher.digest('hex');
}
|
||||
/**
 * Case-insensitive comparator: compares the upper-cased string forms of
 * both arguments and returns 1, 0 or -1.
 */
function u(t, e) {
    var left = t.toString().toUpperCase();
    var right = e.toString().toUpperCase();
    if (left > right) {
        return 1;
    }
    return left == right ? 0 : -1;
}
|
||||
|
||||
// Current time in milliseconds.
function ts() {
    var now = new Date;
    return now.getTime();
}

// Sample request parameters used for the console output below.
data = {
    'pageNo': 1,
    'pageSize': 20,
    'total': 0,
    'AREACODE': '',
    'M_PROJECT_TYPE': '',
    'KIND': 'GCJS',
    'GGTYPE': '1',
    'PROTYPE': '',
    'timeType': '6',
    'BeginTime': '2022-07-18 00:00:00',
    'EndTime': '2023-01-18 23:59:59',
    'createTime': [],
    'ts': ts(),
}

console.log(ts())
console.log(sign(data))
|
||||
|
50
js/请求头请求体加密/football/599_info.py
Normal file
50
js/请求头请求体加密/football/599_info.py
Normal file
@ -0,0 +1,50 @@
|
||||
import requests
|
||||
import execjs
|
||||
import time
|
||||
|
||||
def _load_js(path):
    """Read a JavaScript file and return it as a compiled execjs context."""
    with open(path, 'r', encoding='utf-8') as js_file:
        return execjs.compile(js_file.read())


headers = {
    'authority': 'api.599.com',
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'origin': 'https://599.com',
    'pragma': 'no-cache',
    'referer': 'https://599.com/',
    'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
}
ts = int(time.time() * 1000)
# Parameters that go into the signature (everything except `sign` itself).
pre_params = {
    "appType": "3",
    "channelNumber": "GF1001",
    "comId": "8",
    "lang": "zh",
    "platform": "pc",
    "st": ts,
    "timeZone": "8",
    "version": "671",
    "versionCode": "671"
}
# The signature is produced by function `Z` in js/sss.js.
sign = _load_js('./js/sss.js').call('Z', '/footballapi/core/matchlist/v2/immediate', pre_params)
params = {
    'comId': '8',
    'lang': 'zh',
    'timeZone': '8',
    'version': '671',
    'versionCode': '671',
    'channelNumber': 'GF1001',
    'platform': 'pc',
    'appType': '3',
    'st': str(ts),
    'sign': sign,
}
response = requests.get('https://api.599.com/footballapi/core/matchlist/v2/immediate', params=params, headers=headers)
# Surface HTTP errors directly instead of failing later while parsing JSON.
response.raise_for_status()

data = response.json()['data']
# The response payload is decoded by function `decrypt` in js/demo.js.
ctx = _load_js('./js/demo.js').call('decrypt', data)
print(ctx)
|
2744
js/请求头请求体加密/football/js/demo.js
Normal file
2744
js/请求头请求体加密/football/js/demo.js
Normal file
File diff suppressed because one or more lines are too long
37
js/请求头请求体加密/football/js/sss.js
Normal file
37
js/请求头请求体加密/football/js/sss.js
Normal file
@ -0,0 +1,37 @@
|
||||
const crypto = require('crypto-js')
|
||||
|
||||
// MD5 hex digest of the string form of `text`, computed with crypto-js.
function md5(text) {
    return crypto.MD5(String(text)).toString();
}
|
||||
|
||||
var e = '/footballapi/core/matchlist/v2/immediate'
|
||||
var t = {
|
||||
"appType": "3",
|
||||
"channelNumber": "GF1001",
|
||||
"comId": "8",
|
||||
"lang": "zh",
|
||||
"platform": "pc",
|
||||
"st": 1678167676726,
|
||||
"timeZone": "8",
|
||||
"version": "671",
|
||||
"versionCode": "671"
|
||||
}
|
||||
|
||||
// Returns the module-level sample path `e`.
function l() {
    var path = e;
    return path;
}
|
||||
/**
 * Build the request signature.
 *
 * Starts from the path `e`, appends every key and value of `t` with the keys
 * in sorted order, appends md5("wjj"), hashes the result with md5, lower-cases
 * it and adds the suffix "99".
 */
function Z(e, t) {
    var ordered = {};
    Object.keys(t).sort().forEach(function (key) {
        ordered[key] = t[key];
    });

    var material = e;
    for (var name in ordered) {
        material = material + name + ordered[name];
    }
    material += md5("wjj");

    return md5(material).toLowerCase() + "99";
}
|
||||
|
||||
console.log(Z(e, t));
|
209
js/请求头请求体加密/weather/getParams.js
Normal file
209
js/请求头请求体加密/weather/getParams.js
Normal file
@ -0,0 +1,209 @@
|
||||
const CryptoJS = require('crypto-js')
|
||||
|
||||
|
||||
const askEgGnDlalR = "ajmEmqsokwfpfWv8";//AESkey,可自定义
|
||||
const asi5jI3cvFQI = "bH7Ppp3nOF5k5PCt";//密钥偏移量IV,可自定义
|
||||
const ackbiFPNKGDI = "dE1E6BPpAF5gwUEN";//AESkey,可自定义
|
||||
const aci1c2jlP3KO = "fOf1MjRiLdsmtenp";//密钥偏移量IV,可自定义
|
||||
const dsk80WzdMTMv = "h8ByxcqtKbzeqa3q";//DESkey,可自定义
|
||||
const dsiCs366A1HA = "xJqk2s8ZDjgDHbBN";//密钥偏移量IV,可自定义
|
||||
const dcku3b1jsXMn = "oHwOHptKV6TukKXJ";//DESkey,可自定义
|
||||
const dciQs6k7qfCc = "prKerE8BHOzo9jvI";//密钥偏移量IV,可自定义
|
||||
|
||||
|
||||
// MD5 hex digest of the string form of `text`, computed with crypto-js.
function md5(text) {
    return CryptoJS.MD5(String(text)).toString();
}
|
||||
|
||||
// Base64 helpers built on crypto-js encoders (UTF-8 text <-> Base64 text).
var BASE64 = {
    // UTF-8 text -> Base64 string.
    encrypt: function (text) {
        var words = CryptoJS.enc.Utf8.parse(text);
        return CryptoJS.enc.Base64.stringify(words);
    },
    // Base64 string -> UTF-8 text.
    decrypt: function (text) {
        var words = CryptoJS.enc.Base64.parse(text);
        return words.toString(CryptoJS.enc.Utf8);
    }
};
|
||||
|
||||
// DES-CBC (PKCS7) helpers. The key is the first 16 hex characters of
// md5(key), the IV is hex characters 24..31 of md5(iv); both are parsed as
// UTF-8 text before being handed to crypto-js.
var DES = (function () {
    function derive(key, iv) {
        var keyText = CryptoJS.MD5(key).toString().slice(0, 16);
        var ivText = CryptoJS.MD5(iv).toString().slice(24, 32);
        return {
            key: CryptoJS.enc.Utf8.parse(keyText),
            options: {
                iv: CryptoJS.enc.Utf8.parse(ivText),
                mode: CryptoJS.mode.CBC,
                padding: CryptoJS.pad.Pkcs7
            }
        };
    }

    return {
        // Returns the ciphertext in crypto-js's default string form.
        encrypt: function (text, key, iv) {
            var material = derive(key, iv);
            return CryptoJS.DES.encrypt(text, material.key, material.options).toString();
        },
        // Returns the decrypted plaintext as UTF-8 text.
        decrypt: function (text, key, iv) {
            var material = derive(key, iv);
            return CryptoJS.DES.decrypt(text, material.key, material.options)
                .toString(CryptoJS.enc.Utf8);
        }
    };
})();
|
||||
|
||||
// AES-CBC (PKCS7) helpers. The key is hex characters 16..31 of md5(key), the
// IV is the first 16 hex characters of md5(iv); both are parsed as UTF-8 text
// before being handed to crypto-js.
var AES = (function () {
    function derive(key, iv) {
        var keyText = CryptoJS.MD5(key).toString().slice(16, 32);
        var ivText = CryptoJS.MD5(iv).toString().slice(0, 16);
        return {
            key: CryptoJS.enc.Utf8.parse(keyText),
            options: {
                iv: CryptoJS.enc.Utf8.parse(ivText),
                mode: CryptoJS.mode.CBC,
                padding: CryptoJS.pad.Pkcs7
            }
        };
    }

    return {
        // Returns the ciphertext in crypto-js's default string form.
        encrypt: function (text, key, iv) {
            var material = derive(key, iv);
            return CryptoJS.AES.encrypt(text, material.key, material.options).toString();
        },
        // Returns the decrypted plaintext as UTF-8 text.
        decrypt: function (text, key, iv) {
            var material = derive(key, iv);
            return CryptoJS.AES.decrypt(text, material.key, material.options)
                .toString(CryptoJS.enc.Utf8);
        }
    };
})();
|
||||
|
||||
|
||||
// Return a shallow copy of `obj` whose keys were inserted in sorted order.
function osmThj4lKY(obj) {
    var sortedKeys = Object.keys(obj).sort();
    var copy = {};
    for (var i = 0; i < sortedKeys.length; i++) {
        copy[sortedKeys[i]] = obj[sortedKeys[i]];
    }
    return copy;
}
|
||||
|
||||
/**
 * Build the Base64-encoded GETMONTHDATA request payload shared by
 * getParams / type1 / type2 / type3.
 *
 * The payload is a JSON object with appId (the salt), method, timestamp,
 * clienttype, object ({city}) and a `secret` that is the md5 of
 * appId + method + timestamp + clienttype + JSON of the key-sorted object.
 *
 * @param {string} city  City name placed in the `object` field.
 * @param {string} salt  Value used as appId and as the first md5 component.
 * @returns {string}     Base64 text of the JSON payload.
 */
function _buildMonthDataPayload(city, salt) {
    var query = {city: city};
    var method = "GETMONTHDATA";
    var clientType = 'WEB';
    var timestamp = new Date().getTime();

    var payload = {
        appId: salt,
        method: method,
        timestamp: timestamp,
        clienttype: clientType,
        object: query,
        secret: md5(salt + method + timestamp + clientType + JSON.stringify(osmThj4lKY(query)))
    };
    return BASE64.encrypt(JSON.stringify(payload));
}

// Base64 payload, then DES-encrypted with (a7, a8). Same output as type2.
function getParams(city, salt, a7, a8) {
    return DES.encrypt(_buildMonthDataPayload(city, salt), a7, a8);
}

// Variant 1: Base64 payload only, no cipher layer.
function type1(city, salt) {
    return _buildMonthDataPayload(city, salt);
}

// Variant 2: Base64 payload, then DES-encrypted with (a7, a8).
function type2(city, salt, a7, a8) {
    return DES.encrypt(_buildMonthDataPayload(city, salt), a7, a8);
}

// Variant 3: Base64 payload, then AES-encrypted with (a1, a2).
function type3(city, salt, a1, a2) {
    return AES.encrypt(_buildMonthDataPayload(city, salt), a1, a2);
}
|
||||
|
||||
/**
 * Decode a response body: Base64 -> DES(a5, a6) -> AES(a1, a2) -> Base64,
 * then parse the result as JSON.
 */
function decrypt(data, a1, a2, a5, a6) {
    var desInput = BASE64.decrypt(data);
    var aesInput = DES.decrypt(desInput, a5, a6);
    var encodedJson = AES.decrypt(aesInput, a1, a2);
    var jsonText = BASE64.decrypt(encodedJson);
    return JSON.parse(jsonText);
}
|
||||
|
||||
/**
 * Expand a "p,a,c,k,e,d"-style packed string.
 *
 * @param {string} p  Packed text containing word tokens.
 * @param {number} a  Radix used to derive the token for an index.
 * @param {number} c  Number of dictionary entries.
 * @param {string[]} k  Dictionary: k[i] replaces the token for index i.
 * @param {*} e  Overwritten immediately with the index-to-token encoder.
 * @param {Object} d  Object filled with token -> replacement mappings.
 * @returns {string}  `p` with every known token replaced.
 */
function file(p, a, c, k, e, d) {
    // Encode an index as a token: base-`a` digits, 0-35 via toString(36),
    // larger digits via String.fromCharCode(digit + 29).
    e = function (index) {
        var high = index < a ? '' : e(parseInt(index / a));
        var low = index % a;
        return high + (low > 35 ? String.fromCharCode(low + 29) : low.toString(36));
    };

    // True when String.prototype.replace accepts a function as replacement.
    var supportsFnReplace = !''.replace(/^/, String);
    if (supportsFnReplace) {
        // Build the token table once, then do a single pass over all words.
        while (c--) {
            var token = e(c);
            d[token] = k[c] || token;
        }
        k = [function (word) {
            return d[word];
        }];
        e = function () {
            return '\\w+';
        };
        c = 1;
    }

    while (c--) {
        if (!k[c]) {
            continue;
        }
        p = p.replace(new RegExp('\\b' + e(c) + '\\b', 'g'), k[c]);
    }
    return p;
}
|
||||
|
||||
|
||||
/**
 * Evaluate `file(<data>)`, where `data` is the textual argument list of a
 * packed script, and return the unpacked source.
 *
 * SECURITY NOTE(review): `data` is evaluated as JavaScript. The Python
 * callers in this project pass text downloaded from a remote site, so this
 * runs remote code with the privileges of the local Node process.
 */
function get_enc(data) {
    var call = 'file(' + data + ')';
    return eval(call);
}
|
||||
|
179
js/请求头请求体加密/weather/weather_class.py
Normal file
179
js/请求头请求体加密/weather/weather_class.py
Normal file
@ -0,0 +1,179 @@
|
||||
import base64
|
||||
import re
|
||||
import time
|
||||
from collections import Counter
|
||||
|
||||
import execjs
|
||||
import requests
|
||||
|
||||
|
||||
def remove_par(pat, string) -> (int, str):
    """Remove every match of a pattern from a string.

    :param pat: regular expression to strip out (formatted to text first)
    :param string: text to filter
    :return: number of matches found and the text with all matches removed
    """
    pattern = f'{pat}'
    # subn does the substitution and the counting in a single pass.
    cleaned, hits = re.subn(pattern, '', string)
    return hits, cleaned
|
||||
|
||||
|
||||
def get_re_all(pat, string) -> (list, list):
    """Collect both groups of every match of a two-group pattern.

    :param pat: regular expression with (at least) two capture groups
    :param string: text to search
    :return: list of second-group values, then list of first-group values
    """
    values = []
    names = []
    for found in re.findall(pat, string):
        values.append(found[1])
        names.append(found[0])
    return values, names
|
||||
|
||||
|
||||
def get_re_search(pat, string) -> str:
    """Return the first capture group of the first match.

    :param pat: regular expression with at least one capture group
    :param string: text to search
    :return: text of group 1, or '' when the pattern does not match
    """
    found = re.search(pat, string)
    return found.group(1) if found else ''
|
||||
|
||||
|
||||
class weatherCrawler:
    """Fetch and decrypt monthly data from www.aqistudy.cn for one city.

    The site serves a dynamically generated script that carries the keys,
    IVs, salt and POST field name. This class downloads and unpacks that
    script, extracts those values, builds the encrypted request parameter
    with ``getParams.js`` and decodes the response.
    """

    def __init__(self):
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'Referer': 'https://www.aqistudy.cn/historydata/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
        }
        self.cookies = {
            'Hm_lvt_6088e7f72f5a363447d4bafe03026db8': '1689668701',
            'Hm_lpvt_6088e7f72f5a363447d4bafe03026db8': str(int(time.time())),
        }
        self.city: str = ''  # city name to query
        self.file: str = ''  # unpacked dynamic js source
        self.salt = None  # salt used for the md5 `secret`
        self.par = None  # key name of the POST form field
        self.key_iv: list = []  # key / IV string values found in the dynamic js
        self.key_name: list = []  # variable names of those keys / IVs, used for counting
        self.param: str = ''  # encrypted request parameter

    def __call_js(self, func_name, *args):
        """Call a function from ``getParams.js`` and return its result.

        The file is read inside a ``with`` block so the handle is always
        closed.
        """
        with open('getParams.js', 'r', encoding='utf-8') as js_file:
            source = js_file.read()
        return execjs.compile(source).call(func_name, *args)

    def __get_file(self):
        """Download and unpack the dynamic js, storing it in ``self.file``.

        :raises RuntimeError: when the page contains no dynamic script link
        """
        params = {
            'city': self.city,
        }
        response = requests.get('https://www.aqistudy.cn/historydata/monthdata.php', params=params,
                                cookies=self.cookies, headers=self.headers)
        # First step: find the file name of the dynamic script.
        match = get_re_search(r'<script[^>]*src="[^"]*\/([^\/?]+)\?t=[^"]+"', response.text)
        if not match:
            # A plain string cannot be raised; use a real exception type.
            raise RuntimeError('没有找到动态js文件')

        filename = 'https://www.aqistudy.cn/historydata/resource/js/' + match
        html = requests.get(filename, headers=self.headers).text
        # Strip the packer prologue; what is left (minus its first character
        # and last three characters) is passed to get_enc as the argument list.
        pattern = r'eval\(function\(p,a,c,k,e,d\){.*?}return p}'
        _, filtered_html = remove_par(pattern, html)
        a = self.__call_js('get_enc', filtered_html[1:-3])
        # If the unpacked text still contains `eval`, another layer remains.
        count, a = remove_par('eval', a)
        if count > 0:
            # The number of `dweklxde` occurrences is the number of Base64
            # rounds to undo (observed: 1 to 2).
            count, a = remove_par('dweklxde', a)
            # Drop parentheses and quotes around the encoded text.
            result = a.replace("(", "").replace(")", "").replace("'", '')
            self.file = self.multiple_base64_decode(result, count)
        else:
            # No eval left: this already is the complete script.
            self.file = a

    def __get_params(self):
        """Extract the changing values from the dynamic js.

        Stores the key / IV values and their variable names, the POST field
        name and the salt on the instance.
        """
        key_iv, key_name = get_re_all(r'const\s*(\w+)\s*=\s*"([^"]+)"', self.file)
        par = get_re_search(r'data:\s*{\s*(\w+)\s*:\s*\w+\s*}', self.file)
        salt = get_re_search(r'var\s*\w+\s*=\s*\'(.*?)\'', self.file)
        self.key_name = key_name
        self.key_iv = key_iv
        self.par = par
        self.salt = salt

    def __calculate_type(self):
        """Detect the encryption variant and build ``self.param``.

        The variant is derived from how often the 3rd, 4th, 7th and 8th key
        names occur in the dynamic js.

        :raises RuntimeError: when the occurrence counts match no known variant
        """
        # Raw string: the backslash is a literal character of the set, not
        # the start of an (invalid) escape sequence.
        punctuations = r'''!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~'''
        # Exactly eight key / IV names are expected.
        a1_name, a2_name, a3_name, a4_name, a5_name, a6_name, a7_name, a8_name = self.key_name
        count_keys = [a3_name, a4_name, a7_name, a8_name]
        translator = str.maketrans(punctuations, ' ' * len(punctuations))
        # Count how often each key name appears as a word in the script.
        counters = Counter(self.file.translate(translator).split())
        counts = [counters[key] for key in count_keys]
        # Three known variants.
        if counts == [1, 1, 1, 1]:
            self.param = self.__call_js('type1', self.city, self.salt)
        elif counts == [1, 1, 2, 2]:
            self.param = self.__call_js('type2', self.city, self.salt, self.key_iv[6], self.key_iv[7])
        elif counts == [2, 2, 1, 1]:
            self.param = self.__call_js('type3', self.city, self.salt, self.key_iv[2], self.key_iv[3])
        else:
            # A new variant showed up: inspect the counts and extend the
            # conditions above.
            print(counts)
            raise RuntimeError('unknown encryption type, key counts: {}'.format(counts))

    def __do_post(self):
        """Send the encrypted request and return the decoded data."""
        # Exactly eight key / IV values are expected; only four are needed here.
        a1, a2, _, _, a5, a6, _, _ = self.key_iv
        data = {self.par: self.param}
        response = requests.post('https://www.aqistudy.cn/historydata/api/historyapi.php', cookies=self.cookies,
                                 headers=self.headers,
                                 data=data)
        weather_data = self.__call_js('decrypt', response.text, a1, a2, a5, a6)
        return weather_data['result']['data']

    def get_weather_data(self):
        """Prompt for a city name and return the decoded data for it.

        :return: the ``result.data`` part of the decoded response
        """
        self.city = input('请输入城市名称: ')
        self.__get_file()
        self.__get_params()
        self.__calculate_type()
        return self.__do_post()

    @staticmethod
    def multiple_base64_decode(string, count) -> str:
        """Base64-decode ``string`` ``count`` times, as UTF-8 text each round."""
        decoded_string = string
        for _ in range(count):
            decoded_string = base64.b64decode(decoded_string).decode("utf-8")
        return decoded_string
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Keep prompting for cities and printing their data until interrupted.
    obj = weatherCrawler()
    while True:
        result = obj.get_weather_data()
        print(result)
|
149
js/请求头请求体加密/weather/请求.py
Normal file
149
js/请求头请求体加密/weather/请求.py
Normal file
@ -0,0 +1,149 @@
|
||||
import base64
|
||||
import re
|
||||
import time
|
||||
from collections import Counter
|
||||
|
||||
import execjs
|
||||
import requests
|
||||
|
||||
|
||||
def remove_par(pat, string) -> (int, str):
    """Strip every match of ``pat`` from ``string``.

    :return: number of matches and the text with the matches removed
    """
    # subn substitutes and counts in one pass.
    stripped, total = re.subn(f'{pat}', '', string)
    return total, stripped
|
||||
|
||||
|
||||
def multiple_base64_decode(string, count) -> str:
    """Base64-decode ``string`` ``count`` times, reading each round as UTF-8."""
    current = string
    remaining = count
    while remaining > 0:
        current = base64.b64decode(current).decode("utf-8")
        remaining -= 1
    return current
|
||||
|
||||
|
||||
def get_re_all(pat, string) -> (list, list):
    """Return (second-group values, first-group values) for every match."""
    found = re.findall(pat, string)
    second_groups = [pair[1] for pair in found]
    first_groups = [pair[0] for pair in found]
    return second_groups, first_groups
|
||||
|
||||
|
||||
def get_re_search(pat, string) -> str:
    """Return group 1 of the first match, or '' when nothing matches."""
    hit = re.search(pat, string)
    if not hit:
        return ''
    return hit.group(1)
|
||||
|
||||
|
||||
def get_file(city):
    """Download and unpack the dynamic js for ``city``.

    :param city: city name sent as the ``city`` query parameter
    :return: the unpacked script text, or None when the page contains no
        dynamic script link
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Referer': 'https://www.aqistudy.cn/historydata/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    }
    cookies = {
        'Hm_lvt_6088e7f72f5a363447d4bafe03026db8': '1689668701',
        'Hm_lpvt_6088e7f72f5a363447d4bafe03026db8': str(int(time.time())),
    }
    params = {
        'city': city,
    }
    response = requests.get('https://www.aqistudy.cn/historydata/monthdata.php', params=params, cookies=cookies,
                            headers=headers)
    match = get_re_search(r'<script[^>]*src="[^"]*\/([^\/?]+)\?t=[^"]+"', response.text)
    if not match:
        return None

    filename = 'https://www.aqistudy.cn/historydata/resource/js/' + match
    html = requests.get(filename, headers=headers).text
    # Strip the packer prologue; the remainder (minus its first character and
    # last three characters) is passed to get_enc as the argument list.
    pattern = r'eval\(function\(p,a,c,k,e,d\){.*?}return p}'
    _, filtered_html = remove_par(pattern, html)
    # Read the helper script inside a `with` block so the handle is closed.
    with open('getParams.js', 'r', encoding='utf-8') as js_file:
        js_ctx = execjs.compile(js_file.read())
    a = js_ctx.call('get_enc', filtered_html[1:-3])
    # If `eval` is still present, the text is wrapped in further layers.
    count, a = remove_par('eval', a)
    if count > 0:
        # Number of `dweklxde` occurrences = number of Base64 rounds to undo.
        count, a = remove_par('dweklxde', a)
        result = a.replace("(", "").replace(")", "").replace("'", '')
        data = multiple_base64_decode(result, count)
        return data
    return a
|
||||
|
||||
|
||||
def get_params(data):
    """Extract the changing values from the dynamic js text.

    :param data: unpacked dynamic js source
    :return: ((key names, key values), POST field name, salt)
    """
    values, names = get_re_all(r'const\s*(\w+)\s*=\s*"([^"]+)"', data)
    field_name = get_re_search(r'data:\s*{\s*(\w+)\s*:\s*\w+\s*}', data)
    salt_value = get_re_search(r'var\s*\w+\s*=\s*\'(.*?)\'', data)
    return (names, values), field_name, salt_value
|
||||
|
||||
|
||||
def calculate_type(string, keys: list):
    """Detect which encryption variant the dynamic js uses.

    :param string: the dynamic js source
    :param keys: the four key names whose occurrences are counted
    :return: 1, 2 or 3 for the known variants; None (after printing the
        source) when the counts match none of them
    """
    # Raw string: the backslash is a literal member of the set rather than
    # the start of an invalid escape sequence.
    punctuations = r'''!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~'''
    translator = str.maketrans(punctuations, ' ' * len(punctuations))
    counters = Counter(string.translate(translator).split())
    counts = [counters[key] for key in keys]
    print(counts)
    # All ones: the parameter is only Base64-encoded.
    if counts == [1, 1, 1, 1]:
        return 1
    elif counts == [1, 1, 2, 2]:
        return 2
    elif counts == [2, 2, 1, 1]:
        return 3
    print(string)
|
||||
|
||||
|
||||
def do_post(par_key, value, decrypto_dict):
    """POST the encrypted parameter, decode the response and print the data.

    :param par_key: name of the form field
    :param value: encrypted parameter sent as that field's value
    :param decrypto_dict: dict with keys 'a1', 'a2', 'a5', 'a6' used for
        decoding; the raw response text is stored in it under 'data'
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Referer': 'https://www.aqistudy.cn/historydata/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
    }
    cookies = {
        'Hm_lvt_6088e7f72f5a363447d4bafe03026db8': '1689668701',
        'Hm_lpvt_6088e7f72f5a363447d4bafe03026db8': str(int(time.time())),
    }
    data = {par_key: value}
    response = requests.post('https://www.aqistudy.cn/historydata/api/historyapi.php', cookies=cookies, headers=headers,
                             data=data)
    decrypto_dict['data'] = response.text
    # Read the helper script inside a `with` block so the handle is closed.
    with open('getParams.js', 'r', encoding='utf-8') as js_file:
        js_ctx = execjs.compile(js_file.read())
    weather_data = js_ctx.call(
        'decrypt', decrypto_dict['data'], decrypto_dict['a1'], decrypto_dict['a2'],
        decrypto_dict['a5'], decrypto_dict['a6']
    )
    print(weather_data['result']['data'])
|
||||
|
||||
|
||||
if __name__ == '__main__':
    citys = ['泸州', '上海', '北京', '杭州', '重庆']
    # Compile the helper script once and close the file handle.
    with open('getParams.js', 'r', encoding='utf-8') as js_file:
        js_ctx = execjs.compile(js_file.read())

    for city in citys:
        file = get_file(city)
        if file is None:
            # get_file found no dynamic script link; get_params would crash
            # on None, so skip this city.
            print('没有找到动态js文件: {}'.format(city))
            continue
        datas = get_params(file)
        # Exactly eight key / IV names and values are expected.
        a1_name, a2_name, a3_name, a4_name, a5_name, a6_name, a7_name, a8_name = datas[0][0]
        a1, a2, a3, a4, a5, a6, a7, a8 = datas[0][1]
        par_key = datas[1]
        salt = datas[2]
        calculate_key = [a3_name, a4_name, a7_name, a8_name]
        types = calculate_type(file, calculate_key)
        decrypto_dict = {
            'a1': a1,
            'a2': a2,
            'a5': a5,
            'a6': a6
        }
        print(salt)
        # JS function name and extra arguments for each known variant.
        variants = {
            1: ('type1', ()),
            2: ('type2', (a7, a8)),
            3: ('type3', (a3, a4)),
        }
        if types in variants:
            func_name, extra_args = variants[types]
            value = js_ctx.call(func_name, city, salt, *extra_args)
            do_post(par_key, value, decrypto_dict)
|
18
js/请求头请求体加密/娱乐指数/demo.js
Normal file
18
js/请求头请求体加密/娱乐指数/demo.js
Normal file
@ -0,0 +1,18 @@
|
||||
const Crypto = require('crypto-js')
|
||||
var lastFetchTime = 1674106181275
|
||||
|
||||
var r = "BTk5eL+/n9qX/mzrBE6Zs2F0oLJbn4B25d27rF3+D68svjMzlhXYcPegq/9L2aOEB36xL9TPRrAspOkjYkjQ1ghYPVfy7LQ1OXunJc++mfJQzLR7vIBTKJNsiGNDW8o5HSQpJPI3EcFf78dIqfGrl7J07BAmQ5R2RWycjhMlp10ciYuz9cdWRjdmBkQdCef1C/Czy/du+msFps/4/IgqcRLkTuWfuSFQdGaif5TFL3qS7qGBjJU15T04L6oxwbDHcCvI5kI9xv7bdRh8RMwCmV3ZqVY/3Gly55XkIdwHFb5YDUbgpS3NspzX/7fj/k+VC7dAscaqXfZHZSiXhXHWTgirfoUR4RWTSMNu1XbOwERbpfkF5lNQ3YFumIDbDa6JnW4t12aBTiclj9N3+eIpntV8/f+MRo5kwWeV2FqyP6vu8mjVD++Sywo0owLmNoNdNjsE8jInX7FF4z/RLdMzLOZh/2TwJZXiw5c2EBMCM4MKDg+s0yo/RZ9tHX7A1Kq2I5ExByvAFkf64w8p+j5Vz10vRB+3g2bqARNnSwZc2Jtr65IexcFQX2w9/4n/3gzDqZcjpfXBrJ4ZemEwrXJN05DmnG29hVXjPDYk8WvmHOOKTM+V7MuN8MfnOkC9kGYXvYI/gU7jwY+asGwu9sHgQagpz4Z5vy1M5VhaGq71ob4HStLDrlqG521aoT9eBmRN8BcRG0GMpaC1gJtD8wtIM2u7EQi0XPMMAW+WVQIhJSOpLTOfxYttFJUMG9ZyBwU8Lrp+szVXiFvRA3Qt1d2dmZgdXspcbly8PH7pvRZYT5HN1cJSgpHnh+p3qA+Nw3UzMr9zJo0jntr42bLg3ifNka38gjSwybnEpk3/9NNkACNnibWjj91VtaV0ht9fAggDTYAw3oIk8Jsb2EzewPdk5QyeJIo8EWElKK11Y5BCgMYoWFtFzVjpkZdJBzxtFqrXiHi8twUqvkWAF0tr391/Z7zpzBrG1rwaDtXtRHRZksL1itae9ktvs6l260ZgY72MsmzkeCWwVHPQUD8uu1O0T7ij8JdIHxRlfVSWFMiaKtl7ZxsutN3uTWHt9NWl2aZfYDR+Sx05rzr2rIOjSDoInyh9oXZVc7SZLP72ESUIWn00KXCWJ0wyQGXXYtcR1J0joZ9V0NxdvoY1iwgzbDbRPzOi4i7dA/L/UBeC+Hjpo13TjsmMEyr/eaEvWVQCstY471o8+mpv0gnJ8HdRnb6Blfe+yQ+EINlDUMgFh/MkUSkqAFRv4UzwOeiA9X3L9gqjBsGc9y4RX1l8zQklC46qcjz4cyuoTbBsppyMPs0CureZ3EkI2FZdslvj9CJKOIzG6CwrxtfeNdoks9K2cl2d1S/rn2GNvLqsnZSav1V5dONsC0rlYMaT2H/SEF3Yr5RvhCUroym1F0U/E2ZJ7aN1M/HmfdrTFok1ac5TEW3NPqkAYMcZeZ6UKpsDDZrBdGA06QCLlNReJ7MwLuCalKBf0b/JRH1VTWwmnLccfiWhhCP37Go5hZQfxZO3aJbYGuSE/i5sjuiw04xT+YW6hxio5/1PAR9ishJXVktsDITju3BDIuuejAZjkw9N6s+YMr6QUDWOwwPnxD2VNy1JB2B8RLF9ri53u7QLuT0qqYaRTTvcEehpk/WMPQ9FKZztaqOfeP7XGOXQI80977WGzEU4m54oMuMZjT3AkYjoecS4aTNBwTDissuYAaockFhVDun+z8lyM83mIxO0UWHW2AmDPuLkEzHEElsvkYkZxjyuv6jIc7ZTl4gacSbe+FNuSgm3cxXR8S01/kurythhZGsjNSE0EwLcHr32kPKkBc8q+R7Ix7yfSkFT1WuF1FRHvKad3xyLfEyfG507xZEqFfWgHlv8j/Z5PBqK2aPBFlkd+HA3Zk59w7ygGpKzBcYbWdAf1ygeuFJpjW0juR7aXahV8+7JzXyVvFa50jiVygUQe1njjG1UDeWpjYOwWoFxc1oUFKN4oZZ
gd+skzgUVXY9E6tkZlsG37MTf0rSvsHZV+548vj1jiC1BIQ5t7rDhuRUsmlw5gw11LstnpdfcTUNE2Vz5YltTmVDok5Jk1Uf3jbqn3VhFU2vBgceiF8Ulu2cwZ3e8IQnIkcEEam25oJxckon1UcMtnkTPBZHIXEavWDRZ4pxNcjm+17TTMgjrlEvQdAX7gffAqZB5Ai1t+iBeE5Mm5ARSvBT74GaUrZ1Rvs+fPe0PA63cj1IvRDI5afqBFSIcEAUajn4e7dnRO98nItjhp9gN5hD8DCCN/J28VNoKrVcJ5tasE6BaBv2isv1FKn3AhAaMtM2zmWbzYWvo7/MUk4wV2hriVKHsu9yUceBv+sV2S/63B0GyJ6Fe5vtwNHIXkXS1oTiUNdpvkri3hW7SR2Ai8Yk+bEBQCBAWYN08xRnF9eXJym96fhEUGz0X61Pe3Uvx/m6Ix9WcfEPwQP7UVLzcKoWSf1BbEaW3HQLJJgIpN0SgVtsxsjGbIJ0Sk2Ciuuk8JkBKPCaqgE6E/LEej7sToPzpzFXwBdnWW2H/6L5Ast/AFindK05xwp5n0Q+hZH4KOmnMP/41RmAiHi0IFmPh8lu/7ZRjFvIKoPje+f8DOZatxRYqP7gZH5Y11K/zCb6RXzrZi3j1CuybaBRSZqkwLuwv2CozQAU9ABYneLNCCmwVLi2C7cuHKxdc/OjUJl9VhG7xeik1fRFp70pdAALfjgBdh8y9e4QAjxp1XoKiLomjwewrCHXnYfsVfDYQQhyAmYDCMXVY5k3oHPGViJQfBGMp/baAyxgYDE7Kp3nuEnG04Z+PuG+XiICmadq1vBAaG//2HT5TNE9BrODsKV8EYHbrFXv8gO/iqy7MnnDVYkxOvZeUnyqrE+zyGD3gqerYu9VHieQS2ya2exDHeax45631ihXiGQSvoMRGzdgiE2a5iAF5muggkLrbJ2EGgje4fwqSeo+3cDCvqRQgLe7Nfr5gcMj9KqBsW4ClvpC3/1OvWLIkT1NPKKUoT4QZuv4MjtgHHkgCpgybA1zUv0TOQZqdsJ+Dnd083ZE9DgfDH+D4n+Fcs6Lg92CpnAN7zM79vaR8/gLy0s1S53udpDe/SAkc8up/r/3M3cukmbRyFACxCsWeBXrKDlWAgH0i0ktesYtJoIWJJuQTO0GqJw0E6QgaVK9cCpmNyIVvUih2Imtv+CJv2dkxaoINc8294PnVpXt80UkEWUen2Pvzaefw7UGYOkt80NHX7YiazV89Y6c3PO2/fUwhEy/zyxj04E44YJD7jvVTcnNAnAsMMa+XRtM48ootzKu9utJKlmW6AOr/iewyZCyA0Hf98zi1NhSS3Mfvvao54HBaO0oigG2l4mUCxZwv425eHD0VWafei30AG6t3jTM0rn5YVmwZaLkGeKxO+9QfhPdSS4yfemVVi4nrX+D/VWvDCd+Vkp20wFc2GoEMpRyQg9/TTBHH0hVXmrfhu34nE/0pn2icjk/uRxE1UsmJrs4It0EcURtyZ5ypWYokYgVL5wypV0MxIs84DP9RqZ+KB3Gle+yTdlM87dJvKQZZwXMRfG815JcXJxwwiv1ifGqTtEZfN9rh3dZ7wS/EqXvw7FBf9B6no/4YTw4zBGJWxcpEEQto5W/I4x+xCoschZEvu69Lh1sEtug1Rq3zlIsDA+fXi1N1eR57OD7xcDVOqRclfn/LaRYPLX3Wht17LlifATgnSHWbjU1uUyv2/ksLl4Urthhs5c3J38XfWpm7JIKh910="
|
||||
|
||||
|
||||
/**
 * Decrypt an AES payload and return it as a UTF-8 string.
 *
 * Both the key and the IV are the UTF-8 bytes of `lastFetchTime` with the
 * literal "000" appended.
 *
 * @param r              ciphertext; converted with toString() before decryption
 * @param lastFetchTime  value used to derive the key and IV
 * @returns decrypted text decoded as UTF-8
 */
function decrypt(r, lastFetchTime) {
    const secret = lastFetchTime + "000";
    const key = Crypto.enc.Utf8.parse(secret);
    const iv = Crypto.enc.Utf8.parse(secret);
    const decrypted = Crypto.AES.decrypt(r.toString(), key, { iv: iv });
    return decrypted.toString(Crypto.enc.Utf8);
}
|
||||
|
||||
console.log(decrypt(r, lastFetchTime))
|
||||
|
45
js/请求头请求体加密/娱乐指数/ylzs.py
Normal file
45
js/请求头请求体加密/娱乐指数/ylzs.py
Normal file
@ -0,0 +1,45 @@
|
||||
import requests
|
||||
import execjs
|
||||
|
||||
cookies = {
|
||||
'mobile_iindex_uuid': '9f0ae384-2821-5797-8a76-87bb1cef4a1f',
|
||||
'Hm_lvt_2873e2b0bdd5404c734992cd3ae7253f': '1674101222,1674103567',
|
||||
'Hm_lpvt_2873e2b0bdd5404c734992cd3ae7253f': '1674103567',
|
||||
}
|
||||
|
||||
headers = {
|
||||
'authority': 'www.chinaindex.net',
|
||||
'accept': 'application/json, text/plain, */*',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7',
|
||||
# 'cookie': 'mobile_iindex_uuid=9f0ae384-2821-5797-8a76-87bb1cef4a1f; Hm_lvt_2873e2b0bdd5404c734992cd3ae7253f=1674101222,1674103567; Hm_lpvt_2873e2b0bdd5404c734992cd3ae7253f=1674103567',
|
||||
'funcid': 'undefined',
|
||||
'incognitomode': '0',
|
||||
'referer': 'https://www.chinaindex.net/ranklist/5/0',
|
||||
'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
||||
'uuid': '9f0ae384-2821-5797-8a76-87bb1cef4a1f',
|
||||
}
|
||||
|
||||
params = {
|
||||
'keyWord': '李知恩',
|
||||
'sign': 'b3776cdf6331ee0f6653d1de544291c3'
|
||||
}
|
||||
|
||||
# Fetch the encrypted search result for the configured keyword.
response = requests.get(
    'https://www.chinaindex.net/iIndexMobileServer/mobile/comm/getSearchResult',
    params=params,
    cookies=cookies,
    headers=headers,
)

# Parse the body once; it carries both the ciphertext and the value the
# JS `decrypt` function uses to derive its key.
payload = response.json()
r = payload['data']
lastFetchTime = payload['lastFetchTime']

# Read the helper script through a context manager so the handle is closed.
with open('./demo.js', 'r', encoding='utf-8') as js_file:
    js_source = js_file.read()
ctx = execjs.compile(js_source).call('decrypt', r, lastFetchTime)

print(ctx)
|
17
js/请求头请求体加密/有道翻译/demo.js
Normal file
17
js/请求头请求体加密/有道翻译/demo.js
Normal file
@ -0,0 +1,17 @@
|
||||
// const Crypto = require('crypto-js')
|
||||
//
|
||||
//
|
||||
// var data = 'Z21kD9ZK1ke6ugku2ccWuz4Ip5f4PLCoxWstZf_6UUyBoy8dpWc3NOXFRrnPMya7chcEL7e2Yz1xjFqcfdncOW4vOoJ66RTmRa8-dGZla_ExpWOUP0G1QJFtJ6Gj0ngir07R0ETWttaGO185v5rccLlZKqOCmJuChZSA-Dw9U6B2AOK4-RqYjAQEQ5vF7ph71eC5ZEvV6dm_xv0ywEOKi58R9xWx7fiJytxxlsz-oprAHdRXnI6kWszLLJJpr45DMBjoeArZfVssgWXzX_IlNUvTtj_1o95BpERVvV1FxGEeN-_TLgLaK9j7rjT4O-yPHpbuCk9q1BpLVSh3B4CPWCZPMIHwJiFtfQAC8_t-HWs45DWbW54DEny_doBItZ6v'
|
||||
// var key = 'ydsecret://query/key/B*RGygVywfNBwpmBaZg*WT7SIOUP2T0C9WHMZN39j^DAdaZhAnxvGcCY6VYFwnHl'
|
||||
// var iv = 'ydsecret://query/iv/C@lZe2YzHtZ2CYgaXKSVfsb7Y4QWHjITPPZ0nQp87fBeJ!Iv6v^6fvi2WN@bYpJ4'
|
||||
//
|
||||
// var ax = [8, 20, 157, 167, 60, 89, 206, 98, 85, 91, 1, 233, 47, 52, 232, 56]
|
||||
// var b = [210, 187, 27, 253, 232, 59, 56, 195, 68, 54, 99, 87, 183, 156, 174, 28]
|
||||
|
||||
// Hex-encoded bytes to inspect.
let data01 = '08149da73c59ce62555b01e92f34e838'

// Decode the hex string into a Buffer first.
let newdata = Buffer.from(data01, 'hex');

console.log("newdata ", newdata);

// toString() then renders the buffer as a UTF-8 string.
console.log(newdata.toString("utf-8"));
|
75
js/请求头请求体加密/有道翻译/youdao.py
Normal file
75
js/请求头请求体加密/有道翻译/youdao.py
Normal file
@ -0,0 +1,75 @@
|
||||
import json
|
||||
from Crypto.Cipher import AES
|
||||
import base64
|
||||
import time
|
||||
from hashlib import md5
|
||||
import requests
|
||||
|
||||
|
||||
def sign(t=None):
    """Return the MD5 hex signature for a translate request.

    Args:
        t: Millisecond timestamp to sign. When omitted, the current time is
            used, as before. Passing it explicitly lets the caller send the
            very same value in the request's ``mysticTime`` field.

    Returns:
        The 32-character lowercase hex digest of the signing string.
    """
    if t is None:
        t = int(time.time() * 1000)
    n = f'client=fanyideskweb&mysticTime={t}&product=webfanyi&key=fsdsogkndfokasodnaso'
    return md5(n.encode('utf-8')).hexdigest()
|
||||
|
||||
|
||||
def decrypto(data):
    """Decrypt a URL-safe base64 AES-CBC payload and return it as text.

    Args:
        data: URL-safe base64 ciphertext.

    Returns:
        The decrypted bytes decoded as UTF-8, with padding and surrounding
        whitespace removed.
    """
    key = b'\x08\x14\x9d\xa7\x3c\x59\xce\x62\x55\x5b\x01\xe9\x2f\x34\xe8\x38'
    iv = b'\xd2\xbb\x1b\xfd\xe8\x3b\x38\xc3\x44\x36\x63\x57\xb7\x9c\xae\x1c'
    aes = AES.new(key, AES.MODE_CBC, iv)
    den_text = aes.decrypt(base64.urlsafe_b64decode(data))
    # str.strip() only removes whitespace, so PKCS#7 padding bytes such as
    # \x01-\x08 or \x0e-\x10 used to leak into the result. Remove the padding
    # only when the tail really is well-formed; otherwise leave the data alone.
    pad_len = den_text[-1] if den_text else 0
    if 1 <= pad_len <= 16 and den_text.endswith(bytes([pad_len]) * pad_len):
        den_text = den_text[:-pad_len]
    return str(den_text, 'utf-8').strip()
|
||||
|
||||
|
||||
def post(w, f, t):
    """POST a translation request and return the raw response body.

    Args:
        w: Text to translate.
        f: Source language code, sent as ``from``.
        t: Target language code, sent as ``to``.

    Returns:
        ``response.text`` of the webtranslate endpoint.
    """
    # The signature embeds a millisecond timestamp that must equal the
    # ``mysticTime`` form field. Reading the clock twice (once for the sign,
    # once for the field) can straddle a millisecond boundary, so take one
    # reading and derive both values from it.
    mystic_time = str(int(time.time() * 1000))
    sign_input = f'client=fanyideskweb&mysticTime={mystic_time}&product=webfanyi&key=fsdsogkndfokasodnaso'
    signature = md5(sign_input.encode('utf-8')).hexdigest()

    cookies = {
        'OUTFOX_SEARCH_USER_ID': '123456789@192.168.60.5',
    }
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Accept-Language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'Origin': 'https://fanyi.youdao.com',
        'Referer': 'https://fanyi.youdao.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36',
        'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }
    data = {
        'i': w,
        'from': f,
        'to': t,
        'dictResult': 'true',
        'keyid': 'webfanyi',
        'sign': signature,
        'client': 'fanyideskweb',
        'product': 'webfanyi',
        'appVersion': '1.0.0',
        'vendor': 'web',
        'pointParam': 'client,mysticTime,product',
        'mysticTime': mystic_time,
        'keyfrom': 'fanyi.web',
    }
    response = requests.post('https://dict.youdao.com/webtranslate', headers=headers, data=data, cookies=cookies)
    return response.text
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Interactive loop: prompt for languages and text, print the translation.
    while True:
        try:
            From = input('请输入开始语言(自动auto, 中文zh-CHS, 韩文ko, 英文en)\n')
            To = input('请输入翻译的语言(默认, 中文zh-CHS, 韩文ko, 英文en)\n')
            word = input('请输入单词:')
            enc = post(word, From, To)
            ctx = decrypto(enc)
            print(ctx)
        except (KeyboardInterrupt, EOFError):
            # A bare `except:` used to swallow Ctrl-C and closed stdin, which
            # made the loop impossible to leave; stop cleanly instead.
            break
        except Exception:
            print('出现异常,请重新输入!')
            continue
|
1018
js/请求头请求体加密/网易云音乐/comment/comment_of_1297486027.csv
Normal file
1018
js/请求头请求体加密/网易云音乐/comment/comment_of_1297486027.csv
Normal file
File diff suppressed because it is too large
Load Diff
1118
js/请求头请求体加密/网易云音乐/comment/comment_of_488249475.csv
Normal file
1118
js/请求头请求体加密/网易云音乐/comment/comment_of_488249475.csv
Normal file
File diff suppressed because it is too large
Load Diff
50
js/请求头请求体加密/网易云音乐/decrpyo.py
Normal file
50
js/请求头请求体加密/网易云音乐/decrpyo.py
Normal file
@ -0,0 +1,50 @@
|
||||
import random
|
||||
from binascii import hexlify
|
||||
import base64
|
||||
from Crypto.Cipher import AES
|
||||
|
||||
e = "010001"
|
||||
g = "0CoJUm6Qyw8W8jud"
|
||||
f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
|
||||
i3x = '{"csrf_token":"","cursor":"1672939386847","offset":"0","orderType":"1","pageNo":"3","pageSize":"20","rid":"R_SO_4_1835283134","threadId":"R_SO_4_1835283134"}'
|
||||
|
||||
|
||||
# Generate a random alphanumeric string of the requested length.
def RandomString(a):
    """Return ``a`` characters drawn at random from [a-zA-Z0-9].

    Characters are drawn with replacement, so a character may repeat and any
    length is accepted. ``random.sample`` (used previously) never repeats a
    character and raises ``ValueError`` for lengths above 62.

    Args:
        a: Number of characters to generate.

    Returns:
        The generated string.
    """
    string = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    return ''.join(random.choices(string, k=a))
|
||||
|
||||
|
||||
# AES encryption
def AESEncrypto(text, key):
    """Encrypt ``text`` with AES-CBC and return the base64 ciphertext.

    Args:
        text: Plaintext string; encoded as UTF-8 before encryption.
        key: Key string; encoded as UTF-8.

    Returns:
        Base64-encoded ciphertext as a ``str``.
    """
    block_size = 16
    key_bytes = key.encode("utf-8")
    iv_bytes = "0102030405060708".encode("utf-8")
    plaintext = text.encode("utf-8")
    # Pad to a multiple of the block size; every pad byte holds the pad length.
    fill = block_size - len(plaintext) % block_size
    padded = plaintext + fill * bytes([fill])
    cipher = AES.new(key_bytes, AES.MODE_CBC, iv_bytes)
    encrypted = cipher.encrypt(padded)
    return base64.b64encode(encrypted).decode("utf-8")
|
||||
|
||||
|
||||
# RSA encryption
def RSAEncrypto(text):
    """Encrypt the reversed ``text`` with the module-level exponent/modulus.

    The reversed text's UTF-8 bytes are read as one big-endian integer and
    raised to ``e`` modulo ``f`` (both module-level hex strings).

    Args:
        text: String to encrypt.

    Returns:
        Lowercase hex of the result, left-padded with zeros to at least 131
        characters.
        NOTE(review): 131 does not match the 256 hex digits of a 1024-bit
        modulus, so the padding is normally a no-op -- confirm the intent.
    """
    reversed_bytes = text[::-1].encode('utf-8')
    message = int(hexlify(reversed_bytes), 16)
    exponent = int(e, 16)
    modulus = int(f, 16)
    cipher_int = pow(message, exponent, modulus)
    return format(cipher_int, 'x').zfill(131)
|
||||
|
||||
|
||||
def d(text):
    """Build the encrypted request parameters for ``text``.

    The text is AES-encrypted with the fixed key ``g`` and then again with a
    fresh random 16-character key; that random key is RSA-encrypted.

    Args:
        text: Plaintext payload.

    Returns:
        Dict with ``encText`` (double-AES ciphertext) and ``encSecKey``
        (RSA-encrypted random key).
    """
    session_key = RandomString(16)
    first_pass = AESEncrypto(text, g)
    second_pass = AESEncrypto(first_pass, session_key)
    return {
        "encText": second_pass,
        "encSecKey": RSAEncrypto(session_key),
    }
|
||||
|
||||
|
||||
|
392
js/请求头请求体加密/网易云音乐/demo.js
Normal file
392
js/请求头请求体加密/网易云音乐/demo.js
Normal file
@ -0,0 +1,392 @@
|
||||
const CryptoJS = require('crypto-js')
|
||||
const jsdom = require('jsdom') // npm install jsdom
|
||||
const { JSDOM } = jsdom
|
||||
const dom = new JSDOM('<!DOCTYPE html><p>Hello World<\p>')
|
||||
|
||||
window = dom.window
|
||||
document = window.document
|
||||
|
||||
var maxDigits, ZERO_ARRAY, bigZero, bigOne, dpl10, lr10, hexatrigesimalToChar, hexToChar, highBitMasks, lowBitMasks, biRadixBase = 2, biRadixBits = 16, bitsPerDigit = biRadixBits, biRadix = 65536, biHalfRadix = biRadix >>> 1, biRadixSquared = biRadix * biRadix, maxDigitVal = biRadix - 1, maxInteger = 9999999999999998;
|
||||
setMaxDigits(20),
|
||||
dpl10 = 15,
|
||||
lr10 = biFromNumber(1e15),
|
||||
hexatrigesimalToChar = ["0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t","u","v","w","x","y","z"],
|
||||
hexToChar = ["0","1","2","3","4","5","6","7","8","9","a","b","c","d","e","f"],
|
||||
highBitMasks = [0,32768,49152,57344,61440,63488,64512,65024,65280,65408,65472,65504,65520,65528,65532,65534,65535],
|
||||
lowBitMasks = [0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535];
|
||||
|
||||
var xx = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
|
||||
i3x = {
|
||||
csrf_token: "",
|
||||
cursor: "1672939386847",
|
||||
offset: "0",
|
||||
orderType: "1",
|
||||
pageNo: "3",
|
||||
pageSize: "20",
|
||||
rid: "R_SO_4_1835283134",
|
||||
threadId:"R_SO_4_1835283134",
|
||||
}
|
||||
var bMr1x = d(JSON.stringify(i3x), '010001', xx, '0CoJUm6Qyw8W8jud');
|
||||
/**
 * Build a random string of `a` characters drawn from [a-zA-Z0-9].
 *
 * @param a  number of characters to generate
 * @returns the generated string
 */
function a(a) {
    var alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    var out = "";
    for (var n = 0; n < a; n += 1) {
        var pick = Math.floor(Math.random() * alphabet.length);
        out += alphabet.charAt(pick);
    }
    return out;
}
|
||||
/**
 * AES-CBC encrypt `a` with key `b` and the fixed IV "0102030405060708".
 *
 * @param a  plaintext string
 * @param b  key string
 * @returns the CryptoJS cipher result converted with toString()
 */
function b(a, b) {
    var key = CryptoJS.enc.Utf8.parse(b);
    var iv = CryptoJS.enc.Utf8.parse("0102030405060708");
    var plaintext = CryptoJS.enc.Utf8.parse(a);
    var options = {
        iv: iv,
        mode: CryptoJS.mode.CBC
    };
    return CryptoJS.AES.encrypt(plaintext, key, options).toString();
}
|
||||
/**
 * Set the digit capacity used by every BigInt created afterwards and rebuild
 * the shared zero template plus the bigZero / bigOne constants.
 *
 * @param a  number of 16-bit digits per BigInt
 */
function setMaxDigits(a) {
    maxDigits = a;
    ZERO_ARRAY = new Array(maxDigits).fill(0);
    bigZero = new BigInt();
    bigOne = new BigInt();
    bigOne.digits[0] = 1;
}
|
||||
/**
 * Big-integer record: `digits` (least-significant first) and `isNeg`.
 *
 * @param a  pass boolean true to skip allocating the digit array
 *           (digits is then null and must be assigned by the caller)
 */
function BigInt(a) {
    if (typeof a === "boolean" && a === true) {
        this.digits = null;
    } else {
        this.digits = ZERO_ARRAY.slice(0);
    }
    this.isNeg = false;
}
|
||||
/**
 * RSA-encrypt string `a` with public exponent `b` and modulus `c` (hex).
 *
 * @param a  text to encrypt
 * @param b  public exponent as a hex string
 * @param c  modulus as a hex string
 * @returns the hex ciphertext produced by encryptedString
 */
function c(a, b, c) {
    // Digit capacity is raised before the key's BigInts are created.
    setMaxDigits(131);
    var keyPair = new RSAKeyPair(b, "", c);
    return encryptedString(keyPair, a);
}
|
||||
/**
 * Return string `a` with its UTF-16 code units in reverse order.
 */
function reverseStr(a) {
    return a.split("").reverse().join("");
}
|
||||
/**
 * Convert one 16-bit digit to its 4-character hex representation.
 *
 * @param a  digit value
 * @returns four lowercase hex characters, most-significant nibble first
 */
function digitToHex(a) {
    var mask = 15;
    var hex = "";
    // The loop counter is declared locally; it used to leak into the global
    // scope as an implicit global `i`.
    for (var i = 0; i < 4; ++i) {
        hex += hexToChar[a & mask];
        a >>>= 4;
    }
    // Nibbles were collected least-significant first.
    return reverseStr(hex);
}
|
||||
/**
 * Render a BigInt as hex, from its highest non-zero digit down to digit 0.
 *
 * @param a  BigInt to convert
 * @returns hex string, four characters per digit
 */
function biToHex(a) {
    var hex = "";
    var idx = biHighIndex(a);
    while (idx > -1) {
        hex += digitToHex(a.digits[idx]);
        --idx;
    }
    return hex;
}
|
||||
/**
 * Return a new BigInt holding only the lowest `b` digits of `a`.
 */
function biModuloByRadixPower(a, b) {
    var low = new BigInt();
    arrayCopy(a.digits, 0, low.digits, 0, b);
    return low;
}
|
||||
/**
 * Return a new BigInt with the digits of `a` shifted down by `b` positions.
 */
function biDivideByRadixPower(a, b) {
    var high = new BigInt();
    var count = high.digits.length - b;
    arrayCopy(a.digits, b, high.digits, 0, count);
    return high;
}
|
||||
/**
 * Schoolbook multiplication of two BigInts.
 *
 * @param a  first factor
 * @param b  second factor
 * @returns a new BigInt; its sign is negative when the operand signs differ
 */
function biMultiply(a, b) {
    var product = new BigInt();
    var highA = biHighIndex(a);
    var highB = biHighIndex(b);
    // `j` is declared here; it used to be assigned without a declaration and
    // leaked into the global scope.
    var carry, sum, i, j, k;
    for (k = 0; k <= highB; ++k) {
        carry = 0;
        i = k;
        for (j = 0; j <= highA; ++j, ++i) {
            sum = product.digits[i] + a.digits[j] * b.digits[k] + carry;
            product.digits[i] = sum & maxDigitVal;
            carry = sum >>> biRadixBits;
        }
        product.digits[k + highA + 1] = carry;
    }
    product.isNeg = a.isNeg != b.isNeg;
    return product;
}
|
||||
/**
 * Encrypt string `b` with key `a`, one chunk of `a.chunkSize` characters at a
 * time, and return the chunks' ciphertexts joined by single spaces.
 *
 * @param a  key object exposing chunkSize, radix, e and barrett
 * @param b  text to encrypt
 * @returns space-separated ciphertext blocks
 */
function encryptedString(a, b) {
    var codes = new Array();
    var textLength = b.length;
    var pos = 0;
    while (pos < textLength) {
        codes[pos] = b.charCodeAt(pos);
        pos++;
    }
    // Zero-pad so the data splits into whole chunks.
    while (codes.length % a.chunkSize != 0) {
        codes[pos++] = 0;
    }

    var total = codes.length;
    var output = "";
    for (pos = 0; pos < total; pos += a.chunkSize) {
        var block = new BigInt();
        var digit = 0;
        // Two character codes are packed into each 16-bit digit, low byte first.
        for (var src = pos; src < pos + a.chunkSize; ++digit) {
            block.digits[digit] = codes[src++];
            block.digits[digit] += codes[src++] << 8;
        }
        var encrypted = a.barrett.powMod(block, a.e);
        // NOTE(review): biToString is not among the functions visible in this
        // file; it is only reached when radix is not 16.
        var text = a.radix == 16 ? biToHex(encrypted) : biToString(encrypted, a.radix);
        output += text + " ";
    }
    // Drop the trailing separator.
    return output.substring(0, output.length - 1);
}
|
||||
/**
 * Reduce `a` modulo this.modulus using the precomputed this.mu.
 * Installed as the `modulo` method of BarrettMu instances.
 *
 * @param a  BigInt to reduce
 * @returns the reduced BigInt
 */
function BarrettMu_modulo(a) {
    var q1 = biDivideByRadixPower(a, this.k - 1);
    var q2 = biMultiply(q1, this.mu);
    var q3 = biDivideByRadixPower(q2, this.k + 1);
    var r1 = biModuloByRadixPower(a, this.k + 1);
    var r2term = biMultiply(q3, this.modulus);
    var r2 = biModuloByRadixPower(r2term, this.k + 1);
    var r = biSubtract(r1, r2);
    if (r.isNeg) {
        r = biAdd(r, this.bkplus1);
    }
    // Subtract the modulus until the remainder drops below it.
    while (biCompare(r, this.modulus) >= 0) {
        r = biSubtract(r, this.modulus);
    }
    return r;
}
|
||||
/**
 * Multiply `a` by `b` and reduce the product with this.modulo.
 */
function BarrettMu_multiplyMod(a, b) {
    return this.modulo(biMultiply(a, b));
}
|
||||
/**
 * Raise `a` to the power `b` modulo this.modulus by square-and-multiply,
 * consuming the exponent one bit at a time from the low end.
 *
 * @param a  base
 * @param b  exponent
 * @returns the modular power as a BigInt
 */
function BarrettMu_powMod(a, b) {
    var result = new BigInt();
    result.digits[0] = 1;
    var base = a;
    var exponent = b;
    while (true) {
        if ((exponent.digits[0] & 1) != 0) {
            result = this.multiplyMod(result, base);
        }
        exponent = biShiftRight(exponent, 1);
        // Stop once the exponent has been shifted down to zero.
        if (exponent.digits[0] == 0 && biHighIndex(exponent) == 0) {
            break;
        }
        base = this.multiplyMod(base, base);
    }
    return result;
}
|
||||
/**
 * Return `a` shifted right by `b` bits as a new BigInt (sign preserved).
 */
function biShiftRight(a, b) {
    var digitShift = Math.floor(b / bitsPerDigit);
    var shifted = new BigInt();
    // Whole-digit part of the shift.
    arrayCopy(a.digits, digitShift, shifted.digits, 0, a.digits.length - digitShift);
    var bits = b % bitsPerDigit;
    var leftBits = bitsPerDigit - bits;
    var last = shifted.digits.length - 1;
    // Remaining bits: each digit takes its upper part plus the low bits of
    // the next-higher digit.
    for (var lo = 0; lo < last; ++lo) {
        var hi = lo + 1;
        shifted.digits[lo] = (shifted.digits[lo] >>> bits) | ((shifted.digits[hi] & lowBitMasks[bits]) << leftBits);
    }
    shifted.digits[shifted.digits.length - 1] >>>= bits;
    shifted.isNeg = a.isNeg;
    return shifted;
}
|
||||
/**
 * Multiply BigInt `a` by the single digit `b`.
 *
 * @param a  BigInt factor
 * @param b  digit-sized factor
 * @returns a new BigInt holding the product
 */
function biMultiplyDigit(a, b) {
    // `result` is declared locally; it used to be an implicit global.
    var result = new BigInt();
    var high = biHighIndex(a);
    var carry = 0;
    for (var idx = 0; idx <= high; ++idx) {
        var sum = result.digits[idx] + a.digits[idx] * b + carry;
        result.digits[idx] = sum & maxDigitVal;
        carry = sum >>> biRadixBits;
    }
    result.digits[1 + high] = carry;
    return result;
}
|
||||
/**
 * Return a - b as a new BigInt.
 *
 * Operands with different signs are handled by adding magnitudes through
 * biAdd; `b`'s sign flag is flipped only for the duration of that call.
 */
function biSubtract(a, b) {
    var diff, term, borrow, idx;
    if (a.isNeg != b.isNeg) {
        b.isNeg = !b.isNeg;
        diff = biAdd(a, b);
        b.isNeg = !b.isNeg;
    } else {
        diff = new BigInt();
        borrow = 0;
        for (idx = 0; idx < a.digits.length; ++idx) {
            term = a.digits[idx] - b.digits[idx] + borrow;
            diff.digits[idx] = 65535 & term;
            if (diff.digits[idx] < 0) {
                diff.digits[idx] += biRadix;
            }
            borrow = 0 - Number(term < 0);
        }
        if (borrow == -1) {
            // The magnitude went negative: negate the digits and flip the sign.
            borrow = 0;
            for (idx = 0; idx < a.digits.length; ++idx) {
                term = 0 - diff.digits[idx] + borrow;
                diff.digits[idx] = 65535 & term;
                if (diff.digits[idx] < 0) {
                    diff.digits[idx] += biRadix;
                }
                borrow = 0 - Number(term < 0);
            }
            diff.isNeg = !a.isNeg;
        } else {
            diff.isNeg = a.isNeg;
        }
    }
    return diff;
}

/**
 * Return a + b as a new BigInt.
 *
 * biSubtract, BarrettMu_modulo and biDivideModulo all call biAdd, but the
 * file never defined it, so those paths raised a ReferenceError. It mirrors
 * biSubtract: mixed signs delegate to biSubtract, equal signs add digit by
 * digit with a carry.
 */
function biAdd(a, b) {
    var sum;
    if (a.isNeg != b.isNeg) {
        b.isNeg = !b.isNeg;
        sum = biSubtract(a, b);
        b.isNeg = !b.isNeg;
    } else {
        sum = new BigInt();
        var carry = 0;
        for (var idx = 0; idx < a.digits.length; ++idx) {
            var total = a.digits[idx] + b.digits[idx] + carry;
            sum.digits[idx] = total & 65535;
            carry = Number(total >= biRadix);
        }
        sum.isNeg = a.isNeg;
    }
    return sum;
}
|
||||
/**
 * Compare two BigInts of equal digit capacity.
 *
 * @returns 1 when a > b, -1 when a < b, 0 when equal
 */
function biCompare(a, b) {
    if (a.isNeg != b.isNeg) {
        return 1 - 2 * Number(a.isNeg);
    }
    // Same sign: the highest differing digit decides.
    var idx = a.digits.length - 1;
    while (idx >= 0) {
        var da = a.digits[idx];
        var db = b.digits[idx];
        if (da != db) {
            if (a.isNeg) {
                return 1 - 2 * Number(da > db);
            }
            return 1 - 2 * Number(da < db);
        }
        --idx;
    }
    return 0;
}
|
||||
/**
 * Return a new BigInt with the digits of `a` shifted up by `b` positions.
 */
function biMultiplyByRadixPower(a, b) {
    var scaled = new BigInt();
    var count = scaled.digits.length - b;
    arrayCopy(a.digits, 0, scaled.digits, b, count);
    return scaled;
}
|
||||
/**
 * Copy up to `e` elements from `a` (starting at index `b`) into `c`
 * (starting at index `d`). Copying stops at the end of `a`.
 */
function arrayCopy(a, b, c, d, e) {
    var stop = Math.min(b + e, a.length);
    var dst = d;
    for (var src = b; src < stop; ++src) {
        c[dst] = a[src];
        ++dst;
    }
}
|
||||
/**
 * Return `a` shifted left by `b` bits as a new BigInt (sign preserved).
 */
function biShiftLeft(a, b) {
    var digitShift = Math.floor(b / bitsPerDigit);
    var shifted = new BigInt();
    // Whole-digit part of the shift.
    arrayCopy(a.digits, 0, shifted.digits, digitShift, shifted.digits.length - digitShift);
    var bits = b % bitsPerDigit;
    var rightBits = bitsPerDigit - bits;
    // Walk from the top digit down, pulling in the high bits of the digit below.
    var hi = shifted.digits.length - 1;
    var lo = hi - 1;
    while (hi > 0) {
        shifted.digits[hi] = ((shifted.digits[hi] << bits) & maxDigitVal) | ((shifted.digits[lo] & highBitMasks[bits]) >>> rightBits);
        --hi;
        --lo;
    }
    // `hi` keeps the value the loop ended on, as in the original.
    shifted.digits[0] = (shifted.digits[hi] << bits) & maxDigitVal;
    shifted.isNeg = a.isNeg;
    return shifted;
}
|
||||
/**
 * Count the bits of `a` up to the top set bit of its highest non-zero digit.
 */
function biNumBits(a) {
    var high = biHighIndex(a);
    var top = a.digits[high];
    var upper = (high + 1) * bitsPerDigit;
    var bits = upper;
    // Walk down while bit 15 of the (left-shifted) top digit is clear.
    while (bits > upper - bitsPerDigit && (top & 32768) == 0) {
        top <<= 1;
        --bits;
    }
    return bits;
}
|
||||
/**
 * Divide `a` by `b`.
 *
 * `a.isNeg` / `b.isNeg` are toggled temporarily on one path and restored
 * before returning.
 *
 * @param a  dividend
 * @param b  divisor
 * @returns a two-element Array [quotient, remainder]
 */
function biDivideModulo(a, b) {
    var bitsA = biNumBits(a);
    var bitsB = biNumBits(b);
    var divisorWasNeg = b.isNeg;
    var quotient, remainder;

    // Divisor has more bits than the dividend: no long division needed.
    if (bitsB > bitsA) {
        if (a.isNeg) {
            quotient = biCopy(bigOne);
            quotient.isNeg = !b.isNeg;
            a.isNeg = false;
            b.isNeg = false;
            remainder = biSubtract(b, a);
            a.isNeg = true;
            b.isNeg = divisorWasNeg;
        } else {
            quotient = new BigInt();
            remainder = biCopy(a);
        }
        return new Array(quotient, remainder);
    }

    quotient = new BigInt();
    remainder = a;

    // Shift the divisor left until its top digit is at least biHalfRadix.
    var top = Math.ceil(bitsB / bitsPerDigit) - 1;
    var shift = 0;
    while (b.digits[top] < biHalfRadix) {
        b = biShiftLeft(b, 1);
        ++shift;
        ++bitsB;
        top = Math.ceil(bitsB / bitsPerDigit) - 1;
    }
    remainder = biShiftLeft(remainder, shift);
    bitsA += shift;
    var high = Math.ceil(bitsA / bitsPerDigit) - 1;

    var scaled = biMultiplyByRadixPower(b, high - top);
    while (biCompare(remainder, scaled) != -1) {
        ++quotient.digits[high - top];
        remainder = biSubtract(remainder, scaled);
    }

    for (var pos = high; pos > top; --pos) {
        var r0 = pos >= remainder.digits.length ? 0 : remainder.digits[pos];
        var r1 = pos - 1 >= remainder.digits.length ? 0 : remainder.digits[pos - 1];
        var r2 = pos - 2 >= remainder.digits.length ? 0 : remainder.digits[pos - 2];
        var d0 = top >= b.digits.length ? 0 : b.digits[top];
        var d1 = top - 1 >= b.digits.length ? 0 : b.digits[top - 1];
        var slot = pos - top - 1;

        // Estimate the quotient digit from the leading digits.
        quotient.digits[slot] = r0 == d0 ? maxDigitVal : Math.floor((r0 * biRadix + r1) / d0);
        var estimate = quotient.digits[slot] * (d0 * biRadix + d1);
        var actual = r0 * biRadixSquared + (r1 * biRadix + r2);
        while (estimate > actual) {
            --quotient.digits[slot];
            // NOTE: the bitwise OR (not +) is kept exactly as in the original.
            estimate = quotient.digits[slot] * ((d0 * biRadix) | d1);
            actual = r0 * biRadix * biRadix + (r1 * biRadix + r2);
        }

        scaled = biMultiplyByRadixPower(b, slot);
        remainder = biSubtract(remainder, biMultiplyDigit(scaled, quotient.digits[slot]));
        if (remainder.isNeg) {
            // Estimate was one too high: add the divisor back once.
            remainder = biAdd(remainder, scaled);
            --quotient.digits[slot];
        }
    }

    remainder = biShiftRight(remainder, shift);
    quotient.isNeg = a.isNeg != divisorWasNeg;
    if (a.isNeg) {
        quotient = divisorWasNeg ? biAdd(quotient, bigOne) : biSubtract(quotient, bigOne);
        b = biShiftRight(b, shift);
        remainder = biSubtract(b, remainder);
    }
    // A zero remainder is never flagged negative.
    if (remainder.digits[0] == 0 && biHighIndex(remainder) == 0) {
        remainder.isNeg = false;
    }
    return new Array(quotient, remainder);
}
|
||||
/**
 * Same as d(), but accepts the payload as an object and JSON-encodes it first.
 *
 * @param d  payload object
 * @param e  RSA public exponent (hex)
 * @param f  RSA modulus (hex)
 * @param g  fixed AES key for the first pass
 * @returns {{encText: string, encSecKey: string}}
 */
function md(d, e, f, g) {
    var payload = JSON.stringify(d);
    var randomKey = a(16);
    var out = {};
    // Two AES passes: fixed key first, then the random key.
    out.encText = b(payload, g);
    out.encText = b(out.encText, randomKey);
    out.encSecKey = c(randomKey, e, f);
    return out;
}
|
||||
/**
 * Encrypt the string payload `d`: AES with the fixed key `g`, AES again with
 * a random 16-character key, and RSA-encrypt that random key.
 *
 * @param d  payload string
 * @param e  RSA public exponent (hex)
 * @param f  RSA modulus (hex)
 * @param g  fixed AES key for the first pass
 * @returns {{encText: string, encSecKey: string}}
 */
function d(d, e, f, g) {
    var randomKey = a(16);
    var out = {};
    out.encText = b(d, g);
    out.encText = b(out.encText, randomKey);
    out.encSecKey = c(randomKey, e, f);
    return out;
}
|
||||
/**
 * Return only the quotient of a / b.
 */
function biDivide(a, b) {
    var pair = biDivideModulo(a, b);
    return pair[0];
}
|
||||
/**
 * Map a character code to a numeric value:
 * '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, 'a'-'z' -> 10-35, anything else -> 0.
 *
 * @param a  character code
 */
function charToHex(a) {
    var ZERO = 48, NINE = ZERO + 9;
    var LOWER_A = 97, LOWER_Z = LOWER_A + 25;
    var UPPER_A = 65, UPPER_Z = 90;
    if (a >= ZERO && a <= NINE) {
        return a - ZERO;
    }
    if (a >= UPPER_A && a <= UPPER_Z) {
        return 10 + a - UPPER_A;
    }
    if (a >= LOWER_A && a <= LOWER_Z) {
        return 10 + a - LOWER_A;
    }
    return 0;
}
|
||||
/**
 * Build a BigInt from a JavaScript number, 16 bits per digit.
 * The bitwise operators work on 32-bit integers, exactly as in the original.
 */
function biFromNumber(a) {
    var value = new BigInt();
    value.isNeg = a < 0;
    a = Math.abs(a);
    var idx = 0;
    while (a > 0) {
        value.digits[idx++] = a & maxDigitVal;
        a >>= biRadixBits;
    }
    return value;
}
|
||||
/**
 * RSA key holder built from hex strings.
 *
 * @param a  exponent stored as `e` (hex)
 * @param b  exponent stored as `d` (hex)
 * @param c  modulus stored as `m` (hex)
 */
function RSAKeyPair(a, b, c) {
    this.e = biFromHex(a);
    this.d = biFromHex(b);
    this.m = biFromHex(c);
    // Characters per block: two per digit below the modulus' highest digit.
    this.chunkSize = 2 * biHighIndex(this.m);
    this.radix = 16;
    this.barrett = new BarrettMu(this.m);
}
|
||||
/**
 * Parse a hex string into a BigInt, four hex characters per digit, starting
 * from the end of the string (least-significant digit first).
 */
function biFromHex(a) {
    var value = new BigInt();
    var end = a.length;
    var idx = 0;
    while (end > 0) {
        var piece = a.substr(Math.max(end - 4, 0), Math.min(end, 4));
        value.digits[idx] = hexToDigit(piece);
        end -= 4;
        ++idx;
    }
    return value;
}
|
||||
/**
 * Convert up to the first four hex characters of `a` into one digit value.
 */
function hexToDigit(a) {
    var digit = 0;
    var count = Math.min(a.length, 4);
    for (var pos = 0; pos < count; ++pos) {
        digit <<= 4;
        digit |= charToHex(a.charCodeAt(pos));
    }
    return digit;
}
|
||||
/**
 * Return the index of the highest non-zero digit of `a` (0 when all are zero).
 */
function biHighIndex(a) {
    var idx = a.digits.length - 1;
    while (idx > 0 && a.digits[idx] == 0) {
        --idx;
    }
    return idx;
}
|
||||
/**
 * Modular-arithmetic helper for a fixed modulus `a`.
 * Precomputes `mu` and `bkplus1` and installs modulo / multiplyMod / powMod.
 *
 * @param a  modulus as a BigInt (copied)
 */
function BarrettMu(a) {
    this.modulus = biCopy(a);
    this.k = biHighIndex(this.modulus) + 1;

    // mu = radix^(2k) / modulus
    var radixPower = new BigInt();
    radixPower.digits[2 * this.k] = 1;
    this.mu = biDivide(radixPower, this.modulus);

    // bkplus1 = radix^(k+1)
    this.bkplus1 = new BigInt();
    this.bkplus1.digits[this.k + 1] = 1;

    this.modulo = BarrettMu_modulo;
    this.multiplyMod = BarrettMu_multiplyMod;
    this.powMod = BarrettMu_powMod;
}
|
||||
/**
 * Return an independent copy of BigInt `a` (digits cloned, sign copied).
 */
function biCopy(a) {
    // `true` skips allocating a digit array that would be replaced at once.
    var clone = new BigInt(true);
    clone.digits = a.digits.slice(0);
    clone.isNeg = a.isNeg;
    return clone;
}
|
||||
|
||||
|
||||
ax = {
|
||||
csrf_token: "d5e1f281f7b6f7ff2caf0af810f347d6",
|
||||
lencodeType: "aac",
|
||||
ids: "[010014984FD3A423D7EC79009184DDA27700]",
|
||||
level: "standard"
|
||||
}
|
||||
// console.log(d(JSON.stringify(ax), '010001', '8c979a9a86a6e4b3c1de07b6f93bd8d4', '0CoJUm6Qyw8W8jud'))
|
||||
console.log(bMr1x)
|
||||
|
||||
|
||||
'{"rid":"R_SO_4_%d","threadId":"R_SO_4_1297486027","pageNo":"1","pageSize":"20","cursor":"-1","offset":"0","orderType":"1","csrf_token":"ee74402ef50d2a957bccb7b540f4bc27"}'
|
||||
'{"rid":"R_VI_62_4096A8D2343DB13036C15EDE76355DE9","threadId":"R_VI_62_4096A8D2343DB13036C15EDE76355DE9","pageNo":"1","pageSize":"20","cursor":"-1","offset":"0","orderType":"1","csrf_token":"ee74402ef50d2a957bccb7b540f4bc27"}'
|
394
js/请求头请求体加密/网易云音乐/music.py
Normal file
394
js/请求头请求体加密/网易云音乐/music.py
Normal file
@ -0,0 +1,394 @@
|
||||
import execjs
|
||||
import requests
|
||||
from tqdm import tqdm
|
||||
import csv
|
||||
import os
|
||||
|
||||
xx = '010001'
|
||||
yy = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
|
||||
zz = '0CoJUm6Qyw8W8jud'
|
||||
cookies = {
|
||||
'MUSIC_U': '',
|
||||
}
|
||||
headers = {
|
||||
'authority': 'music.163.com',
|
||||
'accept': '*/*',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7',
|
||||
# 'cookie': 'P_INFO=18608219667|1659842485|1|study|00&99|null&null&null#sic&510500#10#0|&0||18608219667; __bid_n=184855102dc3deb72f4207; FPTOKEN=72X4JbpCu5OusTIYuHrCSGgjkuVlm7w0d2wuZhf+LU7tCP8d2MdyrBKXbvhWmMBQ4PKKvkagbJ51CrfoDP1FK92ujmpUvUgInBgFojtQGyau97Tpz4WSuTnUiS6fizwTsHkskf4I2RdqrQCBQFHtYEtOnpD8RQadQBQbKXjwRw/nR1DO+23Y6vcDZbzGXAtLl6Xm4RhhE95S1srhGEVyjbIKGwnyHfiAJRmy6s7aRwJy06lrHyqXmRGsl75msfYuOSPVdoqKR50yZOaIXkE9+reLCp71sfWzH6IyIuEd0tOfp2DIGQOXRwPNsfJIVhnzOmQETOjXTciGSjjqjpcB1HvV6MJEPoJTz9jtA9xEAbpdqzfdbXWd1t66tiWvMdSwOdRJufrVWLb5Kp45jXCEMg==|4i8C4fx+LJpM3RJOqHD5D/gktdXKtOZsPzv+3ONA2vU=|10|4784ce007f1eaa4073c5660cfcf93bfa; vinfo_n_f_l_n3=0f5eba99d02a8a90.1.12.1673163354239.1673496764827.1673499976392; _iuqxldmzr_=32; _ntes_nnid=f37f7a44b589883a8947dd6fca21229a,1674451825593; _ntes_nuid=f37f7a44b589883a8947dd6fca21229a; NMTID=00OpWJ6K25R-OBw305HoKc7iw1Bum4AAAGF3Rs0ZA; WEVNSM=1.0.0; WNMCID=pozalx.1674451825917.01.0; WM_TID=%2BrY9QzhJ44xAQUUBAFKBIufmFp9POPoJ; WM_NI=FZifNkYxQ5%2BsOc7UcO0iL2%2BysJb4NBTGZYVM84rxk4hET0mDURlUNWjbwIRjhuX5QLHgQRO1zicH%2BhhGxyGw5XoZKrhco9d3otC6cYq4jWsQGVO9ozzTlzitaHjcs4mocU0%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6eeb0cf6f898e0096fc4eada88fb2d54f838a9fadc147edb99d98bc46bab7a9abe52af0fea7c3b92a8686a787cd619ba89fd6aa798dbdf9b6e245f4b784d3aa73b196aed5cd4a989996bad547fcecbe99ea40ac9b9cd1f050978ea4b6cd5daabeb9aad162acab98d6d4488cb99fb3ce3bbab7ada9aa64ac99b89ac67df788a1bbca7aacbda1dae739adac8dd7e253b3eca6b8f25b858a81daee72ba8ca49bec80a398a684f240afb1adb6b337e2a3; playerid=49334466; JSESSIONID-WYYY=o%5CyijWemMcjioZJ1fsF%5C9sl57SgrkFa6o9yMUd4dnNshvo11uAMNJOV%5CbIJYe0VSo2xpDI0mc%2FUvQX2xQpe1U2JCJDfEwIvO%2FhMo%5CGHCcfZI3r%2BzseRcOnnrt8NsZwR53VNvUYHNp6sYzWTZEiYbYJ9D4%2Fv%2BIUCnGPv3mypr1JBIDaev%3A1674532481499',
|
||||
'origin': 'https://music.163.com',
|
||||
'referer': 'https://music.163.com/',
|
||||
'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-platform': '"Windows"',
|
||||
'sec-fetch-dest': 'empty',
|
||||
'sec-fetch-mode': 'cors',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
|
||||
}
|
||||
# Query-string parameters sent with every request.
# The token value was left blank in the source (`'csrf_token':` with no value),
# which is a SyntaxError; an empty string keeps the module importable. Fill in
# a real token when an authenticated session is needed.
params = {
    'csrf_token': '',
}
|
||||
|
||||
|
||||
def replace_lastChar(former_str, replacechar):
    """Return ``former_str`` with its last character replaced by ``replacechar``.

    For an empty ``former_str`` the result is just ``replacechar``.
    """
    head = former_str[:-1]
    return head + replacechar
|
||||
|
||||
|
||||
def returnError(response):
    """Report an empty search result.

    Args:
        response: Object whose ``json()`` returns a dict with a ``result`` key.

    Returns:
        0 (after printing a notice) when ``result`` is an empty dict,
        otherwise ``None``.
    """
    result = response.json()['result']
    if result == {}:
        print('搜索的内容无法查找到!')
        return 0
    return None
|
||||
|
||||
|
||||
def downloader(url, i3x, id):
    """Resolve a media URL through the encrypted API and save it as an MP4.

    Args:
        url: API endpoint that returns the media location.
        i3x: Request payload string passed to the JS ``d`` encryptor.
        id: Identifier used for the output name ``./MV/{id}.mp4``.
    """
    # Read the encryptor through a context manager so the handle is closed.
    with open('demo.js', 'r', encoding='utf-8') as js_file:
        js_source = js_file.read()
    param = execjs.compile(js_source).call('d', i3x, xx, yy, zz)
    data = {
        'params': param['encText'],
        'encSecKey': param['encSecKey']
    }
    response = requests.post(
        url,
        params=params,
        cookies=cookies,
        headers=headers,
        data=data,
    )
    # Parse once. Two response shapes are handled: {'data': {'url': ...}} and
    # {'urls': [{'url': ...}]}. Only lookup failures trigger the fallback,
    # instead of a bare `except` that also hid Ctrl-C and real bugs.
    payload = response.json()
    try:
        url = payload['data']['url']
    except (KeyError, TypeError, IndexError):
        url = payload['urls'][0]['url']
    res = requests.get(url, stream=True)
    # Content-Length may be missing; tqdm accepts total=None in that case.
    length = res.headers.get('Content-Length')
    content_size = int(length) / 1024 if length else None
    print('正在下载MV。。。')
    # Make sure the target directory exists before writing into it.
    os.makedirs('./MV', exist_ok=True)
    with open(f'./MV/{id}.mp4', 'wb') as fp:
        for chunk in tqdm(iterable=res.iter_content(1024),
                          total=content_size,
                          unit='k'):
            fp.write(chunk)
    print('下载完成!')
|
||||
|
||||
|
||||
def searchLoader(i3x, name, offset):
    """Run a search request and return the raw ``requests`` response.

    Args:
        i3x: %-style payload template taking two values (name, offset).
        name: Search keyword substituted into the template.
        offset: Result offset substituted into the template.

    Returns:
        The response of the cloudsearch endpoint.
    """
    # The extra format() around the %-formatting was a no-op and is dropped.
    i3x = i3x % (name, offset)
    # Read the encryptor through a context manager so the handle is closed.
    with open('demo.js', 'r', encoding='utf-8') as js_file:
        js_source = js_file.read()
    param = execjs.compile(js_source).call('d', i3x, xx, yy, zz)
    data = {
        'params': param['encText'],
        'encSecKey': param['encSecKey']
    }
    response = requests.post(
        'https://music.163.com/weapi/cloudsearch/get/web',
        params=params,
        cookies=cookies,
        headers=headers,
        data=data,
    )
    return response
|
||||
|
||||
|
||||
def Post(time, id, i, key):
    """Fetch one page of comments and return the decoded JSON.

    Args:
        time: Cursor value for the page (sent as ``cursor``).
        id: Resource identifier; for ``key == 2`` its length selects the
            thread prefix.
        i: Page number.
        key: 1 selects the ``R_SO_4_`` thread prefix; 2 selects ``R_VI_62_``
            for 32-character ids and ``R_MV_5_`` otherwise.

    Returns:
        The parsed JSON body of the comments endpoint.

    Raises:
        ValueError: If ``key`` is neither 1 nor 2 (this used to surface as an
            unbound-variable error further down).
    """
    # The three original payloads differed only in thread prefix and orderType.
    if key == 1:
        prefix, order_type = 'R_SO_4_', "2"
    elif key == 2:
        prefix = 'R_VI_62_' if len(id) == 32 else 'R_MV_5_'
        order_type = "1"
    else:
        raise ValueError(f'unsupported key: {key!r}')
    thread_id = f"{prefix}{id}"
    i3x = {
        'csrf_token': "d5e1f281f7b6f7ff2caf0af810f347d7",
        'cursor': str(time),
        'offset': "0",
        'orderType': order_type,
        'pageNo': f"{i}",
        'pageSize': "20",
        'rid': thread_id,
        'threadId': thread_id,
    }
    # Read the encryptor through a context manager so the handle is closed.
    with open('demo.js', 'r', encoding='utf-8') as js_file:
        js_source = js_file.read()
    # 'md' JSON-encodes the dict on the JS side before encrypting it.
    param = execjs.compile(js_source).call('md', i3x, xx, yy, zz)
    data = {
        'params': param['encText'],
        'encSecKey': param['encSecKey']
    }
    response = requests.post('https://music.163.com/weapi/comment/resource/comments/get', params=params,
                             headers=headers, data=data, cookies=cookies)
    return response.json()
|
||||
|
||||
|
||||
def getCursor(datas, page_size=20):
    """Return the paging cursor for the next comment page.

    Args:
        datas: Decoded comment response; ``datas["data"]["comments"]`` must
            be a list of dicts that each carry a ``'time'`` entry.
        page_size: Number of comments in a full page. Defaults to 20, which
            matches the ``pageSize`` that ``Post`` requests.

    Returns:
        The ``'time'`` value of the last comment of a full page, or the
        string ``'end'`` when the page holds fewer than ``page_size``
        comments (i.e. there is no further page).
    """
    comments = datas["data"]["comments"]
    if len(comments) < page_size:
        return 'end'
    return comments[page_size - 1]['time']
|
||||
|
||||
|
||||
def comment(datas):
    """Flatten a comment response into one row dict per comment.

    Args:
        datas: Decoded comment response; ``datas["data"]["comments"]`` is the
            list of comment objects.

    Returns:
        A list of dicts keyed by the CSV column names used in
        ``get_comment``. The VIP level is 0 when the user's ``vipRights``
        is ``None``.
    """
    rows = []
    for info in datas["data"]["comments"]:
        user = info["user"]
        vip_rights = user["vipRights"]
        # Build a fresh dict for every comment. The original only did so for
        # VIP users; for everyone else it re-appended (and mutated) the
        # previous row, losing the non-VIP comment entirely.
        rows.append({
            '评论人的ID': user['userId'],
            '评论人网名': user['nickname'],
            '评论人VIP等级': vip_rights['redVipLevel'] if vip_rights is not None else 0,
            '评论人头像网址': user['avatarUrl'],
            '评论编号': info['commentId'],
            '评论时间': info['timeStr'],
            '点赞量': info['likedCount'],
            '评论内容': info['content'],
            'ip地址': info["ipLocation"]['location'],
        })
    return rows
|
||||
|
||||
|
||||
def get_comment(endNum, id, key):
    """Download comment pages and write them to ``./comment/comment_of_<id>.csv``.

    Pages are fetched with ``Post`` until ``getCursor`` reports ``'end'`` or
    the page counter equals ``endNum``.

    Args:
        endNum: Number of the last page to download. The loop stops on an
            exact match, so a value below 1 never matches and paging only
            stops at the last page.
        id: Song or MV id; converted to ``str`` before use.
        key: Target kind forwarded to ``Post`` (1 song, 2 MV/video).
    """
    header = ['评论人的ID', '评论人网名', '评论人VIP等级', '评论人头像网址', '评论编号', '评论时间', '点赞量',
              '评论内容', 'ip地址']
    id = str(id)
    page = 1
    # ``with`` closes the CSV file even if a request or parse step raises.
    with open(f'./comment/comment_of_{id}.csv', 'w', encoding='utf-8', newline='') as fp:
        writer = csv.DictWriter(fp, header)
        writer.writeheader()
        data = Post(-1, id, page, key)  # -1 is the cursor for the first page
        while True:
            cursor = getCursor(data)
            writer.writerows(comment(data))
            print('正在下载第', page, '页的评论')
            if cursor == 'end' or page == endNum:
                break
            page += 1
            data = Post(cursor, id, page, key)
|
||||
|
||||
|
||||
def get_lyric(id):
    """Download a song's lyric and save it to ``./lyric/lyric_of_<id>.txt``.

    The translated lyric (``tlyric``) is appended after a separator line
    when the response contains a non-empty one.

    Args:
        id: Numeric song id; it is formatted with ``%d``.
    """
    i3x = '{"id":"%d","lv":-1,"tv":-1,"csrf_token":"d5e1f281f7b6f7ff2caf0af810f347d7"}' % id

    # Read the script through a context manager so the handle is closed.
    with open('demo.js', 'r', encoding='utf-8') as js_file:
        js_source = js_file.read()
    param = execjs.compile(js_source).call('d', i3x, xx, yy, zz)

    data = {
        'params': param['encText'],
        'encSecKey': param['encSecKey'],
    }
    response = requests.post('https://music.163.com/weapi/song/lyric', params=params, cookies=cookies,
                             headers=headers, data=data)
    body = response.json()  # parse once instead of once per field
    res = body["lrc"]['lyric']
    try:
        resp = body["tlyric"]['lyric']
    except (KeyError, TypeError):
        # No translation in the response; only these lookup failures are
        # expected here, so nothing else is swallowed.
        resp = ''

    with open(f'./lyric/lyric_of_{id}.txt', 'w', encoding='utf-8') as fp:
        fp.write(res)
        if resp:  # also skips a None translation, which write() would reject
            fp.write('译文如下所示\n')
            fp.write(resp)
|
||||
|
||||
|
||||
def get_musicUrl(mid):
    """Ask the player-url endpoint for the download URL of a song.

    The request asks for the ``lossless`` level with ``aac`` encoding.

    Args:
        mid: Numeric song id; it is formatted with ``%d``.

    Returns:
        The URL string, or the literal ``'error'`` when the response has no
        entry or the entry's ``url`` is ``None``.
    """
    i3x = '{"ids":"[%d]","level":"lossless","encodeType":"aac","csrf_token":"d5e1f281f7b6f7ff2caf0af810f347d7"}' % mid

    # Read the script through a context manager so the handle is closed.
    with open('demo.js', 'r', encoding='utf-8') as js_file:
        js_source = js_file.read()
    param = execjs.compile(js_source).call('d', i3x, xx, yy, zz)

    data = {
        'params': param['encText'],
        'encSecKey': param['encSecKey'],
    }
    response = requests.post(
        'https://music.163.com/weapi/song/enhance/player/url/v1',
        params=params,
        cookies=cookies,
        headers=headers,
        data=data,
    )
    entries = response.json()["data"]
    # An empty list used to raise IndexError; treat it like a missing URL.
    if not entries or entries[0]['url'] is None:
        return 'error'
    return entries[0]['url']
|
||||
|
||||
|
||||
def GetMusic(url, mid):
    """Stream a song to ``./music/<mid>.mp3`` with a progress bar.

    Args:
        url: Download URL, or the sentinel ``'error'`` returned by
            ``get_musicUrl`` when no source is available.
        mid: Song id, used as the file name.
    """
    if url == 'error':
        print('歌曲暂无音源或需要购买专辑才能下载')
        return

    response = requests.get(url=url, headers=headers, stream=True)
    # Check the status code directly rather than the Response repr string.
    if response.status_code == 403:
        # Retry once with the URL variant produced by replace_lastChar
        # (helper defined elsewhere in this file).
        new_url = replace_lastChar(url, 'r')
        response = requests.get(url=new_url, headers=headers, stream=True)

    # Progress total in KiB, matching the 1024-byte chunks read below.
    content_size = int(response.headers['Content-Length']) / 1024
    print('正在下载歌曲。。。')
    with open(f'./music/{mid}.mp3', 'wb') as fp:
        for data in tqdm(iterable=response.iter_content(1024),
                         total=content_size,
                         unit='k'):
            fp.write(data)
    print('下载完成!')
|
||||
|
||||
|
||||
def searchSong(name, offset):
    """Search songs by keyword and print one summary dict per hit.

    Args:
        name: Search keyword.
        offset: Index of the first result to request (30 per page).

    Returns:
        The total hit count reported by the server, or 0 when
        ``returnError`` flags the response.
    """
    template = '{"hlpretag":"<span class=\\"s-fc7\\">","hlposttag":"</span>","id":"160947","s":"%s","type":"1","offset":"%d","total":"true","limit":"30","csrf_token":"ee74402ef50d2a957bccb7b540f4bc27"}'
    response = searchLoader(template, name, offset)
    if returnError(response) == 0:
        return 0

    result = response.json()["result"]
    total = result['songCount']
    songs = result['songs']
    print('搜索到的结果有', total, '条')
    for song in songs:
        print({
            '歌曲id': song['id'],
            '歌曲名称': song['name'],
            '歌手姓名': song['ar'][0]['name'],  # first listed artist only
            '专辑名称': song['al']['name'],
            'mvid': song['mv'],
        })
    return total
|
||||
|
||||
|
||||
def searchFunction(flag):
    """Interactive, paged search for songs or MVs.

    Asks for a keyword, shows one page of results, then keeps asking whether
    to show the next page until the results run out or the user enters 0.

    Args:
        flag: ``'1'`` searches songs, ``'2'`` searches MVs. Any other value
            prints an error and returns.
    """
    name = input('请输入想要搜索的内容:\n')
    # The page size must match the "limit" each search template sends:
    # 30 for songs, 20 for MVs. Advancing MVs by 30 skipped 10 hits per page.
    if flag == '1':
        search, page_size = searchSong, 30
    elif flag == '2':
        search, page_size = searchMV, 20
    else:
        print('错误的输入,请按提示进行输入')
        return

    offset = 0
    while True:
        total = search(name, offset)
        offset += page_size
        # ">=": when the offset equals the total there is nothing left, so
        # do not offer an empty extra page.
        if offset >= total:
            print('已超出上线,退出搜索功能\n')
            break
        keys = input(f'是否继续搜索,输入任意字符继续搜索{name}下一页的内容,退出请输入0\n')
        if keys == '0':
            break
|
||||
|
||||
|
||||
def _read_page_count():
    """Ask how many comment pages to download; return None if not an integer."""
    num = input('请输入下载评论的页数\n')
    try:
        return int(num)
    except ValueError:
        print('错误输入,请重新输入')
        return None


def downloadFunction(flag):
    """Interactive download menu for one song or one MV.

    Args:
        flag: ``'3'`` opens the song menu (song file, lyric, comments),
            ``'4'`` opens the MV menu (MV file, comments). Other values do
            nothing.
    """
    if flag == '3':
        id = input('请输入想要下载的歌曲id\n')
        try:
            id = int(id)
        except ValueError:
            # A non-numeric id used to abort the program with a traceback.
            print('错误输入,请重新输入')
            return
        while True:
            keys = input('请输入想要进行的操作: 1.下载歌曲 2.下载歌词 3.下载评论 0.退出\n')
            if keys == '1':
                print('重复下载将会覆盖之前下载的文件')
                GetMusic(get_musicUrl(id), id)
            elif keys == '2':
                print('重复下载将会覆盖之前下载的文件')
                get_lyric(id)
            elif keys == '3':
                print('重复下载将会覆盖之前下载的文件')
                pages = _read_page_count()
                if pages is not None:
                    get_comment(pages, id, 1)
            elif keys == '0':
                break
            else:
                print('错误输入,请重新输入')
    elif flag == '4':
        # The MV id stays a string: downloadMV and Post look at its length.
        id = input('请输入想要下载的MV的id:\n')
        while True:
            keys = input('请输入想要进行的操作: 1.下载MV 2.下载评论 0.退出\n')
            if keys == '1':
                print('重复下载将会覆盖之前下载的文件')
                downloadMV(id)
            elif keys == '2':
                print('重复下载将会覆盖之前下载的文件')
                pages = _read_page_count()
                if pages is not None:
                    get_comment(pages, id, 2)
            elif keys == '0':
                break
            else:
                print('错误输入,请重新输入')
|
||||
|
||||
|
||||
def searchMV(name, offset):
    """Search MVs/videos by keyword and print id and title of each hit.

    Args:
        name: Search keyword.
        offset: Index of the first result to request (20 per page).

    Returns:
        The total hit count reported by the server, or 0 when
        ``returnError`` flags the response.
    """
    template = '{"hlpretag":"<span class=\\"s-fc7\\">","hlposttag":"</span>","id":"160947","s":"%s","type":"1014","offset":"%d","total":"true","limit":"20","csrf_token":"ee74402ef50d2a957bccb7b540f4bc27"}'
    response = searchLoader(template, name, offset)
    if returnError(response) == 0:
        return 0

    result = response.json()["result"]
    total = result['videoCount']
    videos = result['videos']
    print('搜索到的结果有', total, '条')
    for video in videos:
        print({
            'MV的id': video['vid'],
            'MV名称': video['title'],
        })
    return total
|
||||
|
||||
|
||||
def downloadMV(id):
    """Build the play-url request for an MV/video and hand it to ``downloader``.

    Args:
        id: Id as a string. A 32-character id goes to the ``cloudvideo``
            endpoint unchanged; any other id is converted to ``int`` and
            goes to the ``mv/url`` endpoint.
    """
    if len(id) != 32:
        # Numeric MV id.
        id = int(id)
        payload = '{"id":"%d","r":"1080","csrf_token":"ee74402ef50d2a957bccb7b540f4bc27"}' % id
        downloader('https://music.163.com/weapi/song/enhance/play/mv/url', payload, id)
        return

    # 32-character video id.
    payload = '{"ids":"[\\"%s\\"]","resolution":"1080","csrf_token":"ee74402ef50d2a957bccb7b540f4bc27"}' % id
    downloader('https://music.163.com/weapi/cloudvideo/playurl', payload, id)
|
||||
|
||||
|
||||
def checkDir():
    """Make sure the four output folders exist in the working directory.

    Creates ``./music``, ``./MV``, ``./lyric`` and ``./comment``. Folders
    that already exist are left alone.
    """
    for folder in ('./music', './MV', './lyric', './comment'):
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(folder, exist_ok=True)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Top-level menu: repeat until the user enters 0.
    checkDir()
    while True:
        print('欢迎使用音乐下载器')
        key = input('请输入想要选择的功能: 1.搜索歌曲 2.搜索MV 3.下载歌曲 4.下载MV 0.退出程序\n')
        if key == '0':
            break
        if key in ('1', '2'):
            searchFunction(key)
        elif key in ('3', '4'):
            downloadFunction(key)
        else:
            print('错误输入,请重新输入')
|
50
lxml&re/4k图片爬取.py
Normal file
50
lxml&re/4k图片爬取.py
Normal file
@ -0,0 +1,50 @@
|
||||
import requests
|
||||
from lxml import etree
|
||||
import urllib3 # 禁用安全请求警告,当目标使用htpps时使用
|
||||
import os
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
# Helper meant to deal with garbled Chinese text in fetched pages.
def rebuilt_Language(url, headers):
    """GET ``url`` with certificate verification turned off.

    The encoding override is left disabled, as in the original, so the
    response is returned exactly as requests decoded it.
    """
    page = requests.get(url=url, headers=headers, verify=False)
    # page.encoding = page.apparent_encoding
    return page
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Spoof a browser User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }
    # Folder that receives the downloaded pictures.
    if not os.path.exists('./picLibs'):
        os.mkdir('./picLibs')
    # URL template for the listing pages.
    url = 'https://pic.netbian.com/4kmeinv/index_%d.html'

    # (image url, file name) pairs. Keeping them together replaces the two
    # parallel lists that were walked with a manual counter.
    images = []
    for pageNum in range(1, 3):
        page_text = rebuilt_Language(url=url % pageNum, headers=headers).text
        tree = etree.HTML(page_text)
        for li in tree.xpath('//div[@class="wrap clearfix"]//li'):
            # The original literal started with a stray space before "https".
            src = 'https://pic.netbian.com' + li.xpath('./a/img/@src')[0]
            img_name = li.xpath('./a/img/@alt')[0] + '.jpg'
            # Undo the mis-decoding: take the text back to bytes via
            # ISO-8859-1, then decode those bytes as GBK.
            img_name = img_name.encode('iso-8859-1').decode('gbk')
            images.append((src, img_name))

    # Fetch every picture and write it to disk.
    for img_url, img_name in images:
        img_data = requests.get(url=img_url, headers=headers).content
        with open('picLibs/' + img_name, 'wb') as fp:
            fp.write(img_data)
        print(img_name + '下载成功!')
|
21
lxml&re/58同城分页爬取.py
Normal file
21
lxml&re/58同城分页爬取.py
Normal file
@ -0,0 +1,21 @@
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
||||
if __name__ == '__main__':
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }
    # URL template for the listing pages.
    url = 'https://sh.58.com/ershoufang/p%d/?PGTID=0d30000c-0000-2e04-d18a-9af183e2d6a4&ClickID=1'
    # ``with`` closes 58.txt when done; the original never closed it.
    with open('58.txt', 'w', encoding='utf-8') as fp:
        for pageNum in range(1, 9):
            new_url = url % pageNum  # fill in the page number
            page_text = requests.get(url=new_url, headers=headers).text
            tree = etree.HTML(page_text)
            for li in tree.xpath('//section[@class="list"]/div'):
                titles = li.xpath('./a/div[2]//h3/text()')
                if not titles:
                    # A list entry without a title would otherwise abort the
                    # whole run with IndexError.
                    continue
                title = titles[0]
                print(title)
                fp.write(title + '\n')
    print('over!')
|
28
lxml&re/GetFakeUA.py
Normal file
28
lxml&re/GetFakeUA.py
Normal file
@ -0,0 +1,28 @@
|
||||
import requests
|
||||
from lxml import etree
|
||||
|
||||
# Scrape the Chrome User-Agent strings listed on useragentstring.com and
# append them, quoted and comma-terminated, to ./fake_UA.txt.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
}

url = 'https://useragentstring.com/pages/useragentstring.php?name=Chrome'

resp = requests.get(url=url, headers=headers).text

tree = etree.HTML(resp)

ul_list = tree.xpath('//*[@id="liste"]/ul')

USER_AGENT = []  # kept from the original; nothing is added to it here

# ``with`` closes the file even if parsing fails half-way.
with open('./fake_UA.txt', 'a', encoding='utf-8') as fp:
    for ul in ul_list:
        UA = ul.xpath('./li/a/text()')
        # The first link of every list is skipped, as in the original.
        # NOTE(review): presumably a non-UA entry; confirm against the page.
        for text in UA[1:]:
            ua = '"' + text + '",\n'
            print(ua)
            fp.write(ua)
|
||||
|
35
lxml&re/bs4案例.py
Normal file
35
lxml&re/bs4案例.py
Normal file
@ -0,0 +1,35 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import urllib3 # 禁用安全请求警告,当目标使用htpps时使用
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
|
||||
# Helper that fixes garbled Chinese text in fetched pages.
def rebuilt_Language(url, headers):
    """GET ``url`` without certificate verification and re-guess its encoding.

    The response encoding is replaced by ``apparent_encoding`` so that
    ``.text`` is decoded with the encoding detected from the body.
    """
    page = requests.get(url=url, headers=headers, verify=False)
    detected = page.apparent_encoding
    page.encoding = detected
    return page
|
||||
|
||||
|
||||
# Crawl every chapter title and chapter text of "Romance of the Three Kingdoms".
if __name__ == "__main__":
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }
    url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
    page_text = rebuilt_Language(url, headers).text
    # Parse the table of contents.
    soup = BeautifulSoup(page_text, 'lxml')
    li_list = soup.select('.book-mulu > ul >li')
    # ``with`` closes sanguo.txt when done; the original never closed it.
    with open('./sanguo.txt', 'w', encoding='utf-8') as fp:
        for li in li_list:
            title = li.a.string
            detail_url = 'https://www.shicimingju.com' + li.a['href']
            detail_page_text = rebuilt_Language(detail_url, headers).text
            # Pull the chapter body out of the detail page.
            detail_soup = BeautifulSoup(detail_page_text, 'lxml')
            div_tag = detail_soup.find('div', class_='chapter_content')
            content = div_tag.text
            fp.write(title + ":" + content + "\n")
            print(title, '爬取成功')
|
18
lxml&re/bs4解析基础.py
Normal file
18
lxml&re/bs4解析基础.py
Normal file
@ -0,0 +1,18 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
if __name__ == "__main__":
    # Load the local HTML file into a BeautifulSoup object. ``with`` closes
    # the file once it is parsed; the original left it open.
    with open('./test.html', 'r', encoding='utf-8') as fp:
        soup = BeautifulSoup(fp, 'lxml')
    # Other lookups to try on the same soup:
    # print(soup)
    # print(soup.a)  # soup.tagName returns the first tagName tag in the document
    # print(soup.find('div'))  # same as soup.div
    # print(soup.find('div', class_='song'))
    # print(soup.find_all('a'))
    # print(soup.select('.tang'))
    # print(soup.select('.tang > ul > li > a')[0])
    # print(soup.select('.tang > ul a')[0])
    # print(soup.select('.tang > ul a')[0].text)
    tag = soup.find('div', class_='song')
    print(tag.text)
|
35
lxml&re/test.html
Normal file
35
lxml&re/test.html
Normal file
@ -0,0 +1,35 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8" />
|
||||
<title>测试bs4</title>
|
||||
</head>
|
||||
<body>
|
||||
<div>
|
||||
<p>百里守约</p>
|
||||
</div>
|
||||
<div class = "song">
|
||||
<p>李清照</p>
|
||||
<p>王安石</p>
|
||||
<p>苏轼</p>
|
||||
<p>柳宗元</p>
|
||||
<a href="https://www.song.com/" title="赵匡胤" target="_self">
|
||||
<span>this is span</span>
|
||||
宋朝是最强大的王朝,不是军队的强大,而是经济很强大,国民都很有钱
|
||||
</a>
|
||||
<a href="" class="du">总为浮云能避日,长安不见使人愁</a>
|
||||
<img src="https://www.baidu.com/meinv.jpg" alt="" />
|
||||
</div>
|
||||
<div class="tang">
|
||||
<ul>
|
||||
<li><a href="https://www.baidu.com" title="qing">清明时节雨纷纷,路上行人欲断魂</a> </li>
|
||||
<li><a href="https://www.163.com" title="qin">秦时明月</a> </li>
|
||||
<li><a href="https://www.sina.com" class="du">杜甫</a> </li>
|
||||
<li><a href="https://www.dudu.com" class="du">杜牧</a> </li>
|
||||
<li><b>杜小咪</b></li>
|
||||
<li><i>度蜜月</i></li>
|
||||
<li><a href="https://www.shu.edu.cn" id="feng">凤凰台上凤凰游,风趣太空江自流</a> </li>
|
||||
</ul>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
13
lxml&re/xpath基础.py
Normal file
13
lxml&re/xpath基础.py
Normal file
@ -0,0 +1,13 @@
|
||||
from lxml import etree
|
||||
|
||||
if __name__ == "__main__":
    # Parse the local fixture into an element tree.
    document = etree.parse('./test.html')
    # Other expressions to try against the same tree:
    #   document.xpath('/html//title')
    #   document.xpath('//div[@class="song"]')
    #   document.xpath('//div[@class="song"]/p[3]')
    #   document.xpath('//div[@class="tang"]/ul/li[4]/a/text()')
    #   document.xpath('//div[@class="tang"]//text()')
    #   document.xpath('//div[@class="song"]/img/@src')
    paragraph_texts = document.xpath('//div[@class="song"]/p/text()')
    print(paragraph_texts)
|
32
lxml&re/正则练习.py
Normal file
32
lxml&re/正则练习.py
Normal file
@ -0,0 +1,32 @@
|
||||
import re
|
||||
# Extract the word "python" from a space-separated list of languages.
key = "java python c++ php"
matches = re.findall('python', key)
s = matches[0]
print(s)
|
||||
# key = 'https://scpic.chinaz.net/files/default/imgs/2023-01-04/610de886ffc6b37d_s.jpg'
|
||||
# s = re.sub('_s', '', key)
|
||||
# print(s)
|
||||
# 提取出hello world
|
||||
# key = "<html><h1>hello world<h1><html>"
|
||||
# s = re.findall('<h1>(.*)<h1>', key)[0]
|
||||
# print(s)
|
||||
# 提取出170
|
||||
# string = '我喜欢身高为170的女生'
|
||||
# s = re.findall('\d+', string)[0]
|
||||
# print(s)
|
||||
# 提取出http:// 和 https://
|
||||
# key = 'http://www.baidu.com and https://dong.com'
|
||||
# s = re.findall('https?://', key)
|
||||
# print(s)
|
||||
# 提取出hello
|
||||
# key = 'lalala<hTml>hello</HTMl>hahaha'
|
||||
# s = re.findall('<[Hh][Tt][mM][lL]>(.*)</[Hh][Tt][mM][Ll]>', key)
|
||||
# print(s)
|
||||
# 提取出hit.
|
||||
# key = 'bobo@hit.edu.cn'
|
||||
# s = re.findall('h.*?\.', key)[0]
|
||||
# print(s)
|
||||
# 提取出saas 和 sas
|
||||
# key = 'saas and sas and saaas'
|
||||
# s = re.findall('sa{1,2}s', key)
|
||||
# print(s)
|
28
lxml&re/正则解析.py
Normal file
28
lxml&re/正则解析.py
Normal file
@ -0,0 +1,28 @@
|
||||
import re
|
||||
import requests
|
||||
import os
|
||||
|
||||
# Download the pictures shown on the Douban front page.
if __name__ == "__main__":
    # Folder that receives every downloaded picture.
    if not os.path.exists('./imgLibs'):
        os.mkdir('./imgLibs')

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }
    url = 'https://www.douban.com/'
    # Fetch the whole page first ...
    page_text = requests.get(url=url, headers=headers).text
    # ... then pick the data-origin URL out of every "pic" block.
    # re.S lets ".*?" run across line breaks.
    ex = '<div class="pic">.*?<img src=.*? data-origin="(.*?)" alt=.*?</div>'
    pattern = re.compile(ex, re.S)
    for src in pattern.findall(page_text):
        # The picture is saved under the last path segment of its URL.
        img_name = src.split('/')[-1]
        img_data = requests.get(url=src, headers=headers).content
        with open('./imgLibs/' + img_name, 'wb') as fp:
            fp.write(img_data)
        print(img_name, '下载成功')
|
51
lxml&re/简历爬取.py
Normal file
51
lxml&re/简历爬取.py
Normal file
@ -0,0 +1,51 @@
|
||||
import requests
|
||||
from lxml import etree
|
||||
import os
|
||||
|
||||
if __name__ == '__main__':
    # Spoof a browser User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
    }
    # The first page has its own URL; free_1.html cannot be opened.
    url0 = 'https://sc.chinaz.com/jianli/free.html'
    url = 'https://sc.chinaz.com/jianli/free_%d.html'

    # (download url, file name) pairs. Keeping them together replaces the
    # two parallel lists that were walked with a manual counter.
    downloads = []
    # Folder that receives the templates.
    if not os.path.exists('./CV_moban'):
        os.mkdir('./CV_moban')

    # Walk the listing pages.
    for pageNum in range(1, 3):
        new_url = url0 if pageNum == 1 else url % pageNum
        page_text = requests.get(url=new_url, headers=headers).text
        tree = etree.HTML(page_text)
        # Every entry links to a detail page that holds the download link.
        for cv in tree.xpath('//div[@class="main_list jl_main"]/div'):
            CV_src = cv.xpath('./a/@href')[0]
            CV_text = requests.get(url=CV_src, headers=headers).text
            ctree = etree.HTML(CV_text)
            download_src = ctree.xpath('//div[@class="down_wrap"]/div[2]/ul/li/a/@href')[0]
            download_name = ctree.xpath('//div[@class="bgwhite"]/div//h1/text()')[0]
            # Undo the mis-decoding: back to bytes via ISO-8859-1, then UTF-8.
            download_name = download_name.encode('iso-8859-1').decode('utf-8') + '.rar'
            downloads.append((download_src, download_name))

    # Download every template.
    for download_src, download_name in downloads:
        cv_content = requests.get(url=download_src, headers=headers).content
        with open('CV_moban/' + download_name, 'wb') as fp:
            fp.write(cv_content)
        print(download_name + '下载完成!')
|
451
note.txt
Normal file
451
note.txt
Normal file
@ -0,0 +1,451 @@
|
||||
爬虫在使用场景中的分类:
|
||||
- 通用爬虫
|
||||
抓取系统重要组成部分。抓取的是一整张页面的数据。
|
||||
- 聚焦爬虫
|
||||
是建立在通用爬虫的基础之上。抓取的是页面中特定的局部内容。
|
||||
- 增量式爬虫
|
||||
检测网站中数据更新的情况。只会抓取网站中最新更新出来的数据。
|
||||
|
||||
爬虫的矛与盾:
|
||||
- 反爬机制
|
||||
门户网站,可以通过制定相应的策略或者技术手段,防止爬虫程序进行网站数据的爬取。
|
||||
- 反反爬策略
|
||||
爬虫程序可以通过制定相关的策略或者技术手段,破解门户网站中具备的反爬机制,从而获取门户网站中的相关数据。
|
||||
|
||||
http协议:
|
||||
- 概念:服务器与客户端进行数据交互的一种形式。
|
||||
- 常用请求头信息:
|
||||
(1)User-Agent:请求载体的身份标识。
|
||||
(2)Connection:请求完毕后,是断开连接还是保持连接。
|
||||
- 常见响应头信息:
|
||||
(1)Content-Type:服务器响应回客户端的数据类型
|
||||
|
||||
https协议:
|
||||
- 概念:安全的超文本传输协议
|
||||
- 加密方式
|
||||
(1)对称密钥加密
|
||||
(2)非对称密钥加密
|
||||
(3)证书密钥加密
|
||||
|
||||
requests模块:python中原生的一款基于网络请求的模块,功能强大,简单便捷,效率极高。
|
||||
- 作用:模拟浏览器发请求
|
||||
- 如何使用
|
||||
(1)指定url
|
||||
(2)发起请求(Get or Post)
|
||||
(3)获取响应数据
|
||||
(4)持久化存储
|
||||
|
||||
动态加载数据:
|
||||
网页信息可能是动态加载的,ajax动态请求,可以在XHR中查看真正url
|
||||
|
||||
数据解析:
|
||||
- 正则
|
||||
- bs4
|
||||
- xpath(***)
|
||||
|
||||
数据解析原理:
|
||||
大部分文本内容都存在标签中或者标签对应的属性中
|
||||
进行指定标签定位
|
||||
标签或标签对应的属性中存储的数据值进行提取(解析)
|
||||
|
||||
聚焦爬虫编码流程:
|
||||
- 指定url
|
||||
- 发起请求
|
||||
- 获取响应数据
|
||||
- 数据解析
|
||||
- 持久化存储
|
||||
|
||||
正则表达式:(import re)
|
||||
- 单字符:
|
||||
. : 除换行以外的所有字符
|
||||
[] : [aoe] [a-w] 匹配集合任意一个字符
|
||||
\d : 数字 [0-9]
|
||||
\D : 非数字
|
||||
\w : 数字,字母,下划线,中文
|
||||
\W : 非\w
|
||||
\s : 所有的空白字符,包括空格,制表符,换页符等等。等价于 [\f\n\r\t\v]
|
||||
\S : 非空白
|
||||
- 数量修饰:
|
||||
* : 任意多次 >=0
|
||||
+ : 至少一次 >=1
|
||||
? : 可有可无,0次或1次
|
||||
{m} : 固定m次 hello{3}
|
||||
{m,} : 至少m次
|
||||
{m,n} : m-n次
|
||||
- 边界:
|
||||
$ : 以某某结尾
|
||||
^ : 以某某开头
|
||||
- 分组:
|
||||
(ab)
|
||||
- 贪婪模式: .*
|
||||
- 非贪婪模式: .*?
|
||||
- re.I: 忽略大小写
|
||||
- re.M: 多行匹配
|
||||
- re.S: 单行匹配
|
||||
- re.sub(正则表达式,替换内容, 字符串)
|
||||
|
||||
<div class="pic">
|
||||
<a href="https://www.douban.com/photos/album/1727324287/">
|
||||
<img src="https://img1.doubanio.com/view/photo/albumcover/public/p2578730628.webp"
|
||||
data-origin="https://img1.doubanio.com/view/photo/albumcover/public/p2578730628.webp" alt="">
|
||||
</a>
|
||||
</div>
|
||||
|
||||
ex = '<div class="pic">.*?<img src=.*? data-origin="(.*?)" alt=.*?</div>'
|
||||
|
||||
bs4进行数据解析:
|
||||
- 数据解析原理:
|
||||
标签定位
|
||||
提取标签、标签属性的数据值
|
||||
- bs4数据解析原理:
|
||||
1.实例化一个BeautifulSoup对象,并将页面源码数据加载到该对象中
|
||||
2.通过调用BeautifulSoup对象中相关属性或者方法进行标签定位和数据提取
|
||||
- 进行环境的安装:
|
||||
pip install bs4
|
||||
pip install lxml
|
||||
- 如何实例化BeautifulSoup对象:
|
||||
1.from bs4 import BeautifulSoup
|
||||
2.对象的实例化
|
||||
- 将本地的html文档中的数据加载到该对象中
|
||||
fp = open('./test.html', 'r', encoding='utf-8')
|
||||
soup = BeautifulSoup(fp, 'lxml')
|
||||
- 将互联网上获取的页面源码加载到该对象中
|
||||
page_text = response.text
|
||||
soup = BeautifulSoup(page_text, 'lxml')
|
||||
- 提供的用于数据解析的方法和属性:
|
||||
soup.tagName: 返回的是文档中出现的第一个tagName标签
|
||||
soup.find()
|
||||
- soup.find('tagName'): 相当于soup.tagName
|
||||
- soup.find('tagName', class_='song'): 属性定位
|
||||
- soup.find_all('tagName'): 返回符合要求的所有标签,是一个列表
|
||||
soup.select():
|
||||
- select('某种选择器'): 返回的是一个列表
|
||||
- 层级选择器:
|
||||
soup.select('.tang > ul > li > a') > 表示一个层级
|
||||
soup.select('.tang > ul a') ' '空格表示多个层级
|
||||
- 获取标签中的文本数据
|
||||
soup.a.text/.string/.get_text():
|
||||
text/get_text(): 可以获取某一个标签中的所有文本内容
|
||||
string: 只可以获取该标签下面直系的文本内容
|
||||
- 获取标签中属性值
|
||||
soup.a['属性名']
|
||||
|
||||
xpath解析: 最常用且最便捷高效的一种解析方式,通用性强。
|
||||
- 原理:1.实例化一个etree对象,且需要将被解析的页面源码数据加载到该对象中。
|
||||
2.调用etree对象中的xpath方法结合着xpath表达式实现标签的定位和内容的捕获。
|
||||
- 环境的安装:
|
||||
pip install lxml
|
||||
- 如何实例化etree对象: from lxml import etree
|
||||
将本地的html文档中的数据加载到该对象中
|
||||
etree.parse(filePath)
|
||||
将互联网上获取的页面源码加载到该对象中
|
||||
etree.HTML(page_text)
|
||||
- xpath("xpath表达式")
|
||||
/: 表示从根节点开始定位,一个/表示一个层级.
|
||||
//: 表示多个层级(可以表示从任意位置定位)
|
||||
属性定位: //div[@class="song"] tag[@attrNAme="attrValue"]
|
||||
索引定位: tree.xpath('//div[@class="song"]/p[3]') 索引是从1开始的
|
||||
取文本:/text() 只能取到标签的直系文本 //text() 可以取到标签中非直系的文本内容(所有文本内容)
|
||||
取属性:/@attrName ==>img/@src
|
||||
局部解析: title = li.xpath('./a/div[2]//h3/text()')[0] 一定要加"."
|
||||
|
||||
验证码识别:
|
||||
- 识别验证码操作:
|
||||
- 人工肉眼识别。(不推荐)
|
||||
- 第三方自动识别。(推荐)
|
||||
- ddddocr库:
|
||||
import ddddocr
|
||||
ocr = ddddocr.DdddOcr()
|
||||
with open('1.png', 'rb') as f:
|
||||
img_bytes = f.read()
|
||||
res = ocr.classification(img_bytes)
|
||||
print(res)
|
||||
|
||||
模拟登录:
|
||||
- 爬取基于某些用户的用户信息。
|
||||
- 点击登录按钮之后可能会发起一个post请求
|
||||
- post请求中会携带相关的用户信息(用户名,密码,验证码....)
|
||||
- 页面会更新_VIEWSTATE 页面隐藏域和__VIEWSTATEGENERATOR 页面隐藏域时,我们需要对这个数据也进行爬取
|
||||
viewstate = tree.xpath("//input[@id='__VIEWSTATE']/@value")[0]
|
||||
viewstategenerator = tree.xpath("//input[@id='__VIEWSTATEGENERATOR']/@value")[0]
|
||||
EVENTVALIDATION = tree.xpath("//input[@id='__EVENTVALIDATION']/@value")
|
||||
- 我们一次只能用requests发一次请求,之后再需要发请求时,用Session(),将请求包装成一个对象,这样就不会导致访问失败
|
||||
session = requests.Session()
|
||||
code_data = session.get(url=code_img_src, headers=headers).content
|
||||
http/https协议特性: 无状态
|
||||
- 发起的第二次基于个人页面请求的时候,服务器端并不知道此请求是基于登录状态下的请求
|
||||
- cookie: 用来让服务器端记录客户端的相关状态
|
||||
- 自动处理: cookie值的来源: 登陆时post请求中携带有cookie值
|
||||
session会话对象:
|
||||
- 作用:
|
||||
- 可以进行请求的发送
|
||||
- 如果请求过程中产生了cookie,则该cookie会被自动存储携带在该session对象中
|
||||
|
||||
代理: 破解封IP这种反爬机制
|
||||
什么是代理:
|
||||
- 代理服务器
|
||||
代理的作用:
|
||||
- 突破自身ip访问的限制
|
||||
- 可以隐藏自身真实的ip
|
||||
代理IP的匿名度:
|
||||
- 透明:服务器知道使用了代理,知道真实ip
|
||||
- 匿名:服务器知道使用了代理,不知道真实IP
|
||||
- 高匿:服务器不知道使用了代理
|
||||
|
||||
高性能异步爬虫:
|
||||
- 异步爬虫的方式:
|
||||
- 多线程,多进程(不建议): 无法无限制的开启多线程和多进程。
|
||||
- 进程池、线程池:池的容量是有上限的。
|
||||
- 单线程 + 异步协程(推荐):
|
||||
- event_loop: 事件循环,相当于一个无限循环,我们可以把一些函数注册到这个事件循环上,当满足某些条件时,函数就会被循环执行。
|
||||
- coroutine: 协程对象,我们可以将协程对象注册到事件循环中,它会被事件循环调用。我们可以使用async关键字来定义一个方法,这个方法在调用的时候不会立即执行,而是返回一个协程对象。
|
||||
- task: 任务,它是对协程对象的进一步封装,包含了任务的各个状态。
|
||||
- future: 代表将要执行或还没有执行的任务,实际上和task没有本质区别。
|
||||
- async: 定义一个协程。
|
||||
- await: 用来挂起阻塞方法的执行。
|
||||
|
||||
|
||||
selenium模块的基本使用:
|
||||
- 下载selenium: pip install selenium
|
||||
- 下载一个浏览器驱动程序(谷歌)
|
||||
- 下载路径:http://chromedriver.storage.googleapis.com/index.html
|
||||
- 实例化一个浏览器对象
|
||||
- 编写基于浏览器自动化操作代码
|
||||
- 发起请求: get(url)
|
||||
- 标签定位: find系列方法
|
||||
- 标签交互: send_keys('xxx')
|
||||
- 执行js程序: execute_script('jsCode')
|
||||
- 前进,后退: back(),forward()
|
||||
- 关闭浏览器: quit()
|
||||
- selenium处理iframe
|
||||
- 如果定位的标签存在iframe标签之中,则必须使用switch_to.frame(id)
|
||||
- 动作链: from selenium.webdriver import ActionChains
|
||||
- 实例化一个动作链对象: action = ActionChains(bro)
|
||||
- 执行操作: action.click_and_hold(div) action.move_by_offset(17, 0).perform()
|
||||
- 释放对象: action.release()
|
||||
|
||||
scrapy框架
|
||||
- 什么是框架?
|
||||
- 集成了很多功能并且具有很强通用性的一个项目模板。
|
||||
- 如何学习框架?
|
||||
- 专门学习框架封装的各种功能的详细用法。
|
||||
|
||||
- 什么是scrapy?
|
||||
- 爬虫中封装好的一个明星框架。
|
||||
- 功能: 高性能的持久化存储,异步的数据下载,高性能的数据解析操作,分布式
|
||||
- 基本使用
|
||||
- 环境安装: mac/linux: pip install scrapy
|
||||
: windows: pip install wheel
|
||||
下载twisted,下载地址为: http://www.lfd.uci.edu/~gohlke/pythonlibs/#twisted
|
||||
下载twisted: pip install Twisted-20.3.0-cp39-cp39-win_amd64.whl
|
||||
pip install pywin32
|
||||
pip install scrapy
|
||||
- 创建一个工程: scrapy startproject xxxPro
|
||||
- cd xxxPro
|
||||
- 在spiders子目录中创建一个爬虫文件
|
||||
- scrapy genspider spiderName www.xxx.com
|
||||
- 执行: scrapy crawl spiderName(scrapy crawl test --nolog 采用无日志信息输出,但是这样不好,我们使用接下来的方法)
|
||||
- 在配置文件中添加: LOG_LEVEL = 'ERROR' 表示只输出错误信息
|
||||
- 修改user-agent: USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
|
||||
- scrapy数据解析
|
||||
- 在parse(self, response)函数中进行编写,详细操作方法可见qiubai案例
|
||||
- scrapy持久化存储
|
||||
- 基于终端命令的持久化存储:
|
||||
- 要求: 只可以将parse方法的返回值存储到本地的文本文件中
|
||||
- 注意持久化存储的文件类型只可以为 'json', 'jsonlines', 'jl', 'csv', 'xml', 'marshal', 'pickle'
|
||||
- scrapy crawl xxx -o filePath
|
||||
- 优点: 简洁高效便捷
|
||||
- 缺点: 局限性比较强(数据只可以存储到指定文件后缀的文本文件中)
|
||||
- 基于管道持久化存储:
|
||||
- 数据解析
|
||||
- 在item类中定义相关的属性
|
||||
- 将解析到的数据封装到item类型的对象
|
||||
- 将item类型的对象提交给管道进行持久化存储
|
||||
- 在管道类的process_item中要将其接受到的item对象中存储的数据进行持久化存储操作
|
||||
- 在配置文件中开启管道
|
||||
- 好处:通用性强
|
||||
- 爬虫文件提交的item类型对象最终会提交给哪一个管道类?
|
||||
- 先执行的管道类
|
||||
|
||||
面试题: 将爬取到的数据一份存储到本地一份存储到数据库,如何实现?
|
||||
- 管道文件中一个管道类对应的是将数据存储到一种平台
|
||||
- 爬虫文件提交的item只会给管道文件中第一个被执行的管道类接受
|
||||
- process_item中的return item表示item将会被传递给下一个即将被执行的管道类
|
||||
|
||||
基于spider的全站数据爬取
|
||||
- 就是将网站下的某板块下的全部页码对应的页面数据进行爬取
|
||||
- 爬取笑话网的段子数据
|
||||
- 将所有页面的url添加到start_urls列表(不推荐)
|
||||
- 自行手动进行请求发送
|
||||
|
||||
scrapy五大核心部件
|
||||
- 管道
|
||||
- 与引擎交互,之后持久化存储
|
||||
- spider
|
||||
- 爬虫文件中的爬虫类
|
||||
- 调度器
|
||||
- 过滤器: 过滤重复的请求对象
|
||||
- 队列: 存放请求对象
|
||||
- 下载器
|
||||
- 与互联网通信,数据下载
|
||||
- 引擎
|
||||
- 接收队列以及发送response
|
||||
- 所有类都要经过引擎
|
||||
- 核心作用: 用作数据流处理以及触发事件
|
||||
|
||||
请求传参
|
||||
- 使用场景: 爬取解析的数据不在同一张页面中。(深度爬取)
|
||||
- 需求: 爬取boss直聘的岗位名称,岗位描述
|
||||
|
||||
图片数据爬取之ImagePipeline
|
||||
- 基于scrapy爬取字符串类型数据和图片类型数据的区别?
|
||||
- 字符串: 只需要基于xpath解析且提交管道进行持久化存储
|
||||
- 图片: 我们只能解析到图片地址,之后我们要单独对图片地址发起请求获取图片二进制类型数据
|
||||
- 基于ImagePipeline:
|
||||
- 只需要将img的src属性值提交给管道,管道就会对图片的src进行请求发送获取图片的二进制类型的数据,且还会进行持久化存储
|
||||
- 需求: 爬取站长素材中的高清图片
|
||||
|
||||
中间件
|
||||
- 下载中间件
|
||||
- 位置: 引擎和下载器之间
|
||||
- 作用: 批量拦截到整个工程中所有的请求和响应
|
||||
- 拦截请求:
|
||||
- 请求头信息(UA伪装, 代理ip)
|
||||
- 拦截响应:
|
||||
- 篡改响应数据,响应对象
|
||||
|
||||
CrawlSpider: 类,Spider的一个子类
|
||||
- 全站数据爬取的方式
|
||||
- 基于spider实现: 手动请求发送
|
||||
- 基于CrawlSpider实现
|
||||
- CrawlSpider的使用:
|
||||
- 创建一个工程
|
||||
- 创建爬虫文件: scrapy genspider -t crawl xxx www.xxx.com
|
||||
- 链接提取器
|
||||
- 作用: 根据指定规则(allow=r'正则表达式')进行指定链接提取
|
||||
- 规则解析器
|
||||
- 作用: 将链接提取器提取到的链接进行指定规则(callback)的解析操作
|
||||
|
||||
分布式爬虫
|
||||
- 概念: 我们需要搭建一个分布式机群,让其对一组资源进行分布联合爬取
|
||||
- 作用: 提升爬取数据的效率
|
||||
- 如何实现分布式?
|
||||
- 安装一个scrapy-redis的组件
|
||||
- 原生的scrapy是不可以实现分布式爬虫,必须要让scrapy结合着scrapy-redis组件一起实现分布式爬虫
|
||||
- 为什么原生的scrapy不可以实现分布式爬虫?
|
||||
- 不同电脑的scrapy调度器不能共享
|
||||
- 管道不可以被分布式机群共享
|
||||
- scrapy-redis组件的作用:
|
||||
- 可以给原生的scrapy框架提供可以被共享的管道和调度器
|
||||
- 实现流程
|
||||
- 创建一个工程
|
||||
- 创建一个基于crawlspider的爬虫文件
|
||||
- 修改当前的爬虫文件:
|
||||
- from scrapy_redis.spiders import RedisCrawlSpider
|
||||
- 将start_url allowed_domain进行注释
|
||||
- redis_key = 'name' 可以被共享的调度器队列的名称
|
||||
- 编写相关的数据解析操作
|
||||
- 将当前爬虫类的父类修改成RedisCrawlSpider
|
||||
- 修改配置文件
|
||||
- 指定可以被共享的管道:
|
||||
- ITEM_PIPELINES = {
|
||||
'scrapy_redis.pipelines.RedisPipeline': 400,
|
||||
}
|
||||
- 指定调度器
|
||||
- DUPEFILTER_CLASS = 'scrapy_redis.dupefilter.RFPDupeFilter'
|
||||
- SCHEDULER = 'scrapy_redis.scheduler.Scheduler'
|
||||
- SCHEDULER_PERSIST = True
|
||||
- 指定redis服务器
|
||||
- REDIS_HOST = '127.0.0.1' # 服务器ip
|
||||
- REDIS_PORT = 6379
|
||||
- redis相关操作配置:
|
||||
- 配置redis的配置文件:
|
||||
- linux或者mac: redis.conf
|
||||
- windows: redis.windows.conf
|
||||
- 打开配置文件修改:
|
||||
- 注释绑定: # bind 127.0.0.1
|
||||
- 关闭保护模式: protected-mode no
|
||||
- 结合配置文件启动redis数据库
|
||||
- redis-server 配置文件
|
||||
- ./redis-cli
|
||||
- 执行工程
|
||||
- scrapy runspider xxx.py
|
||||
- 向调度器的队列中放入起始url
|
||||
- 调度器的队列在redis客户端中
|
||||
- lpush xxx www.xxx.com
|
||||
|
||||
增量式爬虫
|
||||
- 概念: 检测网站数据更新情况,只会爬取网站最新更新出来的数据
|
||||
- 分析:
|
||||
- 起始url
|
||||
- 基于CrawlSpider获取其他页面链接
|
||||
- 基于Rule将其他页码链接进行请求
|
||||
- 从每一个页面对应的页面源码中解析出每一部电影详情页的url
|
||||
|
||||
- 核心: 检测详情页的url之前有没有请求过
|
||||
- 将爬取过的电影详情页url存储
|
||||
- 存储到redis的set数据库
|
||||
conn = Redis(host='127.0.0.1', port=6379)
|
||||
|
||||
ex = self.conn.sadd('urls', detail_url)
|
||||
if ex == 1:
|
||||
print('该url没有被爬取过,可以进行数据爬取!')
|
||||
yield scrapy.Request(detail_url, callback=self.parse_detail)
|
||||
else:
|
||||
print('数据还没有更新,暂无新数据!')
|
||||
|
||||
class PipeLine(object):
    """Item pipeline that pushes each scraped movie record onto a Redis list.

    The Redis client is not created here: it is taken from the spider
    (``spider.conn``) when the spider is opened.
    """

    # Redis client; populated in open_spider.
    conn = None

    def open_spider(self, spider):
        """Reuse the Redis connection object exposed by the spider."""
        self.conn = spider.conn

    def process_item(self, item, spider):
        """Store the item's ``name`` and ``desc`` in the ``movieData`` list.

        Returns the item unchanged so that any later pipeline still
        receives it.
        """
        import json  # local import keeps this snippet self-contained

        dic = {
            'name': item['name'],
            'desc': item['desc'],
        }
        print(dic)
        # redis-py (>= 3.0) only accepts bytes/str/int/float values and
        # raises DataError for a dict, so store the record as a JSON string.
        self.conn.lpush('movieData', json.dumps(dic, ensure_ascii=False))
        return item
|
||||
|
||||
- 对详情页的url发起请求,然后解析出电影的名称和时间
|
||||
- 进行持久化存储
|
||||
|
||||
scrapy_splash
|
||||
- 使用scrapy_splash最终拿到的response相当于在浏览器全部渲染完成之后的网页页面
|
||||
- 作用: 模拟浏览器加载js,并返回js运行后的数据
|
||||
- 安装环境:
|
||||
- 安装docker
|
||||
- sudo docker pull scrapinghub/splash
|
||||
- 尝试运行镜像:
|
||||
- 在前台运行: sudo docker run -p 8050:8050 scrapinghub/splash
|
||||
- 在后台运行: sudo docker run -d -p 8050:8050 scrapinghub/splash
|
||||
|
||||
JS逆向
|
||||
- 数据加密
|
||||
- 看到的是一堆密文
|
||||
- 请求头加密
|
||||
- 表单加密
|
||||
- 模拟生成规则,在被加密前是什么内容
|
||||
- 参数加密
|
||||
- cookie加密
|
||||
- 通常是在浏览器有正确地响应,但是爬虫返回的是一堆js代码或者非正常的响应
|
||||
|
||||
Web逆向技巧
|
||||
- 爬虫的接口定位
|
||||
- 字体加密;Unicode编码;数据加密
|
||||
- 无混淆的js
|
||||
- 关键字搜索
|
||||
- 解密搜decrypt
|
||||
- 加密搜encrypt
|
||||
- ajax渲染搜JSON.parse(常见形式:JSON.parse(函数或者方法(密文)),或者 a = 函数或者方法(密文); JSON.parse(a))
|
||||
- 搜接口自带的关键字(特点:方法或者函数包裹密文数据)
|
||||
- xhr断点
|
||||
- 路径搜索
|
||||
- 跟栈
|
||||
- hook
|
||||
- 反debug: 内存写入变量
|
||||
- 注入: 控制台注入 本地替换
|
||||
|
||||
|
||||
1. html -- lxml -- re
|
||||
2. json -- 如何提取键值以及组装成自己想要的样子
|
16
request/01-Request.py
Normal file
16
request/01-Request.py
Normal file
@ -0,0 +1,16 @@
|
||||
# requests模块的使用
|
||||
import requests
|
||||
|
||||
if __name__ == "__main__":
    # Page to download.
    target = 'https://wz.sun0769.com/political/index/politicsNewest'
    # requests.get returns a Response object; .text is its decoded body.
    html = requests.get(url=target).text
    # Persist the page source to disk.
    with open('./sogou.html', 'w', encoding='utf-8') as out_file:
        out_file.write(html)
    print('爬取数据结束!')
|
||||
|
24
request/02-(UA)网页采集器.py
Normal file
24
request/02-(UA)网页采集器.py
Normal file
@ -0,0 +1,24 @@
|
||||
# UA检测(反爬机制):门户网站的服务器会检测对应请求的载体身份标识,如果检测到请求的载体身份为某一浏览器,说明该请求是一个正常请求。
|
||||
# 但是如果检测到不是某一浏览器,则表示该请求为非正常请求。服务器端拒绝该次请求。
|
||||
# UA:User-Agent(请求载体的身份标识)
|
||||
# UA伪装:让爬虫身份标识伪装成浏览器
|
||||
import requests
|
||||
if __name__ == '__main__':
    # Search term typed by the user; it is sent as the `query` URL parameter.
    keyword = input('enter a word:')
    # requests encodes `params` into the query string of the request URL.
    response = requests.get(
        url='https://www.sogou.com/web?',
        params={'query': keyword},
        # UA spoofing: identify the crawler as a desktop Chrome browser.
        headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
        },
    )

    # Save the result page under "<keyword>.html".
    output_name = keyword + '.html'
    with open(output_name, 'w', encoding='utf-8') as out_file:
        out_file.write(response.text)
    print(output_name, '保存成功!')
|
26
request/03-(POST)百度翻译.py
Normal file
26
request/03-(POST)百度翻译.py
Normal file
@ -0,0 +1,26 @@
|
||||
# post请求(携带了参数)
|
||||
# 响应数据是一组json数据
|
||||
import requests
|
||||
import json
|
||||
if __name__ == '__main__':
    # Endpoint the word is posted to.
    post_url = 'https://fanyi.baidu.com/sug'
    # UA spoofing: identify the crawler as a desktop Chrome browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }
    # Form payload of the POST request.
    word = input('enter a word:')
    data = {
        'kw': word
    }
    response = requests.post(url=post_url, data=data, headers=headers)
    # .json() parses the body into Python objects; only valid when the
    # response really is JSON.
    dic_obj = response.json()
    # Persist the parsed result as "<word>.json". The context manager
    # guarantees the file is flushed and closed (the handle used to be
    # left open).
    filename = word + '.json'
    with open(filename, 'w', encoding='utf-8') as fp:
        json.dump(dic_obj, fp=fp, ensure_ascii=False)

    print('over!!')
|
23
request/04-豆瓣电影爬取.py
Normal file
23
request/04-豆瓣电影爬取.py
Normal file
@ -0,0 +1,23 @@
|
||||
import requests
|
||||
import json
|
||||
if __name__ == '__main__':
    # Chart endpoint; the query parameters below select the slice to fetch.
    url = 'https://movie.douban.com/j/chart/top_list'
    param = {
        'type': '24',
        'interval_id': '100:90',
        'action': '',
        'start': '1',
        'limit': '20',
    }
    # UA spoofing: identify the crawler as a desktop Chrome browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }
    response = requests.get(url=url, params=param, headers=headers)
    # .json() parses the response body into Python objects.
    list_data = response.json()
    print(list_data)
    # The context manager guarantees the file is flushed and closed
    # (the handle used to be left open).
    with open('./douban.json', 'w', encoding='utf-8') as fp:
        json.dump(list_data, fp=fp, ensure_ascii=False)

    print('Over!!')
|
27
request/05-肯德基餐厅位置查询.py
Normal file
27
request/05-肯德基餐厅位置查询.py
Normal file
@ -0,0 +1,27 @@
|
||||
import requests
|
||||
import json
|
||||
if __name__ == '__main__':
    url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
    # UA spoofing: identify the crawler as a desktop Chrome browser.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'
    }

    place = input('enter a place:')
    fileName = place + 'KFC餐厅位置信息' + '.json'
    # Fixed upper bound of 20 pages, requested as page 1..20.
    for page in range(1, 21):
        param = {
            'cname': '',
            'pid': '',
            'keyword': place,  # place to search for
            'pageIndex': page,  # page number being requested
            'pageSize': '10',  # at most 10 results per page
        }
        response = requests.post(url=url, params=param, headers=headers)
        # Append one JSON document per line. Dump the *parsed* payload:
        # dumping response.text would write the whole body as a single
        # escaped JSON string literal (double encoding).
        # NOTE(review): assumes the endpoint always answers with JSON - confirm.
        with open(fileName, 'a', encoding='utf-8') as fp:
            json.dump(response.json(), fp=fp, ensure_ascii=False)
            fp.write('\n')
    print('over!!!')
|
0
scrapy/bossjob/bossjob/__init__.py
Normal file
0
scrapy/bossjob/bossjob/__init__.py
Normal file
15
scrapy/bossjob/bossjob/fakeCookie.py
Normal file
15
scrapy/bossjob/bossjob/fakeCookie.py
Normal file
@ -0,0 +1,15 @@
|
||||
import random
|
||||
|
||||
# Raw "Cookie" header strings; cookie_dic() picks one at random.
COOKIE_LIST = [
    'wd_guid=544d13f9-f072-4fdc-9989-84452f1ecd52; historyState=state; _bl_uid=XtlO5cqLjv05qpj3t0d0nna8msI4; lastCity=101020100; __g=-; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1673095377,1673165470,1673257271,1673333037; boss_login_mode=sms; __fid=c58f56b0daac21ec5273e9b4b258f537; wt2=DY4IX_Pe18l5jPqD0AYgnA-G9UnTNtDaZ_zMhCpK7UovHjn5bKxYiZ6NtwTrfsFzsgpxFtIBCopvwd7HdvXTGrg~~; wbg=0; __zp_stoken__=887aefCE3dDAxC0wecFokLmdqeARKZz80V3cWbnglEDsONSs%2FVCMzL295aWdxVWw6Ry4PehcuLyROcX4mdTpZXyFXVEtiREADYGooaVQmYhwcSUtZVAQoNVpLLXZRQkdxBRc9G0QGUFhyNA0%3D; geek_zp_token=V1RN0kEOL031ZiVtRvyB4bLCuw6zrQxCo~; __l=l=%2Fwww.zhipin.com%2Fshanghai%2F&r=&g=&s=3&friend_source=0&s=3&friend_source=0; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1673349533; __c=1673333037; __a=68265253.1672926940.1673257271.1673333037.431.9.106.431'
]


def cookie_dic():
    """Return one randomly chosen entry of COOKIE_LIST parsed into a dict.

    Each ``name=value`` pair is separated by ``;``. Names are stripped of
    the whitespace that follows the separator, and only the first ``=``
    splits name from value, so values that themselves contain ``=`` are
    kept whole. Segments without ``=`` (e.g. a trailing ``;``) are skipped.

    Returns:
        dict mapping cookie name to cookie value (both ``str``).
    """
    cookie_string = random.choice(COOKIE_LIST)
    cookie_dict = {}
    for pair in cookie_string.split(';'):
        # partition splits on the first '=' only.
        name, sep, value = pair.strip().partition('=')
        if not sep or not name:
            continue
        cookie_dict[name] = value
    return cookie_dict
|
454
scrapy/bossjob/bossjob/fake_useragent.py
Normal file
454
scrapy/bossjob/bossjob/fake_useragent.py
Normal file
@ -0,0 +1,454 @@
|
||||
import random
|
||||
|
||||
import requests
|
||||
|
||||
USER_AGENTS = [
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
|
||||
"Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/537.13 (KHTML, like Gecko) Chrome/24.0.1290.1 Safari/537.13",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.6 Safari/537.11",
|
||||
"Mozilla/5.0 (Windows NT 6.0) yi; AppleWebKit/345667.12221 (KHTML, like Gecko) Chrome/23.0.1271.26 Safari/453667.1221",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
|
||||
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.45 Safari/535.19",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.66 Safari/535.11",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.04 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/10.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Ubuntu/11.10 Chromium/17.0.963.65 Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (X11; FreeBSD amd64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.65 Safari/535.11",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
|
||||
"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
|
||||
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7",
|
||||
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.8 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.8",
|
||||
"Mozilla/5.0 (Windows NT 5.2; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.63 Safari/535.7",
|
||||
"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
|
||||
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.861.0 Safari/535.2",
|
||||
"Chrome/15.0.860.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/15.0.860.0",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.814.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.813.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.810.0 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/10.04 Chromium/14.0.808.0 Chrome/14.0.808.0 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/14.0.803.0 Chrome/14.0.803.0 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.803.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.801.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.794.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 5.2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.792.0 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.790.0 Safari/535.1",
|
||||
"Mozilla/5.0 ArchLinux (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Ubuntu/11.04 Chromium/13.0.782.41 Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 5.2; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_3) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.41 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.220 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.215 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.20 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.20 Safari/535.1",
|
||||
"Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.20 Safari/535.1",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_2) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.107 Safari/535.1",
|
||||
"Mozilla/5.0 (X11; Linux amd64) AppleWebKit/534.36 (KHTML, like Gecko) Chrome/13.0.766.0 Safari/534.36",
|
||||
"Mozilla/5.0 (X11; CrOS i686 12.0.742.91) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.93 Safari/534.30",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.91 Chromium/12.0.742.91 Safari/534.30",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.04 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/11.04 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.10 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Ubuntu/10.04 Chromium/12.0.742.112 Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (Windows NT 7.1) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (Windows NT 5.2) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (Windows 8) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.112 Safari/534.30",
|
||||
"Mozilla/5.0 ArchLinux (X11; U; Linux x86_64; en-US) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Slackware/Chrome/12.0.742.100 Safari/534.30",
|
||||
"Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30",
|
||||
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_4) AppleWebKit/534.30 (KHTML, like Gecko) Chrome/12.0.742.100 Safari/534.30",
|
||||
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24",
|
||||
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/12.0.702.0 Safari/534.24",
|
||||
"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.699.0 Safari/534.24",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.68 Safari/534.24",
|
||||
"Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.34 Safari/534.24",
|
||||
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24",
|
||||
"Mozilla/5.0 (Windows NT 6.0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.12 Safari/534.24",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.0 Safari/534.24",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7_0; en-US) AppleWebKit/534.21 (KHTML, like Gecko) Chrome/11.0.678.0 Safari/534.21",
|
||||
"Mozilla/5.0 (Windows NT) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.18 (KHTML, like Gecko) Chrome/11.0.661.0 Safari/534.18",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/11.0.655.0 Safari/534.17",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.17 (KHTML, like Gecko) Chrome/10.0.649.0 Safari/534.17",
|
||||
"Mozilla/5.0 (X11; U; FreeBSD x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16",
|
||||
"Mozilla/5.0 (X11; U; FreeBSD i386; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204 Safari/534.16",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.204",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.134 Safari/534.16",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.133 Chrome/10.0.648.133 Safari/534.16",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.133 Safari/534.16",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.127 Safari/534.16",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU; AppleWebKit/534.16; KHTML; like Gecko; Chrome/10.0.648.11;Safari/534.16)",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; ru-RU) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.11 Safari/534.16",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.648.0 Chrome/10.0.648.0 Safari/534.16",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.648.0 Safari/534.16",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.16 (KHTML, like Gecko) Chrome/10.0.634.0 Safari/534.16",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.15 (KHTML, like Gecko) Ubuntu/10.10 Chromium/10.0.613.0 Chrome/10.0.613.0 Safari/534.15",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/540.0 (KHTML, like Gecko) Ubuntu/10.10 Chrome/9.1.0.0 Safari/540.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.600.0 Safari/534.14",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.15 Safari/534.13",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416748405.3871",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416670950.695",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1416664997.4379",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.107 Safari/534.13 v1333515017.9196",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.13 (KHTML, like Gecko) Chrome/9.0.597.0 Safari/534.13",
|
||||
"Mozilla/5.0 (X11; U; CrOS i686 0.9.128; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.339",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.3 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/533.3",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.215 Safari/534.10",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.215 Safari/534.10",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.540.0 Safari/534.10",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.540.0 Safari/534.10",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/7.0.540.0 Safari/534.10",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7",
|
||||
"Mozilla/5.0 (ipad Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.6 (KHTML, like Gecko) Chrome/7.0.498.0 Safari/534.6",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.464.0 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.462.0 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.461.0 Safari/534.3",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.461.0 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.458.1 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.458.1 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.458.1 Safari/534.3",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.458.1 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.458.0 Safari/534.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/534.2 (KHTML, like Gecko) Chrome/6.0.454.0 Safari/534.2",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-US) AppleWebKit/534.2 (KHTML, like Gecko) Chrome/6.0.453.1 Safari/534.2",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/534.2 (KHTML, like Gecko) Chrome/6.0.453.1 Safari/534.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.1 (KHTML, like Gecko) Chrome/6.0.428.0 Safari/534.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB) AppleWebKit/534.1 (KHTML, like Gecko) Chrome/6.0.428.0 Safari/534.1",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_3; en-US) AppleWebKit/534.1 (KHTML, like Gecko) Chrome/6.0.428.0 Safari/534.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.99 Safari/533.4",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.99 Safari/533.4",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.99 Safari/533.4",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.99 Safari/533.4",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.86 Safari/533.4",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.86 Safari/533.4",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; fr-FR) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.126 Safari/533.4",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.366.0 Safari/533.4",
|
||||
"Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/533.3 (KHTML, like Gecko) Chrome/5.0.358.0 Safari/533.3",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/533.3 (KHTML, like Gecko) Chrome/5.0.358.0 Safari/533.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.3 (KHTML, like Gecko) Chrome/5.0.354.0 Safari/533.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.3 (KHTML, like Gecko) Chrome/5.0.353.0 Safari/533.3",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.343.0 Safari/533.2",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.7 Safari/533.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.3 Safari/533.2",
|
||||
"Mozilla/5.0 (X11; U; Linux i586; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.1 Safari/533.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/5.0.342.1 Safari/533.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN) AppleWebKit/533.16 (KHTML, like Gecko) Chrome/5.0.335.0 Safari/533.16",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_8; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; de-DE) Chrome/4.0.223.3 Safari/532.2",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.2 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.2 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.2 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.1 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.223.1 Safari/532.2",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.6 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.6 Safari/532.2",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.5 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.5 Safari/532.2",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.5 Safari/532.2",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.4 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.4 Safari/532.2",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.4 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.3 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.3 Safari/532.2",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.2 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.12 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.222.12 Safari/532.2",
|
||||
"Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.221.8 Safari/532.2",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.221.8 Safari/532.2",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.221.8 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.221.6 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.2 (KHTML, like Gecko) Chrome/4.0.221.6 Safari/532.2",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.5 Safari/532.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.3 Safari/532.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.219.3 Safari/532.1",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.1 Safari/532.1",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.213.0 Safari/532.1",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.212.1 Safari/532.1",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.1 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.1",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.212.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.7 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.4 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.2 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.211.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.210.0 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.209.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.209.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.209.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.208.0 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.207.0 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; FreeBSD i386; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.207.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.207.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.207.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.207.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.207.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.206.1 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.206.1 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.206.1 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.206.1 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.206.1 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.206.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.206.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.206.0 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.204.0 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.2 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.203.0 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0 (x86_64); de-DE) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; de-DE) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/525.13.",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.202.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/4.0.201.1 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.198.1 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.198.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.198.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.198.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.198 Safari/532.0",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_7; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.198 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.197.11 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.197.11 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.197.11 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.197.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.197.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.196.2 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.196.2 Safari/532.0",
|
||||
"Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.196.0 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0",
|
||||
"Mozilla/4.0 (Windows; U; Windows NT 5.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.33 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.3 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML,like Gecko) Chrome/3.0.195.27",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.24 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.21 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.20 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.17 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.10 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.10 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.1 Safari/532.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/531.4 (KHTML, like Gecko) Chrome/3.0.194.0 Safari/531.4",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/531.3 (KHTML, like Gecko) Chrome/3.0.193.2 Safari/531.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/531.3 (KHTML, like Gecko) Chrome/3.0.193.2 Safari/531.3",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.0 (KHTML, like Gecko) Chrome/3.0.191.0 Safari/531.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/531.0 (KHTML, like Gecko) Chrome/2.0.182.0 Safari/531.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/530.0 (KHTML, like Gecko) Chrome/2.0.182.0 Safari/531.0",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.7 (KHTML, like Gecko) Chrome/2.0.177.0 Safari/530.7",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.7 (KHTML, like Gecko) Chrome/2.0.176.0 Safari/530.7",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.7 (KHTML, like Gecko) Chrome/2.0.175.0 Safari/530.7",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.175.0 Safari/530.6",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.6",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.6",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.5",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/2.0.174.0 Safari/530.6",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.173.1 Safari/530.5",
|
||||
"Mozilla/6.0 (Windows; U; Windows NT 6.0; en-US) Gecko/2009032609 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.7",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.6 Safari/530.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.43 Safari/530.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.43 Safari/530.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.43 Safari/530.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.40 Safari/530.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.39 Safari/530.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.2 Safari/530.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; eu) AppleWebKit/530.4 (KHTML, like Gecko) Chrome/2.0.172.0 Safari/530.4",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/530.4 (KHTML, like Gecko) Chrome/2.0.172.0 Safari/530.4",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/2.0.172.0 Safari/530.5",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.10 (KHTML, like Gecko) Chrome/2.0.157.2 Safari/528.10",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_0; en-US) AppleWebKit/528.10 (KHTML, like Gecko) Chrome/2.0.157.2 Safari/528.10",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.9 (KHTML, like Gecko) Chrome/2.0.157.0 Safari/528.9",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.11 (KHTML, like Gecko) Chrome/2.0.157.0 Safari/528.11",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.10 (KHTML, like Gecko) Chrome/2.0.157.0 Safari/528.10",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/528.8 (KHTML, like Gecko) Chrome/2.0.156.1 Safari/528.8",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.8 (KHTML, like Gecko) Chrome/2.0.156.1 Safari/528.8",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/528.8 (KHTML, like Gecko) Chrome/2.0.156.0 Safari/528.8",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.59 Safari/525.19",
|
||||
"Mozilla/4.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.59 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.55 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.53 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.50 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.43 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.43 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/1.0.154.43 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.3.155.0 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.0 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.152.0 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.151.0 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.151.0 Safari/525.19",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.6 Safari/525.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.30 Safari/525.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.29 Safari/525.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 6.0; de) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13(KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13",
|
||||
"Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13",
|
||||
"Mozilla/5.0 (Linux; U; en-US) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13",
|
||||
"Mozilla/5.0 (Macintosh; U; Mac OS X 10_5_7; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.9 (KHTML, like Gecko) Chrome/ Safari/530.9",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.6 (KHTML, like Gecko) Chrome/ Safari/530.6",
|
||||
"Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5",
|
||||
]
|
||||
|
||||
|
||||
def get_ua():
    """Return one User-Agent string chosen at random from ``USER_AGENTS``."""
    candidates = USER_AGENTS
    return random.choice(candidates)
|
||||
|
||||
|
||||
def get_requests_headers():
    """Build a browser-like header dict with a randomly chosen User-Agent.

    Only the ``User-Agent`` entry varies between calls; every other header is
    a fixed value.

    NOTE(review): the ``sec-ch-ua-*`` hints announce a mobile Android client,
    while the User-Agent pool entries visible in this module are desktop
    browsers - confirm the mismatch is intended.

    Returns:
        dict: header name -> header value, with ``User-Agent`` first.
    """
    # Headers that are the same for every call, in the order they are sent.
    fixed_headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7',
        'Connection': 'close',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': 'https://www.zhipin.com/job_detail/?city=101020100&source=10&query=python',
        'sec-ch-ua-platform': '"Android"',
        'sec-ch-ua-mobile': '?1',
        'sec-fetch-dest': 'document',
        'sec-fetch-mode': 'navigate',
        'upgrade-insecure-requests': '1',
    }
    return {'User-Agent': random.choice(USER_AGENTS), **fixed_headers}
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Module self-check: print one generated header set, then use another
    # one to fetch a live page and dump its body.
    print(get_requests_headers())
    # requests waits forever by default; bound the wait so the self-check
    # cannot hang on an unresponsive host.
    response = requests.get(
        'http://www.ip3366.net/?stype=1&page=1',
        headers=get_requests_headers(),
        timeout=10,
    )
    # The body is decoded as gb2312; undecodable bytes are dropped.
    print(response.content.decode("gb2312", "ignore"))
|
18
scrapy/bossjob/bossjob/items.py
Normal file
18
scrapy/bossjob/bossjob/items.py
Normal file
@ -0,0 +1,18 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class BossjobItem(scrapy.Item):
    """Scrapy item describing one job posting collected by the bossjob project."""

    # define the fields for your item here like:
    # name = scrapy.Field()
    pay = scrapy.Field()  # salary
    job_name = scrapy.Field()  # job title
    detail_url = scrapy.Field()  # link to the job detail page
    company_name = scrapy.Field()  # company name
    requirement = scrapy.Field()  # requirements
    detail = scrapy.Field()  # NOTE(review): presumably the detail-page content - confirm against the spider
|
||||
|
184
scrapy/bossjob/bossjob/middlewares.py
Normal file
184
scrapy/bossjob/bossjob/middlewares.py
Normal file
@ -0,0 +1,184 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
import random
|
||||
import time
|
||||
from scrapy import signals
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
# 中间件1 -随机UA
|
||||
from scrapy.http import HtmlResponse
|
||||
from .requset import SeleniumRequest
|
||||
|
||||
from .fake_useragent import get_ua
|
||||
|
||||
|
||||
class BossjobRandomuaDownloaderMiddleware(object):
    """Downloader middleware that puts a random User-Agent on every request."""

    def process_request(self, request, spider):
        """Overwrite the request's ``User-Agent`` header with ``get_ua()``.

        Args:
            request: outgoing request whose ``headers`` mapping is modified in place.
            spider: unused.

        Returns:
            None, always.
        """
        request.headers['User-Agent'] = get_ua()
        return None
|
||||
|
||||
|
||||
# 中间件2 -随机代理
|
||||
import random
|
||||
from .settings import proxy_list
|
||||
|
||||
|
||||
class BossjobRandomProxyDownloadMiddleware(object):
    """Downloader middleware that sends every request through one tunnel proxy.

    Despite the class name, nothing is chosen at random here: the proxy
    endpoint and its credentials are fixed values.
    """

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` and force ``Connection: close``.

        Args:
            request: outgoing request; its ``meta`` and ``headers`` are modified in place.
            spider: unused.

        Returns:
            None, always.
        """
        tunnel = "u286.kdltps.com:15818"

        # Username/password authentication.
        # NOTE(review): these credentials are hard-coded in source; consider
        # loading them from settings or the environment instead.
        user, pwd = "t17335887797243", "n62s2uvp"
        request.meta['proxy'] = "http://{}:{}@{}/".format(user, pwd, tunnel)

        # Whitelist authentication would use the bare endpoint instead:
        # request.meta['proxy'] = "http://{}/".format(tunnel)

        request.headers["Connection"] = "close"
        return None
|
||||
|
||||
|
||||
# 中间件3 -Cookie
|
||||
from .fakeCookie import COOKIE_LIST
|
||||
|
||||
|
||||
class BossjobCookieDownloaderMiddleware(object):
    """Downloader middleware that attaches a fixed set of cookies to every request."""

    def process_request(self, request, spider):
        """Replace ``request.cookies`` with the dict from ``get_cookies()``.

        Args:
            request: outgoing request; its ``cookies`` attribute is overwritten.
            spider: unused.

        Returns:
            None, always.
        """
        request.cookies = self.get_cookies()
        return None

    def get_cookies(self):
        """Parse the hard-coded ``Cookie`` header string into a ``{name: value}`` dict.

        Each ``;``-separated pair is stripped of surrounding whitespace and
        split on its FIRST ``=`` only. Splitting on every ``=`` would cut off
        values that contain ``=`` themselves (the ``__l`` cookie does), and
        skipping the strip would leave a leading space on every name after
        the first.

        NOTE(review): the cookie string is a captured browser session that is
        hard-coded in source; it will expire and should not live in the repo.

        Returns:
            dict: cookie name -> cookie value.
        """
        cookie_string = 'wd_guid=544d13f9-f072-4fdc-9989-84452f1ecd52; historyState=state; _bl_uid=XtlO5cqLjv05qpj3t0d0nna8msI4; lastCity=101020100; wt2=DY4IX_Pe18l5jPqD0AYgnA-G9UnTNtDaZ_zMhCpK7UovHjn5bKxYiZ6NtwTrfsFzsgpxFtIBCopvwd7HdvXTGrg~~; wbg=0; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1673257271,1673333037,1673421249,1673621120; __g=-; __l=l=%2Fwww.zhipin.com%2Fjob_detail%2F01fd3a4e0ace71af1nx_0t-1F1pZ.html&s=3&friend_source=0&s=3&friend_source=0; geek_zp_token=V1RN0kEOL031ZiVtRvyB4eKymy7j3Vwi4~; __c=1673621123; __a=68265253.1672926940.1673421249.1673621123.475.11.15.475; __zp_stoken__=357feaV5aXwJLbUlmOy4uTW43dBlpeEsAbV5LT1RBZ10vQAMUSG4OBXFMIDkiIkJ0D3Z%2Bb35WOlduHEoVLlt3bnRiWQNiGnw7AgQdWhkjdlJNETohVUMiZCUfHx8IKAQ%2FTU9MDi1fN3RRXTk%3D; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1673621689'
        cookie_dict = {}
        for pair in cookie_string.split(';'):
            name, _, value = pair.strip().partition('=')
            if name:  # ignore empty segments such as a trailing ';'
                cookie_dict[name] = value
        return cookie_dict
|
||||
|
||||
|
||||
import zipfile
|
||||
import string
|
||||
from selenium import webdriver
|
||||
|
||||
|
||||
class seleniumDownloaderMiddleware(object):
    """Downloader middleware that loads ``SeleniumRequest`` URLs in a Chrome browser.

    One Chrome instance is started when the middleware is constructed. Its
    traffic is sent through an authenticated proxy, configured by a small
    Chrome extension that is generated and written to a zip file at start-up.
    Requests that are not ``SeleniumRequest`` instances are left untouched.
    """

    def __init__(self):
        self.option = webdriver.ChromeOptions()

        # NOTE(review): proxy host and credentials are hard-coded here;
        # consider moving them to settings or the environment.
        proxyauth_plugin_path = self._create_proxyauth_extension(
            tunnelhost="u286.kdltps.com",  # tunnel host name
            tunnelport="15818",  # port
            proxy_username="t17335887797243",  # user name
            proxy_password="n62s2uvp",  # password
        )
        self.option.add_extension(proxyauth_plugin_path)
        # Headless mode is left disabled:
        # self.option.add_argument('--headless')

        # Each experimental option name holds a single value, so both switches
        # must be passed in one list; setting the option twice would keep only
        # the last list.
        self.option.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
        self.option.add_experimental_option('useAutomationExtension', False)
        self.option.add_argument('blink-settings=imagesEnabled=false')
        self.option.add_argument("--no-sandbox")
        self.option.add_argument("--disable-dev-shm-usage")
        self.option.add_argument('--disable-gpu')

        # Raw string: the path contains backslashes that must not be read as escapes.
        self.bro = webdriver.Chrome(executable_path=r'D:\爬虫\selenium\chromedriver.exe', options=self.option)
        # Hide navigator.webdriver from every page before its own scripts run.
        self.bro.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
            'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
        })

    @staticmethod
    def _create_proxyauth_extension(tunnelhost, tunnelport, proxy_username, proxy_password, scheme='http',
                                    plugin_path=None):
        """Write a Chrome extension zip that configures an authenticated proxy.

        The archive contains ``manifest.json`` and a ``background.js`` that sets
        a fixed proxy server and answers proxy authentication challenges with
        the given credentials.

        :param tunnelhost: proxy host name
        :param tunnelport: proxy port (inserted into ``parseInt(...)``)
        :param proxy_username: user name for proxy authentication
        :param proxy_password: password for proxy authentication
        :param scheme: proxy scheme, ``'http'`` by default
        :param plugin_path: where to write the zip; defaults to
            ``vimm_chrome_proxyauth_plugin.zip`` in the working directory
        :return: the path of the written zip file
        """
        if plugin_path is None:
            plugin_path = 'vimm_chrome_proxyauth_plugin.zip'

        manifest_json = """
        {
            "version": "1.0.0",
            "manifest_version": 2,
            "name": "Chrome Proxy",
            "permissions": [
                "proxy",
                "tabs",
                "unlimitedStorage",
                "storage",
                "<all_urls>",
                "webRequest",
                "webRequestBlocking"
            ],
            "background": {
                "scripts": ["background.js"]
            },
            "minimum_chrome_version":"22.0.0"
        }
        """

        background_js = string.Template(
            """
            var config = {
                mode: "fixed_servers",
                rules: {
                    singleProxy: {
                        scheme: "${scheme}",
                        host: "${host}",
                        port: parseInt(${port})
                    },
                    bypassList: ["foobar.com"]
                }
            };

            chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});

            function callbackFn(details) {
                return {
                    authCredentials: {
                        username: "${username}",
                        password: "${password}"
                    }
                };
            }

            chrome.webRequest.onAuthRequired.addListener(
                callbackFn,
                {urls: ["<all_urls>"]},
                ['blocking']
            );
            """
        ).substitute(
            host=tunnelhost,
            port=tunnelport,
            username=proxy_username,
            password=proxy_password,
            scheme=scheme,
        )

        with zipfile.ZipFile(plugin_path, 'w') as zp:
            zp.writestr("manifest.json", manifest_json)
            zp.writestr("background.js", background_js)
        return plugin_path

    def _quit_browser(self):
        """Shut the browser down once; later calls are no-ops."""
        bro = getattr(self, 'bro', None)  # absent if __init__ failed early
        if bro is None:
            return
        self.bro = None
        # quit() ends the whole driver session, not just the current window.
        bro.quit()

    def spider_closed(self, spider=None):
        """Signal handler: release the browser when the spider finishes."""
        self._quit_browser()

    def __del__(self):
        # Safety net only; the normal shutdown path is spider_closed().
        self._quit_browser()

    @classmethod
    def from_crawler(cls, crawler):
        """Build the middleware and hook browser shutdown to ``spider_closed``."""
        s = cls()
        crawler.signals.connect(s.spider_closed, signal=signals.spider_closed)
        return s

    def process_request(self, spider, request):
        """Load ``SeleniumRequest`` URLs in the browser and return the rendered page.

        Returns an ``HtmlResponse`` built from the browser's page source for
        ``SeleniumRequest`` instances, and None for every other request.
        """
        # NOTE(review): the parameters are declared as (spider, request), the
        # reverse of Scrapy's documented (request, spider). The order is kept
        # for compatibility; this presumably relies on the caller passing both
        # by keyword - confirm before changing.
        if not isinstance(request, SeleniumRequest):
            return None

        self.bro.get(request.url)
        time.sleep(2)  # fixed wait for the page to finish rendering
        page_text = self.bro.page_source
        return HtmlResponse(url=request.url, status=200, body=page_text, request=request, encoding='utf-8')
|
45
scrapy/bossjob/bossjob/pipelines.py
Normal file
45
scrapy/bossjob/bossjob/pipelines.py
Normal file
@ -0,0 +1,45 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
import pymysql
|
||||
|
||||
|
||||
class BossjobPipeline:
    """Debug pipeline that echoes each item's ``detail`` field to stdout."""

    def process_item(self, item, spider):
        """Print ``item['detail']`` and hand the item on unchanged."""
        detail = item['detail']
        print(detail)
        return item
|
||||
|
||||
|
||||
class mysqlPipeLine(object):
    """Item pipeline that inserts each scraped job into the ``bossjob`` MySQL table."""

    # Database connection and the single cursor reused for every insert.
    conn = None
    cursor = None

    def open_spider(self, spider):
        """Open the database connection when the spider starts."""
        # NOTE(review): connection parameters, including the password, are
        # hard-coded; consider reading them from settings.
        self.conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root', password='dxs666dxs', db='Spider',
                                    charset='utf8')

    def process_item(self, item, spider):
        """Insert one item and return it; failures are printed and rolled back.

        Values are passed as query parameters so the driver escapes them.
        Building the statement with string formatting breaks on quotes in the
        scraped text and allows SQL injection.
        """
        if self.cursor is None:
            # Create the cursor once instead of leaking a new one per item.
            self.cursor = self.conn.cursor()

        try:
            self.cursor.execute(
                'insert into bossjob values(%s, %s, %s, %s, %s)',
                (item["company_name"], item["detail_url"], item["job_name"], item["pay"], item["requirement"]),
            )
            self.conn.commit()
            print('成功插入', item['job_name'], '的工作信息到数据库中!')
        except Exception as e:
            # Best effort: report the problem, undo the partial work, keep crawling.
            print(e)
            self.conn.rollback()

        return item

    def close_spider(self, spider):
        """Close the cursor and the connection when the spider finishes."""
        if self.cursor:
            self.cursor.close()
        if self.conn:
            self.conn.close()
|
5
scrapy/bossjob/bossjob/requset.py
Normal file
5
scrapy/bossjob/bossjob/requset.py
Normal file
@ -0,0 +1,5 @@
|
||||
from scrapy import Request
|
||||
|
||||
|
||||
class SeleniumRequest(Request):
    """Marker subclass of ``Request`` with no behavior of its own.

    Code can use ``isinstance(request, SeleniumRequest)`` to tell these
    requests apart from plain ``Request`` objects.
    """
|
108
scrapy/bossjob/bossjob/settings.py
Normal file
108
scrapy/bossjob/bossjob/settings.py
Normal file
@ -0,0 +1,108 @@
|
||||
# Scrapy settings for bossjob project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
# --- Project identity ---------------------------------------------------------
BOT_NAME = 'bossjob'

SPIDER_MODULES = ['bossjob.spiders']
NEWSPIDER_MODULE = 'bossjob.spiders'

# No project-wide user agent is configured here.
# USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'

# --- Crawl behavior -----------------------------------------------------------
# robots.txt is not honored, and only errors are logged.
ROBOTSTXT_OBEY = False
LOG_LEVEL = 'ERROR'

# Maximum number of concurrent requests performed by Scrapy (default: 16).
CONCURRENT_REQUESTS = 32

# Per-domain / per-IP limits are left at their defaults.
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Cookies stay enabled (this is also Scrapy's default).
COOKIES_ENABLED = True

# Telnet console is left at its default (enabled).
# TELNETCONSOLE_ENABLED = False

# Default request headers are not overridden.
# DEFAULT_REQUEST_HEADERS = {
#     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#     'Accept-Language': 'zh-CN,zh;q=0.9'
# }

# No spider middlewares are enabled.
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#     'bossjob.middlewares.BossjobSpiderMiddleware': 543,
# }

# --- Proxies ------------------------------------------------------------------
# Plain "host:port" proxy addresses. The name is lower-case, so it is meant to
# be imported directly from this module rather than read as a Scrapy setting.
proxy_list = [
    "61.216.185.88:60808",
    "121.13.252.60:41564",
    "202.109.157.64:9000",
    "120.24.76.81:8123",
    "210.5.10.87:53281",
    "117.41.38.16:9000",
    "117.41.38.18:9000",
    "121.13.252.62:41564",
    "112.14.47.6:52024",
    "222.74.73.202:42055",
    "121.13.252.58:41564",
    "117.114.149.66:55443",
    "27.42.168.46:55481",
    "121.13.252.61:41564",
    "183.236.232.160:8080",
]

# --- Downloader middlewares ---------------------------------------------------
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# The proxy and selenium middlewares exist but are currently switched off.
DOWNLOADER_MIDDLEWARES = {
    'bossjob.middlewares.BossjobCookieDownloaderMiddleware': 400,
    'bossjob.middlewares.BossjobRandomuaDownloaderMiddleware': 500,
    # 'bossjob.middlewares.BossjobRandomProxyDownloadMiddleware': 98,
    # 'bossjob.middlewares.seleniumDownloaderMiddleware': 99,
}

# Extensions are left at their defaults.
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#     'scrapy.extensions.telnet.TelnetConsole': None,
# }

# --- Item pipelines -----------------------------------------------------------
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# Only the MySQL pipeline is active; the printing pipeline is switched off.
ITEM_PIPELINES = {
    # 'bossjob.pipelines.BossjobPipeline': 300,
    'bossjob.pipelines.mysqlPipeLine': 300,
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
# AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
# AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
# AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
# AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
# HTTPCACHE_ENABLED = True
|
||||
# HTTPCACHE_EXPIRATION_SECS = 0
|
||||
# HTTPCACHE_DIR = 'httpcache'
|
||||
# HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
4
scrapy/bossjob/bossjob/spiders/__init__.py
Normal file
4
scrapy/bossjob/bossjob/spiders/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
43
scrapy/bossjob/bossjob/spiders/boss.py
Normal file
43
scrapy/bossjob/bossjob/spiders/boss.py
Normal file
@ -0,0 +1,43 @@
|
||||
import json
|
||||
import scrapy
|
||||
from ..items import BossjobItem
|
||||
from lxml import etree
|
||||
from ..requset import SeleniumRequest
|
||||
|
||||
|
||||
|
||||
class BossSpider(scrapy.Spider):
    """Spider that walks the zhipin.com mobile job-list endpoint and yields one item per job."""

    name = 'boss'

    def start_requests(self):
        """Yield one request per result page, for pages 51 through 89."""
        for page in range(51, 90):
            yield scrapy.Request(
                url=f'https://www.zhipin.com/wapi/zpgeek/mobile/search/joblist.json?page={page}&city=101020100&query=',
                callback=self.parse,
            )

    def parse(self, response, **kwargs):
        """Extract job items from the HTML fragment embedded in the JSON response.

        The JSON body carries an HTML snippet under ``zpData.html``; every
        ``<li>`` in that snippet is turned into one ``BossjobItem``.
        """
        payload = json.loads(response.text)
        fragment = etree.HTML(payload['zpData']['html'])

        for node in fragment.xpath('//li'):
            item = BossjobItem()
            item['job_name'] = node.xpath('./a/div[1]/span[1]/text()')[0]
            item['detail_url'] = 'https://www.zhipin.com' + node.xpath('./a/@href')[0]
            item['pay'] = node.xpath('a/div[1]/span[2]/text()')[0]
            item['company_name'] = node.xpath('./a/div[2]/span[1]/text()')[0]

            # The first text node is skipped; each remaining one is stripped
            # and followed by a single space (so the result ends with a space).
            texts = node.xpath('./a/div[3]//text()')
            item['requirement'] = ''.join(text.strip() + ' ' for text in texts[1:])

            yield item
|
||||
|
11
scrapy/bossjob/scrapy.cfg
Normal file
11
scrapy/bossjob/scrapy.cfg
Normal file
@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = bossjob.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = bossjob
|
BIN
scrapy/bossjob/vimm_chrome_proxyauth_plugin.zip
Normal file
BIN
scrapy/bossjob/vimm_chrome_proxyauth_plugin.zip
Normal file
Binary file not shown.
0
scrapy/caipiao/caipiao/__init__.py
Normal file
0
scrapy/caipiao/caipiao/__init__.py
Normal file
196
scrapy/caipiao/caipiao/fake_useragent.py
Normal file
196
scrapy/caipiao/caipiao/fake_useragent.py
Normal file
@ -0,0 +1,196 @@
|
||||
import random
|
||||
|
||||
import requests
|
||||
|
||||
# Pool of browser User-Agent strings to pick from at random.
# Every entry must be followed by a comma: two adjacent string literals are
# silently concatenated by Python into a single (bogus) entry.
USER_AGENTS = [
    # Chrome
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36",
    "Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14",
    # Opera
    "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
    "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
    "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
    "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
    "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
    "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
    "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
    "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
    "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11",
    "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10",
    "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1",
    "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01",
    "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
    # Firefox
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
    "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0",
    "Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (X11; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0",
    "Mozilla/5.0 (Windows x86; rv:19.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6",
    # Internet Explorer
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)",
    "Mozilla/4.0 (Compatible; MSIE 8.0; Windows NT 5.2; Trident/6.0)",
    "Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7",
]
|
||||
|
||||
|
||||
def get_ua():
    """Return one randomly chosen entry of ``USER_AGENTS``."""
    chosen = random.choice(USER_AGENTS)
    return chosen
|
||||
|
||||
|
||||
def get_requests_headers():
    """Build a browser-like header dict with a randomly chosen User-Agent.

    :return: dict of HTTP request headers, suitable for ``requests.get(headers=...)``
    """
    headers = {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'close',
        'Accept-Encoding': 'gzip, deflate, br',
        # Site-specific headers, left disabled:
        # 'Host': 'www.zhipin.com',
        # 'Origin': 'https://www.zhipin.com',
        # 'Referer': 'https://www.zhipin.com/',
    }
    # The dict must be returned explicitly; without this the function yields None.
    return headers
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Module self-check: show one generated header set, then fetch a test page
    # with a second, freshly generated one and print the decoded body.
    print(get_requests_headers())
    target = 'http://www.ip3366.net/?stype=1&page=1'
    response = requests.get(target, headers=get_requests_headers())
    body = response.content.decode("gb2312", "ignore")
    print(body)
|
12
scrapy/caipiao/caipiao/items.py
Normal file
12
scrapy/caipiao/caipiao/items.py
Normal file
@ -0,0 +1,12 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class CaipiaoItem(scrapy.Item):
    """Container for one scraped lottery draw."""

    # The three fields, in the order the CSV pipeline writes them.
    qihao = scrapy.Field()
    red_ball = scrapy.Field()  # iterable of strings; the CSV pipeline joins it with spaces
    blue_ball = scrapy.Field()
|
46
scrapy/caipiao/caipiao/middlewares.py
Normal file
46
scrapy/caipiao/caipiao/middlewares.py
Normal file
@ -0,0 +1,46 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
import random
|
||||
from time import sleep
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
from scrapy.http import HtmlResponse
|
||||
|
||||
from .fake_useragent import USER_AGENTS
|
||||
|
||||
|
||||
class CaipiaoDownloaderMiddleware:
    """Downloader middleware that fakes the User-Agent and re-renders pages with the spider's browser.

    Not all methods need to be defined. If a method is not defined, Scrapy
    acts as if the downloader middleware does not modify the passed objects.
    """

    def process_request(self, request, spider):
        """Disguise the request with a randomly chosen User-Agent."""
        request.headers['User-Agent'] = random.choice(USER_AGENTS)
        return None

    def process_response(self, request, response, spider):
        """Replace the downloaded response with the page rendered by ``spider.bro``.

        The browser opens the request URL, fills in the range 16001-23004 in the
        ``from``/``to`` inputs, submits it, and the resulting page source is
        returned as a new ``HtmlResponse``.
        """
        bro = spider.bro
        bro.get(request.url)
        sleep(0.5)  # short fixed wait before interacting with the page

        # find_element(by, value) is used instead of the find_element_by_*
        # helpers, which no longer exist in current Selenium releases. The
        # plain strings 'xpath' and 'id' are the locator strategy names.
        bro.find_element('xpath', '//*[@id="link248"]/img').click()

        start = bro.find_element('id', 'from')
        start.clear()
        start.send_keys('16001')

        end = bro.find_element('id', 'to')
        end.clear()
        end.send_keys('23004')

        bro.find_element('id', 'link176').click()

        page_text = bro.page_source
        return HtmlResponse(url=request.url, body=page_text, encoding='utf-8', request=request)

    def process_exception(self, request, exception, spider):
        """Do nothing: exceptions are left to other middlewares and Scrapy's defaults."""
        pass
|
63
scrapy/caipiao/caipiao/pipelines.py
Normal file
63
scrapy/caipiao/caipiao/pipelines.py
Normal file
@ -0,0 +1,63 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
import pymysql
|
||||
|
||||
'''
|
||||
存储数据的方案:
|
||||
1、数据要存在csv文件中
|
||||
2、数据要存在mysql数据库中
|
||||
3、数据要存在mongodb数据库中
|
||||
4.文件的存储
|
||||
'''
|
||||
|
||||
|
||||
class CaipiaoPipeline:
    """Item pipeline that writes every item as one line of ``./双色球.csv``."""

    def open_spider(self, spider):
        """Create (truncate) the CSV file and write the header row."""
        print('开始存储!')
        self.f = open('./双色球.csv', mode='w', encoding='utf-8')
        self.f.write("期数,红球号码,蓝球号码\n")

    def close_spider(self, spider):
        """Close the CSV file when the spider finishes."""
        print('存储完毕!')
        if not self.f:
            return
        self.f.close()

    def process_item(self, item, spider):
        """Append the item as ``issue,red balls (space separated),blue ball``."""
        issue = item['qihao']
        red_numbers = ' '.join(item['red_ball'])
        blue_number = item['blue_ball']
        self.f.write(f"{issue},{red_numbers},{blue_number}\n")
        return item
|
||||
|
||||
|
||||
class mySQLPipeline:
    """Item pipeline that inserts every item into the MySQL table ``caipiao``."""

    def open_spider(self, spider):
        """Open the MySQL connection that is reused for the whole crawl."""
        print('开始存储!')
        # NOTE(review): the credentials are hard-coded in source; consider
        # reading them from the Scrapy settings or the environment instead.
        self.conn = pymysql.Connect(
            host="localhost",
            port=3306,
            user="root",
            password="dxs666dxs",
            database="spider"
        )

    def close_spider(self, spider):
        """Close the connection, if one was opened."""
        print('存储完毕!')
        # getattr: open_spider may have failed before self.conn was assigned.
        conn = getattr(self, 'conn', None)
        if conn:
            conn.close()

    def process_item(self, item, spider):
        """Insert one row and pass the item on to the next pipeline.

        Returns:
            The unchanged ``item``. Scrapy hands this return value to the
            following pipeline, so it must not be None.
        """
        red_ball = item['red_ball']
        # The spider stores the red balls as a list; a list cannot be bound to
        # a single column, so flatten it the same way the CSV pipeline does.
        if isinstance(red_ball, (list, tuple)):
            red_ball = ' '.join(red_ball)

        sql = "insert into caipiao values(%s, %s, %s)"
        cur = self.conn.cursor()
        try:
            cur.execute(sql, (item['qihao'], red_ball, item['blue_ball']))
            self.conn.commit()
        except Exception as e:
            # Best effort: report the failure and keep the crawl running.
            print(e)
            self.conn.rollback()
        finally:
            cur.close()
        return item
|
89
scrapy/caipiao/caipiao/settings.py
Normal file
89
scrapy/caipiao/caipiao/settings.py
Normal file
@ -0,0 +1,89 @@
|
||||
# Scrapy settings for caipiao project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
# Name of this Scrapy project / bot.
BOT_NAME = 'caipiao'

# Package that holds the spiders, and the package new spiders are created in.
SPIDER_MODULES = ['caipiao.spiders']
NEWSPIDER_MODULE = 'caipiao.spiders'


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'caipiao (+http://www.yourdomain.com)'

# robots.txt rules are NOT obeyed in this project.
ROBOTSTXT_OBEY = False
# Only messages at WARNING level or above are logged.
LOG_LEVEL = 'WARNING'

# Configure maximum concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
#COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
#TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
#DEFAULT_REQUEST_HEADERS = {
|
||||
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
# 'Accept-Language': 'en',
|
||||
#}
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
# SPIDER_MIDDLEWARES = {
|
||||
# 'caipiao.middlewares.CaipiaoSpiderMiddleware': 543,
|
||||
# }
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
DOWNLOADER_MIDDLEWARES = {
    # Sets a random User-Agent and replaces each response with the
    # Selenium-rendered page (see caipiao/middlewares.py).
    'caipiao.middlewares.CaipiaoDownloaderMiddleware': 543,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    # Only the CSV pipeline is enabled; pipelines.mySQLPipeline is not
    # registered here.
    'caipiao.pipelines.CaipiaoPipeline': 300,
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = 'httpcache'
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
4
scrapy/caipiao/caipiao/spiders/__init__.py
Normal file
4
scrapy/caipiao/caipiao/spiders/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
43
scrapy/caipiao/caipiao/spiders/seq.py
Normal file
43
scrapy/caipiao/caipiao/spiders/seq.py
Normal file
@ -0,0 +1,43 @@
|
||||
import scrapy
|
||||
from ..items import CaipiaoItem
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver import ChromeOptions
|
||||
|
||||
class SeqSpider(scrapy.Spider):
    """Spider for the chart table at https://datachart.500.com/ssq/.

    A headless Chrome instance is created in ``__init__`` and exposed as
    ``self.bro``; the project's downloader middleware reads that attribute to
    render the page before ``parse`` sees it.
    """

    name = 'seq'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://datachart.500.com/ssq/']

    def __init__(self, **kwargs):
        """Create the spider and start the headless Chrome browser."""
        super().__init__(**kwargs)
        option = ChromeOptions()
        # Lower the chance of Selenium being detected and silence driver logs.
        # add_experimental_option() keeps one value per option name, so both
        # switches must go into a single call; two separate calls would leave
        # only the last list in effect.
        option.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
        option.add_argument("--no-sandbox")
        option.add_argument("--disable-dev-shm-usage")
        option.add_argument("--window-size=1920,1080")  # an explicit window size is recommended
        option.add_argument('--headless')
        option.add_argument('--disable-gpu')
        # option.add_argument('blink-settings=imagesEnabled=false')
        # Raw string: the backslashes of the Windows path are not escape sequences.
        self.bro = webdriver.Chrome(executable_path=r'D:\爬虫\selenium\chromedriver.exe', options=option)

    def closed(self, spider):
        """Quit the browser when the spider is closed.

        NOTE(review): Scrapy passes the close reason to ``closed``; the
        parameter name ``spider`` is kept for compatibility and is unused.
        """
        self.bro.quit()

    def parse(self, response):
        """Yield one ``CaipiaoItem`` per data row of the ``#tdata`` table."""
        for tr in response.xpath('//*[@id="tdata"]/tr'):
            # Skip the filler rows.
            if tr.xpath('./@class').extract_first() == 'tdbck':
                continue
            qishu = tr.xpath('./td[1]/text()').extract_first()
            if qishu is None:
                # A row without an issue number carries no draw data.
                continue

            item = CaipiaoItem()
            item['qihao'] = qishu.strip()
            # XPath alternative: tr.xpath('./td[@class="chartBall01"]/text()').extract()
            item['red_ball'] = tr.css(".chartBall01::text").extract()
            item['blue_ball'] = tr.css(".chartBall02::text").extract_first()
            yield item
|
11
scrapy/caipiao/scrapy.cfg
Normal file
11
scrapy/caipiao/scrapy.cfg
Normal file
@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = caipiao.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = caipiao
|
0
scrapy/imgsPro/imgsPro/__init__.py
Normal file
0
scrapy/imgsPro/imgsPro/__init__.py
Normal file
197
scrapy/imgsPro/imgsPro/fake_useragent.py
Normal file
197
scrapy/imgsPro/imgsPro/fake_useragent.py
Normal file
@ -0,0 +1,197 @@
|
||||
import random
|
||||
|
||||
import requests
|
||||
|
||||
# Pool of browser User-Agent strings to pick from at random.
# NOTE: every entry must end with a comma; adjacent string literals without
# one are silently concatenated into a single bogus User-Agent.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36",
    "Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14",
    "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
    "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
    "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
    "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
    "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
    "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
    "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
    "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
    "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11",
    "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10",
    "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1",
    "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01",
    "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
    "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0",
    "Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (X11; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0",
    "Mozilla/5.0 (Windows x86; rv:19.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)",
    "Mozilla/4.0 (Compatible; MSIE 8.0; Windows NT 5.2; Trident/6.0)",
    "Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7",
]
|
||||
|
||||
|
||||
def get_ua():
    """Return one User-Agent string picked at random from ``USER_AGENTS``."""
    chosen = random.choice(USER_AGENTS)
    return chosen
|
||||
|
||||
|
||||
def get_requests_headers():
    """Build a set of browser-like request headers with a random User-Agent.

    Returns:
        dict: header names mapped to values; ``'User-Agent'`` is chosen at
        random from ``USER_AGENTS`` on every call.
    """
    headers = {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Accept-Encoding': 'gzip, deflate, br',
        # 'Host': 'www.zhipin.com',
        # 'Origin': 'https://www.zhipin.com',
        'Referer': 'https://wz.sun0769.com/political/index/politicsNewest?id=1&page=1',
        'upgrade-insecure-requests': '1',
    }
    # Without this return the function evaluated to None, so every caller
    # (e.g. requests.get(..., headers=get_requests_headers())) got no headers.
    return headers
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Module self-check: print one generated header set, then request a test
    # page with a second, independently generated header set.
    print(get_requests_headers())
    probe_url = 'http://www.ip3366.net/?stype=1&page=1'
    response = requests.get(probe_url, headers=get_requests_headers())
    # The body is decoded as gb2312; bytes that cannot be decoded are dropped.
    body_bytes = response.content
    print(body_bytes.decode("gb2312", "ignore"))
|
13
scrapy/imgsPro/imgsPro/items.py
Normal file
13
scrapy/imgsPro/imgsPro/items.py
Normal file
@ -0,0 +1,13 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class ImgsproItem(scrapy.Item):
    """One image to download: its target file name and its source URL."""

    # define the fields for your item here like:
    # name = scrapy.Field()
    # File name; imgsPipeLine.file_path returns this value as the storage path.
    img_name = scrapy.Field()
    # Image URL; imgsPipeLine.get_media_requests requests this address.
    img_src = scrapy.Field()
|
145
scrapy/imgsPro/imgsPro/middlewares.py
Normal file
145
scrapy/imgsPro/imgsPro/middlewares.py
Normal file
@ -0,0 +1,145 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
|
||||
class ImgsproSpiderMiddleware:
    """Pass-through spider middleware (Scrapy project template).

    Not all methods need to be defined. If a method is not defined, Scrapy
    acts as if the spider middleware does not modify the passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_spider_input(self, response, spider):
        """Accept every response on its way into the spider.

        Should return None or raise an exception.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Forward everything the spider produced for ``response`` unchanged.

        Must return an iterable of Request or item objects.
        """
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Do not handle exceptions raised by the spider or by
        ``process_spider_input``.

        Should return either None or an iterable of Request or item objects.
        """
        return None

    def process_start_requests(self, start_requests, spider):
        """Forward the spider's start requests unchanged.

        Must return only requests (not items).
        """
        yield from start_requests

    def spider_opened(self, spider):
        """Log that the spider was opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
|
||||
|
||||
|
||||
class ImgsproDownloaderMiddleware:
    """Pass-through downloader middleware (Scrapy project template).

    Not all methods need to be defined. If a method is not defined, Scrapy
    acts as if the downloader middleware does not modify the passed objects.
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_request(self, request, spider):
        """Let every request continue through the downloader untouched.

        Must either return None (continue processing), return a Response or
        Request object, or raise IgnoreRequest (the process_exception()
        methods of installed downloader middleware are then called).
        """
        return None

    def process_response(self, request, response, spider):
        """Return the downloaded response unchanged.

        Must either return a Response object, return a Request object, or
        raise IgnoreRequest.
        """
        return response

    def process_exception(self, request, exception, spider):
        """Leave download exceptions to the rest of the chain.

        Returning None continues processing the exception; returning a
        Response or Request object would stop the process_exception() chain.
        """
        return None

    def spider_opened(self, spider):
        """Log that the spider was opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
|
||||
|
||||
# 中间件1 -随机UA
|
||||
from .fake_useragent import get_requests_headers
|
||||
|
||||
|
||||
class imgsProRandomuaDownloaderMiddleware(object):
    """Downloader middleware that puts a random User-Agent on each request."""

    def process_request(self, request, spider):
        """Set the ``User-Agent`` header from ``get_requests_headers()``.

        ``get_requests_headers`` builds a whole header mapping, so only its
        ``'User-Agent'`` entry is used; assigning the entire result to the
        header would not send a valid User-Agent string.
        """
        headers = get_requests_headers()
        # Accept a mapping (pick the UA entry) or a plain UA string.
        if isinstance(headers, dict):
            user_agent = headers.get('User-Agent')
        else:
            user_agent = headers
        # Leave the request untouched when no User-Agent is available.
        if user_agent:
            request.headers['User-Agent'] = user_agent
|
||||
|
||||
|
||||
# 中间件2 -随机代理
|
||||
# import random
|
||||
# from .proxies import proxy_list
|
||||
#
|
||||
# class BossjobRandomProxyDownloadMiddleware(object):
|
||||
# def process_request(self, request, spider):
|
||||
# proxy = random.choice(proxy_list)
|
||||
# request.meta['proxy'] = proxy
|
||||
# print(proxy)
|
||||
#
|
||||
# def process_exception(self, request, exception, spider):
|
||||
# # 处理代理ip无法使用情况
|
||||
# return request
|
||||
|
||||
# 中间件3 -Cookie
|
||||
class imgsProCookieDownloaderMiddleware(object):
    """Downloader middleware that attaches a fixed set of cookies to requests."""

    def process_request(self, request, spider):
        """Replace the request's cookies with the parsed cookie mapping."""
        cookie_dict = self.get_cookies()
        request.cookies = cookie_dict
        # print(cookie_dict)

    def get_cookies(self):
        """Parse the hard-coded ``Cookie`` header string into a dict.

        Returns:
            dict: cookie name -> cookie value. Names are stripped of the
            whitespace that follows each ``;`` and values keep any ``=``
            characters they contain (the FPTOKEN value is base64-like and
            contains ``=``).
        """
        cookie_string = 'cz_statistics_visitor=6a89d058-1928-b3b0-23ec-dd69be6c601a; __bid_n=184bced47869fe68784207; FPTOKEN=aJKftmn/cRusAPgCcLDE2nPw1f6AOJ8O2QUSZDc3c8DvI5BXZ30JDOFLJMgL1IRmUrXBPceos2w32lBfN2EV9YGfaTCJRsiUCa0hhZE/W7lV1yrRpNcTOHVpdJ+2coFSRUj1ah8fG8R959GOo63vzd2UuGRfjD+wf8giIlSk1FhVeFN28vpeiCScpwb6K6NH3Lu28AA/1idjRk6PUvVjZuUkUVAOb3zgBUtIvIlFH3Fy6PxnN0MYEFUBlXfGw+S5GRRrffN44WeiC1NzodYwUs78bOaxu6NxOp6a0LkOgoaWjCiGlF2sFTQNoOVMQcf3QZ+EGXVyKbhi1+YEmY4YrMMcQTkDgZGWtUlwhzkBjOi3pf8rT3axAIefUN12FZ7/D3D0tW59zkrNXqNNVbwPsg==|pnNJ+7La9ur/GH7QYr2dOE2BpmC7rfTIjxxwS6VDPJA=|10|1e90646f2dfd14de2376168eeb9968f4'
        cookie_dict = {}
        for pair in cookie_string.split(';'):
            # Split on the FIRST '=' only so values containing '=' survive.
            name, sep, value = pair.strip().partition('=')
            if not sep:
                # Not a name=value pair (e.g. an empty trailing segment).
                continue
            cookie_dict[name] = value
        return cookie_dict
|
||||
|
31
scrapy/imgsPro/imgsPro/pipelines.py
Normal file
31
scrapy/imgsPro/imgsPro/pipelines.py
Normal file
@ -0,0 +1,31 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
import scrapy
|
||||
from itemadapter import ItemAdapter
|
||||
from scrapy.pipelines.images import ImagesPipeline
|
||||
|
||||
|
||||
class ImgsproPipeline:
    """Item pipeline that prints every item and passes it on unchanged."""

    def process_item(self, item, spider):
        """Print ``item`` to stdout and return it."""
        print(item)
        return item
|
||||
|
||||
|
||||
class imgsPipeLine(ImagesPipeline):
    """Image pipeline that requests ``item['img_src']`` and stores it as ``item['img_name']``."""

    def get_media_requests(self, item, info):
        """Yield one request for the image URL carried by the item."""
        image_url = item['img_src']
        yield scrapy.Request(image_url)

    def file_path(self, request, response=None, info=None, *, item):
        """Return the storage path for the image: the item's ``img_name``."""
        return item['img_name']

    def item_completed(self, results, item, info):
        """Return the item so that it is handed to the next pipeline class."""
        return item
|
92
scrapy/imgsPro/imgsPro/settings.py
Normal file
92
scrapy/imgsPro/imgsPro/settings.py
Normal file
@ -0,0 +1,92 @@
|
||||
# Scrapy settings for imgsPro project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
BOT_NAME = 'imgsPro'
|
||||
|
||||
SPIDER_MODULES = ['imgsPro.spiders']
|
||||
NEWSPIDER_MODULE = 'imgsPro.spiders'
|
||||
|
||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||
# USER_AGENT = 'imgsPro (+http://www.yourdomain.com)'
|
||||
|
||||
# Obey robots.txt rules
|
||||
ROBOTSTXT_OBEY = False
|
||||
LOG_LEVEL = 'ERROR'
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
CONCURRENT_REQUESTS = 32
|
||||
|
||||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
# CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
# COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
# TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
# DEFAULT_REQUEST_HEADERS = {
|
||||
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
# 'Accept-Language': 'en',
|
||||
# }
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
# SPIDER_MIDDLEWARES = {
|
||||
# 'imgsPro.middlewares.ImgsproSpiderMiddleware': 543,
|
||||
# }
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
DOWNLOADER_MIDDLEWARES = {
|
||||
'imgsPro.middlewares.imgsProCookieDownloaderMiddleware': 500,
|
||||
'imgsPro.middlewares.imgsProRandomuaDownloaderMiddleware': 400,
|
||||
}
|
||||
|
||||
# Enable or disable extensions
|
||||
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||
# EXTENSIONS = {
|
||||
# 'scrapy.extensions.telnet.TelnetConsole': None,
|
||||
# }
|
||||
|
||||
# Configure item pipelines
|
||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
ITEM_PIPELINES = {
|
||||
'imgsPro.pipelines.ImgsproPipeline': 300,
|
||||
'imgsPro.pipelines.imgsPipeLine': 400,
|
||||
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
# AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
# AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
# AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
# AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
# HTTPCACHE_ENABLED = True
|
||||
# HTTPCACHE_EXPIRATION_SECS = 0
|
||||
# HTTPCACHE_DIR = 'httpcache'
|
||||
# HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
||||
|
||||
# 指定图片存储的目录
|
||||
IMAGES_STORE = './img_lib'
|
4
scrapy/imgsPro/imgsPro/spiders/__init__.py
Normal file
4
scrapy/imgsPro/imgsPro/spiders/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
32
scrapy/imgsPro/imgsPro/spiders/img.py
Normal file
32
scrapy/imgsPro/imgsPro/spiders/img.py
Normal file
@ -0,0 +1,32 @@
|
||||
import scrapy
|
||||
from ..items import ImgsproItem
|
||||
import re
|
||||
|
||||
|
||||
class ImgSpider(scrapy.Spider):
    """Spider that yields image names and image URLs from sc.chinaz.com listing pages."""

    name = 'img'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://sc.chinaz.com/tupian//']
    # Number of the next listing page to request; parse() requests pages
    # while this is <= 3.
    page_num = 2

    def parse(self, response):
        """Yield one ``ImgsproItem`` per image entry, then follow the next page.

        Entries whose ``<img>`` has no ``data-original`` attribute are skipped:
        without it the URL would be the bare string ``'https:'``, which cannot
        be turned into a download request.
        """
        div_list = response.xpath('/html/body/div[3]/div[2]/div')
        for div in div_list:
            img_src = ''.join(div.xpath('./img/@data-original').extract())
            if not img_src:
                continue

            item = ImgsproItem()
            item['img_name'] = ''.join(div.xpath('./img/@alt').extract()) + '.jpg'
            # A "_s" right before the file extension marks a thumbnail; drop it
            # to get the full-size image. Only that suffix is removed, so a
            # "_s" occurring elsewhere in the URL is left intact.
            item['img_src'] = re.sub(r'_s(?=\.[^./]+$)', '', 'https:' + img_src)

            yield item

        # Alternative way of paginating: build the next page URL by number.
        if self.page_num <= 3:
            new_url = f'https://sc.chinaz.com/tupian/index_{self.page_num}.html'
            self.page_num += 1

            yield scrapy.Request(new_url, callback=self.parse)
|
11
scrapy/imgsPro/scrapy.cfg
Normal file
11
scrapy/imgsPro/scrapy.cfg
Normal file
@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = imgsPro.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = imgsPro
|
0
scrapy/paper/paper/__init__.py
Normal file
0
scrapy/paper/paper/__init__.py
Normal file
194
scrapy/paper/paper/fake_useragent.py
Normal file
194
scrapy/paper/paper/fake_useragent.py
Normal file
@ -0,0 +1,194 @@
|
||||
import random
|
||||
|
||||
import requests
|
||||
|
||||
# Pool of browser User-Agent strings to choose from at random.
# Every entry must be followed by a comma: two adjacent string literals are
# silently concatenated by Python into a single (invalid) User-Agent.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36",
    "Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14",
    "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
    "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
    "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
    "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
    "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
    "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
    "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
    "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
    "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11",
    "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10",
    "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1",
    "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01",
    "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
    "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0",
    "Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (X11; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0",
    "Mozilla/5.0 (Windows x86; rv:19.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)",
    "Mozilla/4.0 (Compatible; MSIE 8.0; Windows NT 5.2; Trident/6.0)",
    "Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7",
]
|
||||
|
||||
|
||||
def get_ua():
    """Return one User-Agent string chosen at random from USER_AGENTS."""
    return random.choice(USER_AGENTS)
|
||||
|
||||
|
||||
def get_requests_headers():
    """Build a request-header dict with a randomly chosen User-Agent.

    The other header values (Accept, Accept-Language, Connection,
    Accept-Encoding) are fixed strings.
    """
    user_agent = random.choice(USER_AGENTS)
    return {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7',
        'Connection': 'keep-alive',
        'Accept-Encoding': 'gzip, deflate, br',
    }
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Module self-check: print one generated header set, then fetch a page
    # with a freshly generated header set and print its body decoded as
    # gb2312 (bytes that cannot be decoded are ignored).
    print(get_requests_headers())
    response = requests.get('http://www.ip3366.net/?stype=1&page=1', headers=get_requests_headers())
    print(response.content.decode("gb2312", "ignore"))
|
12
scrapy/paper/paper/items.py
Normal file
12
scrapy/paper/paper/items.py
Normal file
@ -0,0 +1,12 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class PaperItem(scrapy.Item):
    """Item class for the paper project; it declares no fields."""
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass
|
41
scrapy/paper/paper/middlewares.py
Normal file
41
scrapy/paper/paper/middlewares.py
Normal file
@ -0,0 +1,41 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
from .fake_useragent import get_ua
|
||||
|
||||
|
||||
class PaperDownloaderMiddleware:
    """Downloader middleware that sets a random User-Agent on each request."""

    def process_request(self, request, spider):
        """Set the request's User-Agent header from get_ua() and return None."""
        # User-Agent spoofing
        request.headers['User-Agent'] = get_ua()
        return None

    def process_response(self, request, response, spider):
        """Return the response unchanged."""
        return response

    def process_exception(self, request, exception, spider):
        """Take no action for the exception and return None."""
        return None
|
||||
|
||||
|
||||
class CookieDownloaderMiddleware(object):
    """Downloader middleware that attaches a fixed set of cookies to each request."""

    def process_request(self, request, spider):
        """Replace ``request.cookies`` with the parsed cookie dict.

        Returns None implicitly.
        """
        request.cookies = self.get_cookies()

    def get_cookies(self):
        """Parse ``cookie_string`` (a ``Cookie`` header value) into a dict.

        Returns an empty dict when the string is empty. Each ``name=value``
        pair is split on the FIRST ``=`` only, because cookie values may
        contain ``=``. Whitespace around each pair is stripped, and segments
        without a name or without ``=`` are skipped rather than raising
        IndexError.
        """
        # Paste the browser's Cookie header value here.
        cookie_string = ''
        cookie_dict = {}
        for pair in cookie_string.split(';'):
            name, separator, value = pair.strip().partition('=')
            if not separator or not name:
                continue
            cookie_dict[name] = value
        return cookie_dict
|
13
scrapy/paper/paper/pipelines.py
Normal file
13
scrapy/paper/paper/pipelines.py
Normal file
@ -0,0 +1,13 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
|
||||
class PaperPipeline:
    """Item pipeline that passes every item through unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` as-is."""
        return item
|
90
scrapy/paper/paper/settings.py
Normal file
90
scrapy/paper/paper/settings.py
Normal file
@ -0,0 +1,90 @@
|
||||
# Scrapy settings for paper project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
BOT_NAME = 'paper'
|
||||
|
||||
SPIDER_MODULES = ['paper.spiders']
|
||||
NEWSPIDER_MODULE = 'paper.spiders'
|
||||
|
||||
|
||||
# Crawl responsibly by identifying yourself (and your website) on the user-agent
|
||||
#USER_AGENT = 'paper (+http://www.yourdomain.com)'
|
||||
|
||||
# Obey robots.txt rules
|
||||
ROBOTSTXT_OBEY = False
|
||||
|
||||
LOG_LEVEL = 'WARNING'
|
||||
|
||||
# Configure maximum concurrent requests performed by Scrapy (default: 16)
|
||||
CONCURRENT_REQUESTS = 32
|
||||
|
||||
# Configure a delay for requests for the same website (default: 0)
|
||||
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
|
||||
# See also autothrottle settings and docs
|
||||
DOWNLOAD_DELAY = 3
|
||||
# The download delay setting will honor only one of:
|
||||
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
|
||||
#CONCURRENT_REQUESTS_PER_IP = 16
|
||||
|
||||
# Disable cookies (enabled by default)
|
||||
#COOKIES_ENABLED = False
|
||||
|
||||
# Disable Telnet Console (enabled by default)
|
||||
#TELNETCONSOLE_ENABLED = False
|
||||
|
||||
# Override the default request headers:
|
||||
#DEFAULT_REQUEST_HEADERS = {
|
||||
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
||||
# 'Accept-Language': 'en',
|
||||
#}
|
||||
|
||||
# Enable or disable spider middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
#SPIDER_MIDDLEWARES = {
|
||||
# 'paper.middlewares.PaperSpiderMiddleware': 543,
|
||||
#}
|
||||
|
||||
# Enable or disable downloader middlewares
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
DOWNLOADER_MIDDLEWARES = {
|
||||
'paper.middlewares.PaperDownloaderMiddleware': 543,
|
||||
}
|
||||
|
||||
# Enable or disable extensions
|
||||
# See https://docs.scrapy.org/en/latest/topics/extensions.html
|
||||
#EXTENSIONS = {
|
||||
# 'scrapy.extensions.telnet.TelnetConsole': None,
|
||||
#}
|
||||
|
||||
# Configure item pipelines
|
||||
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
ITEM_PIPELINES = {
|
||||
'paper.pipelines.PaperPipeline': 300,
|
||||
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = 'httpcache'
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
4
scrapy/paper/paper/spiders/__init__.py
Normal file
4
scrapy/paper/paper/spiders/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
28
scrapy/paper/paper/spiders/page.py
Normal file
28
scrapy/paper/paper/spiders/page.py
Normal file
@ -0,0 +1,28 @@
|
||||
import scrapy
|
||||
|
||||
|
||||
class PageSpider(scrapy.Spider):
    """Spider that posts a login form to 17k.com, then requests the shelf URL."""

    name = 'page'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://user.17k.com/ck/author/shelf?page=1&appKey=2406394919']

    def start_requests(self):
        """Start the crawl with a form POST to the login endpoint."""
        login_name = ''
        login_password = ''
        credentials = {
            'loginName': login_name,
            'password': login_password
        }

        # Sending the form as a POST request
        login_request = scrapy.FormRequest(
            url='https://passport.17k.com/ck/user/login',
            formdata=credentials,
            callback=self.parse
        )
        yield login_request

    def parse(self, response, **kwargs):
        """After the login response arrives, request the first start URL."""
        shelf_url = self.start_urls[0]
        yield scrapy.Request(url=shelf_url, callback=self.detail_parse)

    def detail_parse(self, response):
        """Print the JSON body of the response."""
        payload = response.json()
        print(payload)
|
11
scrapy/paper/scrapy.cfg
Normal file
11
scrapy/paper/scrapy.cfg
Normal file
@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = paper.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = paper
|
11
scrapy/sunPro/scrapy.cfg
Normal file
11
scrapy/sunPro/scrapy.cfg
Normal file
@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = sunPro.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = sunPro
|
0
scrapy/sunPro/sunPro/__init__.py
Normal file
0
scrapy/sunPro/sunPro/__init__.py
Normal file
196
scrapy/sunPro/sunPro/fake_useragent.py
Normal file
196
scrapy/sunPro/sunPro/fake_useragent.py
Normal file
@ -0,0 +1,196 @@
|
||||
import random
|
||||
|
||||
import requests
|
||||
|
||||
# Pool of desktop browser User-Agent strings used for random selection.
# Every entry must be followed by a comma: adjacent string literals are
# implicitly concatenated by Python, which silently fuses two user agents
# into one invalid value.
USER_AGENTS = [
    # Chrome
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36",
    "Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15",
    # The trailing comma on the next entry was missing in the original list.
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14",
    # Opera
    "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
    "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
    "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
    "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
    "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
    "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
    "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
    "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
    "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11",
    "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10",
    "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1",
    "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01",
    "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
    # Firefox
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
    "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0",
    "Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (X11; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0",
    "Mozilla/5.0 (Windows x86; rv:19.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6",
    # Internet Explorer
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)",
    "Mozilla/4.0 (Compatible; MSIE 8.0; Windows NT 5.2; Trident/6.0)",
    "Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7",
]
|
||||
|
||||
|
||||
def get_ua():
    """Return one User-Agent string chosen at random from USER_AGENTS."""
    chosen = random.choice(USER_AGENTS)
    return chosen
|
||||
|
||||
|
||||
def get_requests_headers():
    """Build browser-like HTTP request headers with a random User-Agent.

    Returns:
        dict: header name -> value, suitable for ``requests.get(headers=...)``.
        The ``User-Agent`` entry is picked at random from USER_AGENTS on
        every call.
    """
    headers = {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Accept-Encoding': 'gzip, deflate, br',
        # Site-specific headers, left disabled:
        # 'Host': 'www.zhipin.com',
        # 'Origin': 'https://www.zhipin.com',
        # 'Referer': 'https://www.zhipin.com/',
    }
    # The original built the dict but never returned it, so every caller
    # received None.
    return headers
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Module self-check: print one generated header set, then fetch a test
    # page with a second, freshly generated header set and dump its body.
    print(get_requests_headers())
    probe_url = 'http://www.ip3366.net/?stype=1&page=1'
    response = requests.get(probe_url, headers=get_requests_headers())
    # The body is decoded as gb2312; undecodable bytes are dropped.
    page_text = response.content.decode("gb2312", "ignore")
    print(page_text)
|
23
scrapy/sunPro/sunPro/items.py
Normal file
23
scrapy/sunPro/sunPro/items.py
Normal file
@ -0,0 +1,23 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class SunproItem(scrapy.Item):
    """One scraped record, assembled from a list row and its detail page.

    ``SunSpider.parse_item`` fills ``number``, ``status`` and ``title`` from
    the list page; ``SunSpider.parse_detail`` fills ``content``, ``city`` and
    ``time`` from the detail page. ``mysqlPipeLine`` reads all six fields.
    """
    # define the fields for your item here like:
    # name = scrapy.Field()
    number = scrapy.Field()  # text of the row's first <span>
    title = scrapy.Field()  # link text inside the row's third <span>
    status = scrapy.Field()  # stripped text of the row's second <span>
    content = scrapy.Field()  # joined text nodes of the detail page's <pre>
    city = scrapy.Field()  # detail-page origin text with the label and spaces removed
    time = scrapy.Field()  # raw text of the detail page's third header <span>
|
||||
|
||||
# class DetailItem(scrapy.Item):
|
||||
# # define the fields for your item here like:
|
||||
# # name = scrapy.Field()
|
||||
# id = scrapy.Field()
|
||||
# content = scrapy.Field()
|
114
scrapy/sunPro/sunPro/middlewares.py
Normal file
114
scrapy/sunPro/sunPro/middlewares.py
Normal file
@ -0,0 +1,114 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
from time import sleep
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
from scrapy.http import HtmlResponse
|
||||
|
||||
|
||||
class SunproSpiderMiddleware:
    """Pass-through spider middleware (the stock Scrapy project template).

    Every hook forwards its input unchanged. Hooks that are not defined are
    treated by Scrapy as "does not modify the passed objects".
    """

    @classmethod
    def from_crawler(cls, crawler):
        """Create the middleware and subscribe it to ``spider_opened``."""
        middleware = cls()
        crawler.signals.connect(middleware.spider_opened, signal=signals.spider_opened)
        return middleware

    def process_spider_input(self, response, spider):
        """Run for each response on its way into the spider.

        Returning None lets processing continue; raising would abort it.
        """
        return None

    def process_spider_output(self, response, result, spider):
        """Re-emit everything the spider produced for ``response``.

        Must produce an iterable of Request or item objects.
        """
        yield from result

    def process_spider_exception(self, response, exception, spider):
        """Run when the spider or ``process_spider_input`` raises.

        Returns None, i.e. the exception is not handled here.
        """
        return None

    def process_start_requests(self, start_requests, spider):
        """Re-emit the spider's start requests unchanged.

        Works like ``process_spider_output`` but has no associated response;
        only requests (not items) may be produced.
        """
        yield from start_requests

    def spider_opened(self, spider):
        """Log the spider's name once it has been opened."""
        message = 'Spider opened: %s' % spider.name
        spider.logger.info(message)
|
||||
|
||||
|
||||
class SunproDownloaderMiddleware:
    """Replace each downloaded response with the Selenium-rendered page."""

    def process_response(self, request, response, spider):
        """Reload ``request.url`` in the spider's browser and return its HTML.

        The incoming ``response`` is discarded; dynamically loaded content is
        obtained by letting the Selenium browser (``spider.bro``) render the
        same URL.
        """
        browser = spider.bro
        browser.get(request.url)
        # Brief pause before reading the rendered source.
        sleep(0.1)
        return HtmlResponse(
            url=request.url,
            body=browser.page_source,
            encoding='utf-8',
            request=request,
        )
|
||||
|
||||
|
||||
# 中间件1 -随机UA
|
||||
from .fake_useragent import get_requests_headers
|
||||
|
||||
|
||||
class RandomuaDownloaderMiddleware(object):
    """Downloader middleware that puts a random User-Agent on each request."""

    def process_request(self, request, spider):
        """Set ``request.headers['User-Agent']`` to a randomly chosen agent.

        The original assigned the entire return value of
        ``get_requests_headers()`` (a header dict) to the ``User-Agent``
        header; only the ``'User-Agent'`` entry belongs there.

        Returns None so that Scrapy continues processing the request.
        """
        headers = get_requests_headers()
        # Be tolerant of a helper that yields nothing: leave the request's
        # existing User-Agent alone rather than overwrite it with junk.
        user_agent = headers.get('User-Agent') if headers else None
        if user_agent:
            request.headers['User-Agent'] = user_agent
|
||||
|
||||
|
||||
# 中间件2 -随机代理
|
||||
# import random
|
||||
# from .proxies import proxy_list
|
||||
#
|
||||
# class RandomProxyDownloadMiddleware(object):
|
||||
# def process_requset(self, request, spider):
|
||||
# proxy = random.choice(proxy_list)
|
||||
# request.meta['proxy'] = proxy
|
||||
# print(proxy)
|
||||
#
|
||||
# def process_exception(self, request, exception, spider):
|
||||
# # 处理代理ip无法使用情况
|
||||
# return request
|
||||
|
||||
# 中间件3 -Cookie
|
||||
class CookieDownloaderMiddleware(object):
    """Downloader middleware that attaches a fixed set of cookies to requests."""

    def process_request(self, request, spider):
        """Overwrite ``request.cookies`` with the parsed cookie dict."""
        request.cookies = self.get_cookies()

    def get_cookies(self):
        """Parse the hard-coded ``Cookie`` header string into a dict.

        Returns:
            dict: cookie name -> cookie value.
        """
        # NOTE(review): this is a captured browser session and will expire;
        # refresh it when the site starts rejecting requests.
        cookie_string = 'tgw_l7_route=581a2b818047111abece09009aea53ba; PHPSESSID=6sq7bpo9m0vsntmr1mq7othflj; Hm_lvt_8634401b25f1b0008d9638ccfc17752d=1673232337; Hm_lvt_3ac08b9ee936f8dd8b720065d8af23d0=1673232337; Hm_lpvt_3ac08b9ee936f8dd8b720065d8af23d0=1673233037; Hm_lpvt_8634401b25f1b0008d9638ccfc17752d=1673233037'
        cookie_dict = {}
        for pair in cookie_string.split(';'):
            # Pairs are separated by "; ", so strip the blank that would
            # otherwise end up at the front of every name but the first.
            # partition() splits on the FIRST "=" only, which keeps values
            # that themselves contain "=" intact.
            name, separator, value = pair.strip().partition('=')
            if not separator or not name:
                continue  # skip empty or malformed fragments
            cookie_dict[name] = value
        return cookie_dict
|
48
scrapy/sunPro/sunPro/pipelines.py
Normal file
48
scrapy/sunPro/sunPro/pipelines.py
Normal file
@ -0,0 +1,48 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
import pymysql
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
|
||||
# class SunproPipeline:
|
||||
# def process_item(self, item, spider):
|
||||
# # 如何判断item的类型
|
||||
# # 将数据写入数据库中,如何保证数据的一致性
|
||||
# if item.__class__.__name__ == 'DetailItem':
|
||||
# print(item['id'], item['content'])
|
||||
# else:
|
||||
# print(item['number'], item['title'])
|
||||
# return item
|
||||
|
||||
|
||||
class mysqlPipeLine(object):
    """Item pipeline that inserts every scraped item into the MySQL table ``new``."""

    # Database connection, opened in open_spider().
    conn = None
    # Cursor shared by all inserts; created on first use in process_item().
    cursor = None

    def open_spider(self, spider):
        """Open the MySQL connection when the spider starts."""
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to project settings or the environment.
        self.conn = pymysql.Connect(host='127.0.0.1', port=3306, user='root', password='dxs666dxs', db='Bossjob', charset='utf8')

    def process_item(self, item, spider):
        """Insert one item and return it so later pipelines still receive it.

        On any database error the transaction is rolled back, the error is
        printed, and the item is still returned (best-effort insert).
        """
        # Reuse one cursor instead of leaking a new one for every item.
        if self.cursor is None:
            self.cursor = self.conn.cursor()

        try:
            # Values are passed as query parameters so the driver escapes
            # them: scraped text may contain quotes, which both broke the
            # previous string-formatted statement and allowed SQL injection.
            self.cursor.execute(
                'insert into new values(%s, %s, %s, %s, %s, %s)',
                (item['number'], item['title'], item['content'], item['status'], item['city'], item['time']),
            )
            self.conn.commit()
            print('成功插入编号为', item['number'], '的数据!')
        except Exception as e:
            print(e)
            print('error!')
            self.conn.rollback()

        return item

    def close_spider(self, spider):
        """Release the cursor and connection, if they were ever created."""
        # Either may still be None when the spider produced no items or the
        # connection was never opened.
        if self.cursor is not None:
            self.cursor.close()
        if self.conn is not None:
            self.conn.close()
|
91
scrapy/sunPro/sunPro/settings.py
Normal file
91
scrapy/sunPro/sunPro/settings.py
Normal file
@ -0,0 +1,91 @@
|
||||
# Scrapy settings for sunPro project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
# Project name; the spider package below lives under the same name.
BOT_NAME = 'sunPro'

# Package searched for spiders, and the package new spiders are created in.
SPIDER_MODULES = ['sunPro.spiders']
NEWSPIDER_MODULE = 'sunPro.spiders'


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'sunPro (+http://www.yourdomain.com)'

# robots.txt rules are NOT obeyed by this project.
ROBOTSTXT_OBEY = False
# Log level is set to ERROR.
LOG_LEVEL = 'ERROR'

# Configure maximum concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
# 'sunPro.middlewares.SunproSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# Three project middlewares are enabled: random User-Agent, fixed cookies,
# and the Selenium-based response replacement. The integers are their
# order values (see the Scrapy documentation linked above for how the
# order is applied to requests and responses).
DOWNLOADER_MIDDLEWARES = {
    'sunPro.middlewares.RandomuaDownloaderMiddleware': 543,
    'sunPro.middlewares.CookieDownloaderMiddleware': 400,
    'sunPro.middlewares.SunproDownloaderMiddleware': 300,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# Only the MySQL pipeline is enabled.
ITEM_PIPELINES = {
    'sunPro.pipelines.mysqlPipeLine': 200,
}
|
||||
|
||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||
#AUTOTHROTTLE_ENABLED = True
|
||||
# The initial download delay
|
||||
#AUTOTHROTTLE_START_DELAY = 5
|
||||
# The maximum download delay to be set in case of high latencies
|
||||
#AUTOTHROTTLE_MAX_DELAY = 60
|
||||
# The average number of requests Scrapy should be sending in parallel to
|
||||
# each remote server
|
||||
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
|
||||
# Enable showing throttling stats for every response received:
|
||||
#AUTOTHROTTLE_DEBUG = False
|
||||
|
||||
# Enable and configure HTTP caching (disabled by default)
|
||||
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
|
||||
#HTTPCACHE_ENABLED = True
|
||||
#HTTPCACHE_EXPIRATION_SECS = 0
|
||||
#HTTPCACHE_DIR = 'httpcache'
|
||||
#HTTPCACHE_IGNORE_HTTP_CODES = []
|
||||
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
4
scrapy/sunPro/sunPro/spiders/__init__.py
Normal file
4
scrapy/sunPro/sunPro/spiders/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
73
scrapy/sunPro/sunPro/spiders/sun.py
Normal file
73
scrapy/sunPro/sunPro/spiders/sun.py
Normal file
@ -0,0 +1,73 @@
|
||||
import re
|
||||
|
||||
import scrapy
|
||||
from scrapy.linkextractors import LinkExtractor
|
||||
from scrapy.spiders import CrawlSpider, Rule
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver import ChromeOptions
|
||||
from ..items import SunproItem
|
||||
|
||||
|
||||
class SunSpider(CrawlSpider):
    """Crawl the complaint list pages and each complaint's detail page.

    List pages are discovered through the pagination links (``rules``);
    ``parse_item`` reads number/status/title from each list row and follows
    the row's link; ``parse_detail`` adds content/city/time and yields the
    finished item. A headless Chrome instance is kept on ``self.bro`` for
    the downloader middleware that renders pages with Selenium.
    """

    name = 'sun'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://wz.sun0769.com/political/index/politicsNewest']

    def __init__(self, **kwargs):
        """Create the spider and start the shared headless Chrome browser."""
        super().__init__(**kwargs)
        option = ChromeOptions()
        # Reduce the chance of Selenium being detected and silence driver
        # logging. Both switches must be passed in ONE call: the option is
        # stored under its name, so a second call with the same name would
        # replace the first list and drop 'enable-automation'.
        option.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
        option.add_argument("--no-sandbox")
        option.add_argument("--disable-dev-shm-usage")
        option.add_argument("--window-size=1920,1080")  # an explicit window size is recommended
        option.add_argument('--headless')
        option.add_argument('--disable-gpu')
        option.add_argument('blink-settings=imagesEnabled=false')
        # Raw string: the Windows path contains backslashes that are not
        # valid escape sequences (same value as before, without the warning).
        # NOTE(review): machine-specific driver path - adjust per environment.
        self.bro = webdriver.Chrome(executable_path=r'D:\爬虫\selenium\chromedriver.exe', options=option)

    def closed(self, spider):
        """Shut the browser down when the spider finishes.

        NOTE(review): Scrapy passes the close reason to this hook; the
        parameter name is kept as-is for compatibility.
        """
        self.bro.quit()

    # Link extractor: collects the links matching ``allow`` inside the
    # pagination element.
    link = LinkExtractor(allow=r'id=1&page=\d', restrict_xpaths='/html/body/div[2]/div[3]/div[3]/div/a')
    # link_detail = LinkExtractor(restrict_xpaths='/html/body/div[2]/div[3]/ul[2]/li/span[3]/a')

    rules = (
        # Rule: parse every extracted link with ``callback``.
        # follow=True applies the extractor again to the pages reached
        # through the extracted links.
        Rule(link, callback='parse_item', follow=True),
        # Rule(link_detail, callback='parse_detail'),
    )

    def parse_item(self, response):
        """Parse number, status and title of each row, then follow its link."""
        rows = response.xpath('/html/body/div[2]/div[3]/ul[2]/li')
        for row in rows:
            href = row.xpath('./span[3]/a/@href').extract_first()
            if href is None:
                # A row without a detail link cannot be completed; skip it
                # instead of failing on None + str.
                continue

            item = SunproItem()
            item['number'] = row.xpath('./span[1]/text()').extract_first()
            # extract_first() returns None when the node is missing.
            item['status'] = (row.xpath('./span[2]/text()').extract_first() or '').strip()
            item['title'] = row.xpath('./span[3]/a/text()').extract_first()
            detail_url = 'https://wz.sun0769.com' + href

            # The partially filled item travels with the request via meta.
            yield scrapy.Request(url=detail_url, callback=self.parse_detail, meta={'item': item})

    def parse_detail(self, response):
        """Add content, city and time from the detail page and yield the item."""
        item = response.meta['item']

        text_nodes = response.xpath('/html/body/div[3]/div[2]/div[2]/div[2]/pre//text()').extract()
        item['content'] = ''.join(text_nodes)

        # Fall back to '' so re.sub() is never handed None.
        city = response.xpath('/html/body/div[3]/div[2]/div[2]/div[1]/span[2]/text()').extract_first() or ''
        # Remove the origin label, then any remaining spaces.
        without_label = re.sub(' 来自:', '', city)
        item['city'] = re.sub(' ', '', without_label)

        posted_time = response.xpath('/html/body/div[3]/div[2]/div[2]/div[1]/span[3]/text()').extract_first()
        item['time'] = posted_time

        yield item
|
12
scrapy/wangyi/news.txt
Normal file
12
scrapy/wangyi/news.txt
Normal file
@ -0,0 +1,12 @@
|
||||
(1)加拿大将为乌克兰购买美制防空系统 俄方:荒谬:
|
||||
|
||||
来源:环球网【环球网报道 见习记者 李律杉】据路透社报道,美加两国元首在墨西哥城会晤后,加拿大总理特鲁多办公室周二(10日)发表声明称,加拿大将为乌克兰购买美国制造的“国家先进地对空导弹系统”(NASAMS)。报道披露,当天特鲁多和拜登正在墨西哥参加第十届北美领导人峰会,两人在支持乌克兰方面进行了单独会晤。在此期间,特鲁多告诉拜登,加拿大将为乌克兰购买美制地空导弹系统一事。“这是加拿大首次向乌克兰捐赠防空系统。”加拿大国防部长安妮塔·阿南德在推特上写道。她还表示,乌克兰防长列兹尼科夫10日早些时候在电话中告诉她,得到防空系统是乌克兰的首要任务。阿南德介绍称,NASAMS是一种中短程地面防空系统,可抵御无人机、导弹和飞机的攻击。对于加拿大这一援乌决定,俄罗斯驻加拿大大使奥列格·斯捷潘诺夫作出回应。据俄罗斯卫星通讯社报道,斯捷潘诺夫在得知此事表示,“特鲁多总理的内阁把钱花在(进一步)激化战争上,支持一个距离加拿大上千公里之外的非法政权,这看起来很荒谬。”“尤其荒谬的是,(这是)在加拿大目前国内还面临着各种问题的背景下(做出的决定)。”另外,根据加拿大总理办公室的声明,特鲁多和拜登还就加拿大皇家空军采购F-35战斗机一事展开讨论。据央视新闻报道,加拿大国防部长安妮塔·阿南德当地时间1月9日宣布,加拿大已经签署了购买F-35战机的最终合同,初期购买金额达190亿加元。据悉,这88架战机中的第一架将在2026年之前交付,而第一批F-35中队将在2029年之前投入使用。
|
||||
|
||||
(35)台媒:57架次解放军军机进入台岛周边 "异常紧张":
|
||||
|
||||
来源:环球网【环球网报道】“解放军对台打击军演 57架次共机‘三面围台’ 我战机与地面飞弹紧盯”,中国人民解放军东部战区1月8日位台岛周边海空域组织诸军兵种联合战备警巡和实战化演练第二天,台湾中时新闻网以此为题渲染“气氛异常紧张”。台防务部门9日的说法宣称,自8日上午6时至9日上午6时止,“侦获”解放军军机57架次(其中28架次逾越“台海中线”)、军舰4艘次,持续在台湾海峡周边活动。8日夜,东部战区新闻发言人施毅陆军大校表示,当天中国人民解放军东部战区位台岛周边海空域组织诸军兵种联合战备警巡和实战化演练,重点演练对陆打击、对海突击等内容,旨在检验部队联合作战能力,坚决反击外部势力、“台独”分裂势力勾连挑衅行径。中时新闻网9日称,解放军军机“扰台”范围明显扩大且集中在8日夜间,台空军战机整夜不断紧急升空,地面导弹部队更是进入高度警戒。台军还声称,运用任务机、舰艇及岸基导弹系统“严密监控”与“应处”。中时新闻网还称,台各空军基地8日晚气氛异常紧张,从北到南甚至东部,各基地战机接连紧急起飞,架次比平常多,状况如去年大陆军演一般,不少住在基地周边的民众都感觉到一丝不寻常的气氛,直到解放军东部战区发文,才知道原因是大陆进行演练。此次演习距东部战区位台岛周边海空域演习还不到半个月,2022年12月25日,中国人民解放军东部战区位台岛周边海空域组织诸军兵种联合战备警巡和联合火力打击演练。这是针对当前美台升级勾连挑衅的坚决回应。此前的12月23日,美国总统拜登签署“2023财年国防授权法案”,其中一项内容是未来5年将对台提供总额100亿美元、每年最多20亿美元的“军事援助”。该法案还要求“加速处理台湾军购请求”,并建议邀请台湾参与2024年“环太平洋军演”。这些严重违反一个中国原则和中美三个联合公报规定的恶性条款,给台海和平稳定造成严重损害。
|
||||
|
||||
(34)德媒:柏林正疯狂寻找向基辅承诺的40辆步兵战车:
|
||||
|
||||
来源:中国新闻网中新网1月9日电 据德国《明镜》周刊报道,德国正在“疯狂地”寻找给乌克兰承诺的40辆“黄鼠狼”步兵战车,柏林将不得不从自己的武装力量储备中取出所承诺战车的大部分。报道称,德国总理朔尔茨此前曾向基辅承诺了40辆“黄鼠狼”步兵战车,目前联邦政府正在疯狂地寻找承诺的步兵战车。“德国政府尚未准备好供应此类军备,这就是为什么德国国防军必须清空其仓库,但它储备状态其实已经很差了。”德国联邦议院议员亨宁·奥特说道。报道指出,当政府决定将“黄鼠狼”步兵战车交付给乌克兰,德国军方、政界人士和安全专家都开始怀疑柏林将从哪里获得承诺的设备。朔尔茨的话“没那么容易实现”。消息显示,德国国防企业莱茵金属(Rheinmetall)公司库存有近60辆有缺陷的“黄鼠狼”步兵战车,但将其升级会需要很长时间。据报道,德国总理朔尔茨与美国总统拜登5日通电话,就向基辅运送重型军事装备达成一致。随后德国宣布,拟向乌克兰供应40辆“黄鼠狼”步兵战车和1枚“爱国者”防空导弹。乌克兰局势升级以来,德国已向乌克兰提供价值22.5亿欧元的武器和军事装备。
|
||||
|
11
scrapy/wangyi/scrapy.cfg
Normal file
11
scrapy/wangyi/scrapy.cfg
Normal file
@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = wangyi.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = wangyi
|
0
scrapy/wangyi/wangyi/__init__.py
Normal file
0
scrapy/wangyi/wangyi/__init__.py
Normal file
194
scrapy/wangyi/wangyi/fake_useragent.py
Normal file
194
scrapy/wangyi/wangyi/fake_useragent.py
Normal file
@ -0,0 +1,194 @@
|
||||
import random
|
||||
|
||||
import requests
|
||||
|
||||
# Pool of desktop browser User-Agent strings (Chrome, Opera, Firefox, IE)
# from which callers pick one at random.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36",
    "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36",
    "Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15",
    # The trailing comma matters: without it Python concatenates this entry
    # with the next one into a single invalid User-Agent.
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14",
    "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
    "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
    "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
    "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
    "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
    "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
    "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
    "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
    "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
    "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
    "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
    "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
    "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11",
    "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10",
    "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10",
    "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1",
    "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01",
    "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
    "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01",
    "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00",
    "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1",
    "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0",
    "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0",
    "Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (X11; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0",
    "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0",
    "Mozilla/5.0 (Windows x86; rv:19.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0",
    "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)",
    "Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)",
    "Mozilla/4.0 (Compatible; MSIE 8.0; Windows NT 5.2; Trident/6.0)",
    "Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)",
    "Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))",
    "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)",
    "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7",
]
|
||||
|
||||
|
||||
def get_ua():
    """Return one User-Agent string picked at random from ``USER_AGENTS``."""
    chosen = random.choice(USER_AGENTS)
    return chosen
|
||||
|
||||
|
||||
def get_requests_headers():
    """Build a browser-like header dict carrying a random User-Agent.

    A new dict is created on every call, so callers may modify the result.

    NOTE(review): 'Accept-Encoding' advertises ``br``; presumably the HTTP
    client can only decode Brotli bodies when a Brotli package is
    installed - confirm for the environment this runs in.
    """
    user_agent = random.choice(USER_AGENTS)
    return {
        'User-Agent': user_agent,
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'zh-CN,zh;q=0.9,zh-TW;q=0.8,en;q=0.7',
        'Connection': 'close',
        'Accept-Encoding': 'gzip, deflate, br',
    }
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Module self-check: print one generated header set, then fetch a page
    # with that same set so the printed headers are the ones actually sent.
    headers = get_requests_headers()
    print(headers)
    # The timeout stops the check from hanging forever if the site is down.
    response = requests.get('http://www.ip3366.net/?stype=1&page=1', headers=headers, timeout=10)
    print(response.content.decode("gb2312", "ignore"))
|
15
scrapy/wangyi/wangyi/items.py
Normal file
15
scrapy/wangyi/wangyi/items.py
Normal file
@ -0,0 +1,15 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class WangyiItem(scrapy.Item):
    """Container for one scraped news article."""

    # Headline text of the article.
    title = scrapy.Field()
    # Article body text.
    content = scrapy.Field()
    # Running sequence number assigned to the article by the spider.
    number = scrapy.Field()
|
||||
|
52
scrapy/wangyi/wangyi/middlewares.py
Normal file
52
scrapy/wangyi/wangyi/middlewares.py
Normal file
@ -0,0 +1,52 @@
|
||||
# Define here the models for your spider middleware
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
import random
|
||||
|
||||
from scrapy import signals
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import is_item, ItemAdapter
|
||||
|
||||
from .fake_useragent import USER_AGENTS
|
||||
from scrapy.http import HtmlResponse
|
||||
from time import sleep
|
||||
|
||||
|
||||
class WangyiDownloaderMiddleware:
    """Downloader middleware that rotates User-Agents and renders section pages.

    Requests whose URL is listed in ``spider.models_url`` have their response
    replaced by the HTML rendered in the spider's Selenium browser
    (``spider.bro``). All other responses pass through untouched.
    """

    def process_request(self, request, spider):
        """Set a randomly chosen User-Agent header on the outgoing request."""
        user_agent = random.choice(USER_AGENTS)
        request.headers['User-Agent'] = user_agent
        return None

    def process_response(self, request, response, spider):
        """Return the browser-rendered page for section URLs, else ``response``."""
        browser = spider.bro

        # Guard clause: only the section pages need browser rendering.
        if request.url not in spider.models_url:
            return response

        # Load the page in the browser, pause briefly, then scroll down
        # before reading the rendered source.
        browser.get(request.url)
        sleep(0.5)
        browser.execute_script('window.scrollTo(0,10000)')
        rendered_html = browser.page_source

        # Swap the original response for one that carries the rendered HTML.
        return HtmlResponse(
            url=request.url,
            body=rendered_html,
            encoding='utf-8',
            request=request,
        )

    def process_exception(self, request, exception, spider):
        """Take no action on download exceptions."""
        return None
|
35
scrapy/wangyi/wangyi/pipelines.py
Normal file
35
scrapy/wangyi/wangyi/pipelines.py
Normal file
@ -0,0 +1,35 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
|
||||
class WangyiPipeline(object):
    """Item pipeline that appends every news item to ``./news.txt``."""

    # Output file handle; opened in open_spider and closed in close_spider.
    fp = None

    def open_spider(self, spider):
        """Open (and truncate) the output file when the crawl starts."""
        print('开始爬虫!')
        self.fp = open('./news.txt', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Write one item as ``(number)title:`` followed by its content.

        Returns the item unchanged so any later pipeline still receives it.
        """
        # A title or content of None would make the string concatenation
        # raise TypeError; write an empty field instead.
        title = item['title'] or ''
        content = item['content'] or ''
        number = item['number']
        print('正在下载第', number, '个新闻。。。')
        self.fp.write('({}){}:\n{}\n'.format(number, title, content))
        return item

    def close_spider(self, spider):
        """Close the output file if it was opened."""
        print('结束爬虫!')
        if self.fp is not None:
            self.fp.close()
            self.fp = None
|
89
scrapy/wangyi/wangyi/settings.py
Normal file
89
scrapy/wangyi/wangyi/settings.py
Normal file
@ -0,0 +1,89 @@
|
||||
# Scrapy settings for wangyi project
|
||||
#
|
||||
# For simplicity, this file contains only settings considered important or
|
||||
# commonly used. You can find more settings consulting the documentation:
|
||||
#
|
||||
# https://docs.scrapy.org/en/latest/topics/settings.html
|
||||
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
|
||||
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
|
||||
|
||||
# --- Project identity -------------------------------------------------------
BOT_NAME = 'wangyi'

SPIDER_MODULES = ['wangyi.spiders']
NEWSPIDER_MODULE = 'wangyi.spiders'

# --- Logging ----------------------------------------------------------------
LOG_LEVEL = 'ERROR'

# --- Crawl behaviour --------------------------------------------------------
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'wangyi (+http://www.yourdomain.com)'

# robots.txt rules are not obeyed.
ROBOTSTXT_OBEY = False

# Maximum concurrent requests performed by Scrapy (default: 16)
CONCURRENT_REQUESTS = 32

# Delay for requests to the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#    'Accept-Language': 'en',
#}

# --- Middlewares, extensions, pipelines -------------------------------------
# Spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'wangyi.middlewares.WangyiSpiderMiddleware': 543,
#}

# Downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'wangyi.middlewares.WangyiDownloaderMiddleware': 543,
}

# Extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'wangyi.pipelines.WangyiPipeline': 300,
}

# --- AutoThrottle (disabled by default) -------------------------------------
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# --- HTTP caching (disabled by default) -------------------------------------
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
|
4
scrapy/wangyi/wangyi/spiders/__init__.py
Normal file
4
scrapy/wangyi/wangyi/spiders/__init__.py
Normal file
@ -0,0 +1,4 @@
|
||||
# This package will contain the spiders of your Scrapy project
|
||||
#
|
||||
# Please refer to the documentation for information on how to create and manage
|
||||
# your spiders.
|
68
scrapy/wangyi/wangyi/spiders/news.py
Normal file
68
scrapy/wangyi/wangyi/spiders/news.py
Normal file
@ -0,0 +1,68 @@
|
||||
import scrapy
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver import ChromeOptions
|
||||
from ..items import WangyiItem
|
||||
|
||||
|
||||
class NewsSpider(scrapy.Spider):
    """Crawl selected sections of news.163.com and yield one item per article.

    Flow: ``parse`` collects the section URLs from the start page,
    ``detail_parse`` lists the articles of one section, and ``content_parse``
    extracts the article text. The section URLs are kept in ``models_url``
    and a headless Chrome instance in ``bro``.
    """

    name = 'news'
    # allowed_domains = ['www.xxx.com']
    start_urls = ['https://news.163.com/']
    models_url = []  # section page URLs (rebound per instance in __init__)
    number = 1  # running sequence number given to each article item

    def __init__(self, **kwargs):
        """Create the spider and start the headless Chrome browser."""
        super().__init__(**kwargs)
        # Per-instance list, so appended URLs do not accumulate on the
        # class-level list shared by every instance.
        self.models_url = []

        option = ChromeOptions()
        # Each add_experimental_option call replaces the value stored under
        # its name, so both switches go in one list; two separate calls
        # would keep only the last one.
        option.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
        option.add_experimental_option('useAutomationExtension', False)
        for flag in (
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--window-size=1920,1080",  # an explicit window size is recommended
            '--headless',
            '--disable-gpu',
        ):
            option.add_argument(flag)
        # Raw string: the backslashes in the Windows path are literal, not
        # (invalid) escape sequences. The resulting value is unchanged.
        # NOTE(review): executable_path is presumably deprecated in newer
        # Selenium releases in favour of a Service object - confirm against
        # the installed version.
        self.bro = webdriver.Chrome(executable_path=r'D:\爬虫\selenium\chromedriver.exe', options=option)

    def closed(self, spider):
        """Quit the browser when the spider is closed.

        NOTE(review): Scrapy presumably passes the close reason here rather
        than a spider object; the parameter name is kept for compatibility.
        """
        self.bro.quit()

    def detail_parse(self, response):
        """Yield a request for each article listed on one section page.

        The section content is loaded dynamically, so the response parsed
        here is expected to be the browser-rendered page.
        """
        div_list = response.xpath('//div[@class="ndi_main"]/div[@class="data_row news_article clearfix news_first"] | //div[@class="ndi_main"]/div[@class="data_row news_article clearfix "]')
        for div in div_list:
            content_url = div.xpath('./div/div/h3/a/@href').extract_first()
            if not content_url:
                # No link means no article page to request; skip the entry
                # without consuming a sequence number.
                continue

            item = WangyiItem()
            item['title'] = div.xpath('./div/div/h3/a/text()').extract_first()
            item['number'] = self.number
            self.number += 1

            yield scrapy.Request(url=content_url, callback=self.content_parse, meta={'item': item})

    def content_parse(self, response):
        """Attach the joined article text to the item from ``meta`` and yield it."""
        item = response.meta['item']
        content = response.xpath('//*[@id="content"]/div[2]//text()').extract()
        item['content'] = ''.join(content)
        yield item

    def parse(self, response):
        """Collect the section URLs from the start page and request each one."""
        li_list = response.xpath('//*[@id="index2016_wrap"]/div[3]/div[2]/div[2]/div[2]/div/ul/li')
        alist = [1, 2, 4, 5]  # positions of the wanted sections in the nav list

        for index in alist:
            model_url = li_list[index].xpath('./a/@href').extract_first()
            if model_url:
                # Only real URLs are recorded; a missing href cannot be requested.
                self.models_url.append(model_url)

        # Request every recorded section page in turn.
        for url in self.models_url:
            yield scrapy.Request(url=url, callback=self.detail_parse)
|
11
scrapy/xiaohua/scrapy.cfg
Normal file
11
scrapy/xiaohua/scrapy.cfg
Normal file
@ -0,0 +1,11 @@
|
||||
# Automatically created by: scrapy startproject
|
||||
#
|
||||
# For more information about the [deploy] section see:
|
||||
# https://scrapyd.readthedocs.io/en/latest/deploy.html
|
||||
|
||||
[settings]
|
||||
default = xiaohua.settings
|
||||
|
||||
[deploy]
|
||||
#url = http://localhost:6800/
|
||||
project = xiaohua
|
0
scrapy/xiaohua/xiaohua/__init__.py
Normal file
0
scrapy/xiaohua/xiaohua/__init__.py
Normal file
13
scrapy/xiaohua/xiaohua/items.py
Normal file
13
scrapy/xiaohua/xiaohua/items.py
Normal file
@ -0,0 +1,13 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class XiaohuaItem(scrapy.Item):
    """Container for one scraped entry with its author and content."""

    # Author of the entry.
    author = scrapy.Field()
    # Text content of the entry.
    content = scrapy.Field()
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user