"Phb重构Cookie处理逻辑,将代理配置与Cookie配置分离,优化Cookie加载流程并增强可读性"
This commit is contained in:
parent
1ef2c56330
commit
6acd799129
@ -32,8 +32,8 @@ class Spider(Spider):
|
|||||||
},
|
},
|
||||||
注:http(s)代理都是http
|
注:http(s)代理都是http
|
||||||
'''
|
'''
|
||||||
try:self.proxies = json.loads(extend)
|
try:self.extendDict = json.loads(extend)
|
||||||
except:self.proxies = {}
|
except:self.extendDict = {}
|
||||||
self.headers = {
|
self.headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
|
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
|
||||||
'pragma': 'no-cache',
|
'pragma': 'no-cache',
|
||||||
@ -51,65 +51,31 @@ class Spider(Spider):
|
|||||||
self.host = self.gethost()
|
self.host = self.gethost()
|
||||||
self.headers.update({'referer': f'{self.host}/', 'origin': self.host})
|
self.headers.update({'referer': f'{self.host}/', 'origin': self.host})
|
||||||
self.session = Session()
|
self.session = Session()
|
||||||
self.session.proxies.update(self.proxies)
|
# self.session.proxies.update(self.proxies)
|
||||||
self.session.headers.update(self.headers)
|
self.session.headers.update(self.headers)
|
||||||
|
|
||||||
# --- 外部导入 Cookie 逻辑开始 ---
|
# ====== 在这里处理并传入指定的 Cookie ======
|
||||||
raw_cookie_data = '' # 用于暂存获取到的原始 cookie 数据
|
|
||||||
|
|
||||||
# 1. 从 extendDict 中尝试获取 'cookie' 字段
|
# 你提供的 Cookie 字符串
|
||||||
if 'cookie' in self.proxies: # self.proxies 实际上就是 extendDict
|
cookie_string = ''#"KEY=2800769*3022907:116762244:2236104702:1; platform=mobile; ss=830813246555776814; sessid=300353306947678113; comp_detect-cookies=13166.100000; fg_afaf12e4c5419a855dd0bf120670f=41719.100000; fg_757a7e3b2b97e62caeae14647b10ab8a=62682.100000; fg_63bebf99a35b4f802b2ee7589fece7c6=48941.100000; fg_7d466c3b3b825a5e5f974868b9d87c01=43396.100000; fg_7d31324eedb583147b6dcbea0051c868=81750.100000; __s=683BD997-42FE722901BB234E92-52ADB4D; __l=683BD997-42FE722901BB234E92-52ADB4D; _ga=GA1.1.865788991.1748752797; accessAgeDisclaimerPH=1; googtrans=/auto/zh-CN; googtrans=/auto/zh-CN; ua=5088e51afb755e3294a3359f17dd8eee; il=v11msoKzX_h0h258sWpnEAzxOpywRHt3No__oI88GHuIUxNzY0Njk2NzIzOVY2Q1c2d2l2eGp4T19pOW5CVV9JSU1FQ0NGWW01YWlLNnZ5Y251LQ..; cookieConsent=2; bs=e1649232670c3a49db241055d6ccf891; bsdd=e1649232670c3a49db241055d6ccf891; htjf-mobile=3; tj_UUID=ChBNOIxil6REvZRCDWP3pmmCEgwIjYi1vgYQmfiBowEYASIgZTE2NDkyMzI2NzBjM2E0OWRiMjQxMDU1ZDZjY2Y4OTE=; tj_UUID_v2=ChBNOIxil6REvZRCDWP3pmmCEgwIjYi1vgYQmfiBowEYASIgZTE2NDkyMzI2NzBjM2E0OWRiMjQxMDU1ZDZjY2Y4OTE=; _ga_B39RFFWGYY=GS2.1.s1748884900$o2$g1$t1748885526$j58$l0$h0"
|
||||||
raw_cookie_data = self.proxies['cookie']
|
|
||||||
|
|
||||||
# 2. 如果 extendDict 中有 'json' 字段,从该 URL 获取 JSON 并提取 cookie
|
extendDict = self.extendDict
|
||||||
if 'json' in self.proxies:
|
|
||||||
try:
|
|
||||||
# 注意:这里假设 self.fetch 是一个已有的方法来执行请求
|
|
||||||
# 如果没有,你需要替换为 requests.get() 或其他请求方式
|
|
||||||
r = requests.get(self.proxies['json'], timeout=10, proxies=self.proxies, headers=self.headers)
|
|
||||||
r.raise_for_status() # 检查请求是否成功
|
|
||||||
if 'cookie' in r.json():
|
|
||||||
raw_cookie_data = r.json()['cookie']
|
|
||||||
except Exception as e:
|
|
||||||
print(f"从 JSON URL 获取 Cookie 失败: {e}")
|
|
||||||
raw_cookie_data = '' # 获取失败则清空
|
|
||||||
|
|
||||||
# 3. 如果 raw_cookie_data 是一个 URL (http/https开头),则请求该 URL 获取纯文本 Cookie
|
if 'cookie' in extendDict:
|
||||||
if isinstance(raw_cookie_data, str) and raw_cookie_data.startswith('http'):
|
cookie_string = extendDict['cookie']
|
||||||
try:
|
|
||||||
r = requests.get(raw_cookie_data, timeout=10, proxies=self.proxies, headers=self.headers)
|
|
||||||
r.raise_for_status()
|
|
||||||
raw_cookie_data = r.text.strip()
|
|
||||||
except Exception as e:
|
|
||||||
print(f"从纯文本 URL 获取 Cookie 失败: {e}")
|
|
||||||
raw_cookie_data = ''
|
|
||||||
|
|
||||||
# 4. 将获取到的原始 Cookie 数据解析并更新到 session 中
|
if cookie_string == '':
|
||||||
# 如果 raw_cookie_data 仍然为空或 '{}',则不处理
|
cookie_string = '{}'
|
||||||
if raw_cookie_data and raw_cookie_data != '{}':
|
|
||||||
cookies_to_set = {}
|
|
||||||
if isinstance(raw_cookie_data, dict):
|
|
||||||
# 如果直接是字典类型,直接使用
|
|
||||||
cookies_to_set = raw_cookie_data
|
|
||||||
elif isinstance(raw_cookie_data, str):
|
|
||||||
try:
|
|
||||||
# 尝试解析为 JSON 字典 (例如 {"name": "value"})
|
|
||||||
cookies_to_set = json.loads(raw_cookie_data)
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
# 如果不是 JSON,尝试按字符串格式解析 (例如 "name1=value1; name2=value2")
|
|
||||||
for part in raw_cookie_data.split('; '):
|
|
||||||
if '=' in part:
|
|
||||||
key, value = part.split('=', 1)
|
|
||||||
cookies_to_set[key.strip()] = value.strip()
|
|
||||||
|
|
||||||
if cookies_to_set:
|
|
||||||
self.session.cookies.update(cookies_to_set)
|
|
||||||
print("Cookie 已成功加载并更新到会话。")
|
|
||||||
else:
|
else:
|
||||||
print("未解析到有效的 Cookie 数据。")
|
# 将 Cookie 字符串解析为字典
|
||||||
else:
|
parsed_cookies = {}
|
||||||
print("未指定外部 Cookie 或 Cookie 数据为空。")
|
for part in cookie_string.split('; '):
|
||||||
# --- 外部导入 Cookie 逻辑结束 ---
|
if '=' in part:
|
||||||
|
key, value = part.split('=', 1) # 只在第一个等号处分割,因为值可能包含等号
|
||||||
|
parsed_cookies[key.strip()] = value.strip() # strip() 用于去除可能存在的空格
|
||||||
|
|
||||||
|
self.session.cookies.update(parsed_cookies)
|
||||||
|
# ==================================
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user