feat(Xhm.py): 优化请求头配置与图片代理逻辑

- 移除冗余的 `requests` 和 `urlparse` 导入
- 简化初始化流程,统一 referer 设置
- 更新默认 User-Agent 及请求头字段,增强浏览器模拟
- 移除自定义 m3u8 和 ts 代理方法,改用通用处理
- 调整图片链接获取方式,不再强制使用代理
- 格式化部分代码以提升可读性
This commit is contained in:
Wang.Luo 2025-10-19 23:28:31 +08:00
parent 3b3360e902
commit e444d7eecd

View File

@ -3,9 +3,6 @@
import json import json
import sys import sys
from base64 import b64decode, b64encode from base64 import b64decode, b64encode
from urllib.parse import urlparse
import requests
from pyquery import PyQuery as pq from pyquery import PyQuery as pq
from requests import Session from requests import Session
sys.path.append('..') sys.path.append('..')
@ -15,28 +12,9 @@ from base.spider import Spider
class Spider(Spider): class Spider(Spider):
def init(self, extend=""):
    """Resolve the active mirror host and prepare a shared HTTP session.

    `extend` is accepted for interface compatibility with the loader but is
    no longer used now that per-site proxy configuration was removed.
    """
    self.host = self.gethost()
    self.session = Session()
    # Point origin/referer at the resolved mirror so subsequent requests
    # look like same-site browser traffic.
    site_headers = {'origin': self.host, 'referer': f'{self.host}/'}
    self.headers.update(site_headers)
    self.session.headers.update(self.headers)
@ -52,6 +30,27 @@ class Spider(Spider):
def destroy(self):
    # Lifecycle hook required by the Spider interface; no resources to free.
    pass
# Class-level default request headers: a desktop Chrome 133 navigation
# fingerprint (sec-fetch-dest: document). init() later adds 'origin' and
# 'referer' for the resolved host; playerContent builds its own XHR-style
# headers separately.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    # Client-hint headers mirroring the UA string above.
    'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-full-version': '"133.0.6943.98"',
    'sec-ch-ua-arch': '"x86"',
    'sec-ch-ua-platform': '"Windows"',
    'sec-ch-ua-platform-version': '"19.0.0"',
    'sec-ch-ua-model': '""',
    'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.98", "Chromium";v="133.0.6943.98"',
    'dnt': '1',
    'upgrade-insecure-requests': '1',
    # Fetch-metadata values for a top-level user navigation.
    'sec-fetch-site': 'none',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-user': '?1',
    'sec-fetch-dest': 'document',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'priority': 'u=0, i'
}
def homeContent(self, filter): def homeContent(self, filter):
result = {} result = {}
cateManual = { cateManual = {
@ -70,7 +69,7 @@ class Spider(Spider):
'type_name': k, 'type_name': k,
'type_id': cateManual[k] 'type_id': cateManual[k]
}) })
if k != '4K': filters[cateManual[k]] = [{'key': 'type', 'name': '类型', 'value': [{'n': '4K', 'v': '/4k'}]}] if k !='4K':filters[cateManual[k]]=[{'key':'type','name':'类型','value':[{'n':'4K','v':'/4k'}]}]
result['class'] = classes result['class'] = classes
result['filters'] = filters result['filters'] = filters
return result return result
@ -88,7 +87,7 @@ class Spider(Spider):
result['total'] = 999999 result['total'] = 999999
if tid in ['/4k', '/newest', '/best'] or 'two_click_' in tid: if tid in ['/4k', '/newest', '/best'] or 'two_click_' in tid:
if 'two_click_' in tid: tid = tid.split('click_')[-1] if 'two_click_' in tid: tid = tid.split('click_')[-1]
data = self.getpq(f'{tid}{extend.get("type", "")}/{pg}') data = self.getpq(f'{tid}{extend.get("type","")}/{pg}')
vdata = self.getlist(data(".thumb-list--sidebar .thumb-list__item")) vdata = self.getlist(data(".thumb-list--sidebar .thumb-list__item"))
elif tid == '/channels': elif tid == '/channels':
data = self.getpq(f'{tid}/{pg}') data = self.getpq(f'{tid}/{pg}')
@ -97,7 +96,7 @@ class Spider(Spider):
vdata.append({ vdata.append({
'vod_id': f"two_click_" + i.get('channelURL'), 'vod_id': f"two_click_" + i.get('channelURL'),
'vod_name': i.get('channelName'), 'vod_name': i.get('channelName'),
'vod_pic': self.proxy(i.get('siteLogoURL')), 'vod_pic': i.get('siteLogoURL'),
'vod_year': f'videos:{i.get("videoCount")}', 'vod_year': f'videos:{i.get("videoCount")}',
'vod_tag': 'folder', 'vod_tag': 'folder',
'vod_remarks': f'subscribers:{i["subscriptionModel"].get("subscribers")}', 'vod_remarks': f'subscribers:{i["subscriptionModel"].get("subscribers")}',
@ -122,7 +121,7 @@ class Spider(Spider):
vdata.append({ vdata.append({
'vod_id': f"two_click_" + i.get('pageURL'), 'vod_id': f"two_click_" + i.get('pageURL'),
'vod_name': i.get('name'), 'vod_name': i.get('name'),
'vod_pic': self.proxy(i.get('imageThumbUrl')), 'vod_pic': i.get('imageThumbUrl'),
'vod_remarks': i.get('translatedCountryName'), 'vod_remarks': i.get('translatedCountryName'),
'vod_tag': 'folder', 'vod_tag': 'folder',
'style': {'ratio': 1.33, 'type': 'rect'} 'style': {'ratio': 1.33, 'type': 'rect'}
@ -136,7 +135,7 @@ class Spider(Spider):
vdata.append({ vdata.append({
'vod_id': f"two_click_" + j.get('url'), 'vod_id': f"two_click_" + j.get('url'),
'vod_name': j.get('name'), 'vod_name': j.get('name'),
'vod_pic': self.proxy(j.get('thumb')), 'vod_pic': j.get('thumb'),
'vod_tag': 'folder', 'vod_tag': 'folder',
'style': {'ratio': 1.33, 'type': 'rect'} 'style': {'ratio': 1.33, 'type': 'rect'}
}) })
@ -164,13 +163,12 @@ class Spider(Spider):
plist = [] plist = []
d = djs['xplayerSettings']['sources'] d = djs['xplayerSettings']['sources']
f = d.get('standard') f = d.get('standard')
def custom_sort_key(url):
    # Sort play sources best-first: the numeric part of the quality label
    # (e.g. '1080' from '1080p$...') is negated so larger resolutions sort
    # earlier; the raw label breaks ties deterministically.
    label = url.split('$')[0]
    digits = ''.join(ch for ch in label if ch.isdigit())
    return -(int(digits) if digits else 0), label
if f: if f:
for key, value in f.items(): for key, value in f.items():
if isinstance(value, list): if isinstance(value, list):
@ -183,7 +181,7 @@ class Spider(Spider):
if url := info.get('url'): if url := info.get('url'):
encoded = self.e64(f'{0}@@@@{url}') encoded = self.e64(f'{0}@@@@{url}')
plist.append(f"{format_type}${encoded}") plist.append(f"{format_type}${encoded}")
except Exception as e: except Exception as e:
plist = [f"{vn}${self.e64(f'{1}@@@@{ids[0]}')}"] plist = [f"{vn}${self.e64(f'{1}@@@@{ids[0]}')}"]
print(f"获取视频信息失败: {str(e)}") print(f"获取视频信息失败: {str(e)}")
@ -195,20 +193,31 @@ class Spider(Spider):
return {'list': self.getlist(data(".thumb-list--sidebar .thumb-list__item")), 'page': pg} return {'list': self.getlist(data(".thumb-list--sidebar .thumb-list__item")), 'page': pg}
def playerContent(self, flag, id, vipFlags):
    """Turn a packed play id into the player result dict.

    `id` is base64 of '<parse_flag>@@@@<media_url>' (see detailContent);
    the headers imitate a cross-site media XHR originating from the
    resolved host.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
        'pragma': 'no-cache',
        'cache-control': 'no-cache',
        'sec-ch-ua-platform': '"Windows"',
        'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
        'dnt': '1',
        'sec-ch-ua-mobile': '?0',
        'origin': self.host,
        'sec-fetch-site': 'cross-site',
        'sec-fetch-mode': 'cors',
        'sec-fetch-dest': 'empty',
        'referer': f'{self.host}/',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
        'priority': 'u=1, i',
    }
    parts = self.d64(id).split('@@@@')
    return {'parse': int(parts[0]), 'url': parts[1], 'header': headers}
def localProxy(self, param):
    # Custom m3u8/ts proxying was removed; this hook is intentionally a
    # no-op so the framework's generic handling applies.
    return None
def gethost(self): def gethost(self):
try: try:
response = requests.get('https://xhamster.com',proxies=self.proxies,headers=self.headers,allow_redirects=False) response = self.fetch('https://xhamster.com', headers=self.headers, allow_redirects=False)
return response.headers['Location'] return response.headers['Location']
except Exception as e: except Exception as e:
print(f"获取主页失败: {str(e)}") print(f"获取主页失败: {str(e)}")
@ -238,7 +247,7 @@ class Spider(Spider):
vlist.append({ vlist.append({
'vod_id': i('.role-pop').attr('href'), 'vod_id': i('.role-pop').attr('href'),
'vod_name': i('.video-thumb-info a').text(), 'vod_name': i('.video-thumb-info a').text(),
'vod_pic': self.proxy(i('.role-pop img').attr('src')), 'vod_pic': i('.role-pop img').attr('src'),
'vod_year': i('.video-thumb-info .video-thumb-views').text().split(' ')[0], 'vod_year': i('.video-thumb-info .video-thumb-views').text().split(' ')[0],
'vod_remarks': i('.role-pop div[data-role="video-duration"]').text(), 'vod_remarks': i('.role-pop div[data-role="video-duration"]').text(),
'style': {'ratio': 1.33, 'type': 'rect'} 'style': {'ratio': 1.33, 'type': 'rect'}
@ -259,30 +268,3 @@ class Spider(Spider):
jst = json.loads(vhtml.split('initials=')[-1][:-1]) jst = json.loads(vhtml.split('initials=')[-1][:-1])
return jst return jst
def m3Proxy(self, url):
    """Fetch an m3u8 playlist and rewrite its media URIs through the local proxy.

    Follows at most one redirect manually (allow_redirects=False) so relative
    segment paths can be resolved against the final playlist URL.
    """
    ydata = requests.get(url, headers=self.headers, proxies=self.proxies, allow_redirects=False)
    data = ydata.content.decode('utf-8')
    if ydata.headers.get('Location'):
        # Redirected: re-fetch from the final location and resolve against it.
        url = ydata.headers['Location']
        data = requests.get(url, headers=self.headers, proxies=self.proxies).content.decode('utf-8')
    lines = data.strip().split('\n')
    # last_r: the playlist's directory; durl: the bare scheme://host origin.
    last_r = url[:url.rfind('/')]
    parsed_url = urlparse(url)
    durl = parsed_url.scheme + "://" + parsed_url.netloc
    for index, string in enumerate(lines):
        if '#EXT' not in string:
            if 'http' not in string:
                # Short relative paths resolve against the playlist directory,
                # deeper/absolute paths against the site origin.
                domain = last_r if string.count('/') < 2 else durl
                string = domain + ('' if string.startswith('/') else '/') + string
            # Proxy each segment, typed by its file extension (query stripped).
            lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
    data = '\n'.join(lines)
    # NOTE(review): MIME type looks truncated ('application/vnd.apple.mpegurl'
    # is standard) — left byte-identical here; confirm before changing.
    return [200, "application/vnd.apple.mpegur", data]
def tsProxy(self, url):
    """Fetch one media segment and return it as [status, content-type, bytes].

    stream=True defers the download, but accessing .content below still reads
    the whole body into memory before returning.
    """
    data = requests.get(url, headers=self.headers, proxies=self.proxies, stream=True)
    return [200, data.headers['Content-Type'], data.content]
def proxy(self, data, type='img'):
    # Rewrite a URL through the local proxy endpoint only when upstream
    # proxies are configured; otherwise hand back the original value
    # (including None/empty) untouched.
    if not data or not self.proxies:
        return data
    return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"