feat(adult): add GayVidsClub site support

- Add a video spider implementation for the GayVidsClub site
- Support latest, category, and search listings
- Add parse-service hooks for Aliyun Drive, Quark Drive, and similar services
- Improve poster handling to ensure landscape images are used
Wang.Luo 2025-09-06 00:57:18 +08:00
parent 5d9fe392a9
commit 5fd834b793
6 changed files with 1168 additions and 0 deletions

View File

@@ -0,0 +1,362 @@
# -*- coding: utf-8 -*-
# author: 嗷呜群fans & claude4⚡
import json
import re
import sys
from base64 import b64decode, b64encode
from urllib.parse import urljoin

from pyquery import PyQuery as pq
from requests import Session

sys.path.append('..')
from base.spider import Spider


class Spider(Spider):
    def init(self, extend=""):
        try:
            self.proxies = json.loads(extend) if extend else {}
        except Exception:
            self.proxies = {}
        # Accept either a bare proxy mapping or one nested under a 'proxy' key.
        if isinstance(self.proxies, dict) and isinstance(self.proxies.get('proxy'), dict):
            self.proxies = self.proxies['proxy']
        # Normalize bare "host:port" proxy values to full http:// URLs.
        fixed = {}
        for k, v in (self.proxies or {}).items():
            if isinstance(v, str) and not v.startswith('http'):
                fixed[k] = f'http://{v}'
            else:
                fixed[k] = v
        self.proxies = fixed
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.3,en;q=0.2',
            'Referer': 'https://gayvidsclub.com/',
            'Origin': 'https://gayvidsclub.com',
        }
        self.host = "https://gayvidsclub.com"
        self.session = Session()
        self.session.proxies.update(self.proxies)
        self.session.headers.update(self.headers)

    def getName(self):
        return "GayVidsClub"

    def isVideoFormat(self, url):
        return '.m3u8' in url or '.mp4' in url

    def manualVideoCheck(self):
        return True

    def destroy(self):
        pass
    def homeContent(self, filter):
        result = {}
        cateManual = {
            "最新": "/all-gay-porn/",
            "COAT": "/all-gay-porn/coat/",
            "MEN'S RUSH.TV": "/all-gay-porn/mens-rush-tv/",
            "HUNK CHANNEL": "/all-gay-porn/hunk-channel/",
            "KO": "/all-gay-porn/ko/",
            "EXFEED": "/all-gay-porn/exfeed/",
            "BRAVO!": "/all-gay-porn/bravo/",
            "STR8BOYS": "/all-gay-porn/str8boys/",
            "G-BOT": "/all-gay-porn/g-bot/"
        }
        classes = []
        filters = {}
        for k in cateManual:
            classes.append({
                'type_name': k,
                'type_id': cateManual[k]
            })
        result['class'] = classes
        result['filters'] = filters
        return result

    def homeVideoContent(self):
        data = self.fetchPage("/")
        vlist = self.getlist(data("article"))
        if not vlist:
            data = self.fetchPage('/all-gay-porn/')
            vlist = self.getlist(data("article"))
        return {'list': vlist}

    def categoryContent(self, tid, pg, filter, extend):
        result = {}
        result['page'] = pg
        result['pagecount'] = 9999
        result['limit'] = 90
        result['total'] = 999999
        # pg may arrive as a string, so normalize before comparing.
        if int(pg) == 1:
            url = tid
        else:
            url = f"{tid}page/{pg}/"
        data = self.fetchPage(url)
        result['list'] = self.getlist(data("article"))
        return result
    def detailContent(self, ids):
        data = self.fetchPage(ids[0])
        title = data('h1').text().strip()
        # Prefer a direct <iframe>; fall back to iframes assembled in scripts.
        iframe_src = None
        iframe_elem = data('iframe')
        if iframe_elem:
            iframe_src = iframe_elem.attr('src')
        if not iframe_src:
            for script in data('script').items():
                script_text = script.text()
                if 'iframe' in script_text and 'src' in script_text:
                    matches = re.findall(r'iframe.*?src=[\'"](https?://[^\'"]+)[\'"]', script_text)
                    if matches:
                        iframe_src = matches[0]
                        break
        # Pick a poster image, making sure the landscape variant is used.
        vod_pic = ""
        img_elem = data('img')
        if img_elem:
            vod_pic = img_elem.attr('src')
        if vod_pic and not ('poster' in vod_pic or 'cover' in vod_pic):
            # Not already landscape artwork; try to convert it.
            vod_pic = self.ensure_horizontal_poster(vod_pic)
        vod = {
            'vod_name': title,
            'vod_content': 'GayVidsClub视频',
            'vod_tag': 'GayVidsClub',
            'vod_pic': vod_pic,
            'vod_play_from': 'GayVidsClub',
            'vod_play_url': ''
        }
        play_lines = []
        if iframe_src:
            if not iframe_src.startswith('http'):
                iframe_src = urljoin(self.host, iframe_src)
            play_lines.append(f"直连${self.e64(iframe_src)}")
        play_lines.append(f"嗅探${self.e64(ids[0])}")
        # The parse-service lines all carry the same payload: the iframe URL
        # when one was found, otherwise the detail-page URL.
        target = iframe_src if iframe_src else ids[0]
        for name in ("阿里云盘解析", "夸克网盘解析", "115网盘解析", "迅雷解析", "PikPak解析"):
            play_lines.append(f"{name}${self.e64(target)}")
        play_lines.append(f"手机推送${target}")
        vod['vod_play_url'] = '#'.join(play_lines)
        return {'list': [vod]}
    def searchContent(self, key, quick, pg="1"):
        if int(pg) == 1:
            url = f"/?s={key}"
        else:
            url = f"/page/{pg}/?s={key}"
        data = self.fetchPage(url)
        return {'list': self.getlist(data("article")), 'page': pg}

    def playerContent(self, flag, id, vipFlags):
        if "手机推送" in flag:
            # 手机推送 entries are stored unencoded, so pass the raw id through.
            return {'parse': 1, 'url': id, 'header': self.headers}
        url = self.d64(id)
        if "直连" in flag:
            return {'parse': 0, 'url': url, 'header': self.headers}
        elif "嗅探" in flag:
            return {'parse': 1, 'url': url, 'header': self.headers}
        elif "阿里云盘解析" in flag:
            return self.parse_with_aliyun(url)
        elif "夸克网盘解析" in flag:
            return self.parse_with_quark(url)
        elif "115网盘解析" in flag:
            return self.parse_with_115(url)
        elif "迅雷解析" in flag:
            return self.parse_with_thunder(url)
        elif "PikPak解析" in flag:
            return self.parse_with_pikpak(url)
        else:
            # Any unrecognized flag falls back to sniffing.
            return {'parse': 1, 'url': url, 'header': self.headers}

    def fetchPage(self, url):
        if not url.startswith('http'):
            url = urljoin(self.host, url)
        response = self.session.get(url)
        return pq(response.text)
    def getlist(self, items):
        vlist = []
        for item in items.items():
            vid = item.find('a').attr('href')
            img = item.find('img').attr('src')
            name = item.find('h2').text()
            if not name:
                name = item.find('h3').text()
            # Make sure the landscape poster variant is used.
            if img:
                if '?' in img:
                    img = img.split('?')[0]
                img = self.ensure_horizontal_poster(img)
            vlist.append({
                'vod_id': vid,
                'vod_name': name,
                'vod_pic': img,
                'vod_remarks': '',
                'style': {'type': 'rect', 'ratio': 1.33}  # landscape card style
            })
        return vlist

    def ensure_horizontal_poster(self, img_url):
        """Return a landscape poster URL for img_url where possible."""
        if not img_url:
            return img_url
        # Already a landscape poster: return unchanged.
        if 'poster' in img_url or 'cover' in img_url:
            return img_url
        # Markers that usually indicate portrait artwork...
        vertical_indicators = ['thumb', 'vertical', 'portrait', 'square']
        # ...and markers that usually indicate landscape artwork.
        horizontal_indicators = ['poster', 'cover', 'horizontal', 'landscape']
        if any(indicator in img_url for indicator in vertical_indicators):
            # Try swapping each portrait marker for a landscape one and keep
            # the first rewritten URL the server actually serves.
            for v_indicator in vertical_indicators:
                if v_indicator not in img_url:
                    continue
                for h_indicator in horizontal_indicators:
                    new_url = img_url.replace(v_indicator, h_indicator)
                    try:
                        response = self.session.head(new_url, timeout=3)
                        if response.status_code == 200:
                            return new_url
                    except Exception:
                        continue
            # No rewritten URL worked; fall back to a query-string hint.
            sep = '&' if '?' in img_url else '?'
            return f"{img_url}{sep}type=horizontal"
        return img_url
    def e64(self, data):
        return b64encode(data.encode()).decode()

    def d64(self, data):
        return b64decode(data).decode()

    def _parse_with(self, url, parse_type, message):
        # Placeholder for the cloud-drive parse services: hand the URL to the
        # player for sniffing, tagging which service was requested.
        return {
            'parse': 1,
            'url': url,
            'header': self.headers,
            'parse_type': parse_type,
            'message': message
        }

    def parse_with_aliyun(self, url):
        return self._parse_with(url, 'aliyun', '使用阿里云盘解析服务')

    def parse_with_quark(self, url):
        return self._parse_with(url, 'quark', '使用夸克网盘解析服务')

    def parse_with_115(self, url):
        return self._parse_with(url, '115', '使用115网盘解析服务')

    def parse_with_thunder(self, url):
        return self._parse_with(url, 'thunder', '使用迅雷解析服务')

    def parse_with_pikpak(self, url):
        return self._parse_with(url, 'pikpak', '使用PikPak解析服务')
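
# ------------------------------
# Optional smoke test, mirroring the pattern used by the other spiders in
# this commit. The call sequence below reflects how a PyramidStore-style
# loader typically drives a plugin (an assumption here, not a contract
# defined in this file); it needs base.spider importable and network access.
if __name__ == "__main__":
    spider = Spider()
    spider.init()
    home = spider.homeContent(filter=False)
    first_tid = home['class'][0]['type_id']
    page1 = spider.categoryContent(first_tid, "1", False, {})
    detail = spider.detailContent([page1['list'][0]['vod_id']])
    # vod_play_url packs "name$payload" pairs joined by '#'.
    for line in detail['list'][0]['vod_play_url'].split('#'):
        name, payload = line.split('$', 1)
        print(name, spider.playerContent(name, payload, []))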

View File

@@ -0,0 +1,243 @@
# coding=utf-8
# !/usr/bin/python
import base64
import hashlib
import re
import sys
from datetime import datetime, timedelta
from typing import Tuple
from urllib.parse import quote, unquote

import requests
from urllib3.util.retry import Retry

sys.path.append('..')
from base.spider import Spider

# Search by username. Keywords take the form "<category> <keyword>".
# The category letter is noted in the tab label: e.g. "女主播g" means the
# search category for 女主播 (female hosts) is "g".
# Searching "g per" looks for "per" within 女主播. Keywords are
# case-insensitive but must be at least 3 characters, otherwise the result
# is empty.


class Spider(Spider):
    def init(self, extend="{}"):
        origin = 'https://zh.stripchat.com'
        self.host = origin
        self.headers = {
            'Origin': origin,
            'Referer': f"{origin}/",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0'
        }
        self.stripchat_key = self.decode_key_compact()
        # Cache of SHA-256 digests keyed by passphrase.
        self._hash_cache = {}
        self.create_session_with_retry()

    def getName(self):
        pass

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass
    def homeContent(self, filter):
        CLASSES = [
            {'type_name': '女主播g', 'type_id': 'girls'},
            {'type_name': '情侣c', 'type_id': 'couples'},
            {'type_name': '男主播m', 'type_id': 'men'},
            {'type_name': '跨性别t', 'type_id': 'trans'}
        ]
        VALUE = (
            {'n': '中国', 'v': 'tagLanguageChinese'},
            {'n': '亚洲', 'v': 'ethnicityAsian'},
            {'n': '白人', 'v': 'ethnicityWhite'},
            {'n': '拉丁', 'v': 'ethnicityLatino'},
            {'n': '混血', 'v': 'ethnicityMultiracial'},
            {'n': '印度', 'v': 'ethnicityIndian'},
            {'n': '阿拉伯', 'v': 'ethnicityMiddleEastern'},
            {'n': '黑人', 'v': 'ethnicityEbony'}
        )
        VALUE_MEN = ({'n': '情侣', 'v': 'sexGayCouples'}, {'n': '直男', 'v': 'orientationStraight'})
        TIDS = ('girls', 'couples', 'men', 'trans')
        filters = {
            tid: [{'key': 'tag', 'value': VALUE_MEN + VALUE if tid == 'men' else VALUE}]
            for tid in TIDS
        }
        return {
            'class': CLASSES,
            'filters': filters
        }

    def homeVideoContent(self):
        pass

    def categoryContent(self, tid, pg, filter, extend):
        limit = 60
        offset = limit * (int(pg) - 1)
        url = f"{self.host}/api/front/models?improveTs=false&removeShows=false&limit={limit}&offset={offset}&primaryTag={tid}&sortBy=stripRanking&rcmGrp=A&rbCnGr=true&prxCnGr=false&nic=false"
        if 'tag' in extend:
            # filterGroupTags expects a URL-encoded JSON array of arrays: [["tag"]]
            url += "&filterGroupTags=%5B%5B%22" + extend['tag'] + "%22%5D%5D"
        rsp = self.fetch(url).json()
        videos = [
            {
                "vod_id": str(vod['username']).strip(),
                "vod_name": f"{self.country_code_to_flag(str(vod['country']).strip())}{str(vod['username']).strip()}",
                "vod_pic": f"https://img.doppiocdn.net/thumbs/{vod['snapshotTimestamp']}/{vod['id']}",
                "vod_remarks": "" if vod.get('status') == "public" else "🎫"
            }
            for vod in rsp.get('models', [])
        ]
        total = int(rsp.get('filteredCount', 0))
        return {
            "list": videos,
            "page": pg,
            "pagecount": (total + limit - 1) // limit,
            "limit": limit,
            "total": total
        }
    def detailContent(self, array):
        username = array[0]
        rsp = self.fetch(f"{self.host}/api/front/v2/models/username/{username}/cam").json()
        info = rsp['cam']
        user = rsp['user']['user']
        id = str(user['id'])
        country = str(user['country']).strip()
        isLive = "" if user['isLive'] else " 已下播"
        flag = self.country_code_to_flag(country)
        remark, startAt = '', ''
        if show := info.get('show'):
            startAt = show.get('createdAt')
        elif show := info.get('groupShowAnnouncement'):
            startAt = show.get('startAt')
        if startAt:
            # Convert the UTC timestamp to Beijing time (UTC+8).
            BJtime = (datetime.strptime(startAt, "%Y-%m-%dT%H:%M:%SZ") + timedelta(hours=8)).strftime("%m月%d日 %H:%M")
            remark = f"🎫 始于 {BJtime}"
        vod = {
            "vod_id": id,
            "vod_name": str(info['topic']).strip(),
            "vod_pic": str(user['avatarUrl']),
            "vod_director": f"{flag}{username}{isLive}",
            "vod_remarks": remark,
            'vod_play_from': 'StripChat',
            'vod_play_url': f"{id}${id}"
        }
        return {'list': [vod]}

    def process_key(self, key: str) -> Tuple[str, str]:
        # Map a leading category letter to a primary tag, defaulting to girls:
        # "g per" -> ('girls', 'per'); "perla" -> ('girls', 'perla').
        tags = {'G': 'girls', 'C': 'couples', 'M': 'men', 'T': 'trans'}
        parts = key.split(maxsplit=1)  # split on the first whitespace only
        if len(parts) > 1 and (tag := tags.get(parts[0].upper())):
            return tag, parts[1].strip()
        return 'girls', key.strip()
    def searchContent(self, key, quick, pg="1"):
        result = {}
        if int(pg) > 1:
            return result
        tag, key = self.process_key(key)
        url = f"{self.host}/api/front/v4/models/search/group/username?query={key}&limit=900&primaryTag={tag}"
        rsp = self.fetch(url).json()
        result['list'] = [
            {
                "vod_id": str(user['username']).strip(),
                "vod_name": f"{self.country_code_to_flag(str(user['country']).strip())}{user['username']}",
                "vod_pic": f"https://img.doppiocdn.net/thumbs/{user['snapshotTimestamp']}/{user['id']}",
                "vod_remarks": "" if user['status'] == "public" else "🎫"
            }
            for user in rsp.get('models', [])
            if user['isLive']  # keep live models only
        ]
        return result
    def playerContent(self, flag, id, vipFlags):
        url = f"https://edge-hls.doppiocdn.net/hls/{id}/master/{id}_auto.m3u8?playlistType=lowLatency"
        rsp = self.fetch(url)
        lines = rsp.text.strip().split('\n')
        psch, pkey = '', ''
        urls = []
        for i, line in enumerate(lines):
            if line.startswith('#EXT-X-MOUFLON:'):
                # e.g. #EXT-X-MOUFLON:<scheme>:<psch>:<pkey>
                parts = line.split(':')
                if len(parts) >= 4:
                    psch, pkey = parts[2], parts[3]
            if '#EXT-X-STREAM-INF' in line:
                # Pull the quality label out of NAME="..."
                name_start = line.find('NAME="') + 6
                name_end = line.find('"', name_start)
                qn = line[name_start:name_end]
                # The stream URL sits on the next line.
                url_base = lines[i + 1]
                # Append psch/pkey and route the stream through the local proxy.
                full_url = f"{url_base}&psch={psch}&pkey={pkey}"
                proxy_url = f"{self.getProxyUrl()}&url={quote(full_url)}"
                # Accumulate [quality, url, quality, url, ...] pairs.
                urls.extend([qn, proxy_url])
        return {
            "url": urls,
            "parse": '0',
            "contentType": '',
            "header": self.headers
        }
    def localProxy(self, param):
        url = unquote(param['url'])
        data = self.fetch(url)
        if data.status_code == 403:
            # Ticketed streams return 403; retry with the blurred preview rendition.
            data = self.fetch(re.sub(r'\d+p\d*\.m3u8', '160p_blurred.m3u8', url))
        if data.status_code != 200:
            return [404, "text/plain", ""]
        data = data.text
        if "#EXT-X-MOUFLON:FILE" in data:
            data = self.process_m3u8_content_v2(data)
        return [200, "application/vnd.apple.mpegurl", data]

    def process_m3u8_content_v2(self, m3u8_content):
        lines = m3u8_content.strip().split('\n')
        for i, line in enumerate(lines):
            if (line.startswith('#EXT-X-MOUFLON:FILE:')
                    and i + 1 < len(lines) and 'media.mp4' in lines[i + 1]):
                encrypted_data = line.split(':', 2)[2].strip()
                try:
                    decrypted_data = self.decrypt(encrypted_data, self.stripchat_key)
                except Exception:
                    # Fall back to a previously observed static key.
                    decrypted_data = self.decrypt(encrypted_data, "Zokee2OhPh9kugh4")
                lines[i + 1] = lines[i + 1].replace('media.mp4', decrypted_data)
        return '\n'.join(lines)

    def country_code_to_flag(self, country_code):
        # Map an ISO 3166 alpha-2 code to its flag emoji, e.g. "CN" -> 🇨🇳.
        if len(country_code) != 2 or not country_code.isalpha():
            return country_code
        return ''.join(chr(ord(c.upper()) - ord('A') + 0x1F1E6) for c in country_code)
    def decode_key_compact(self):
        # The key ships as base64 of space-separated hex byte values.
        base64_str = "NTEgNzUgNjUgNjEgNmUgMzQgNjMgNjEgNjkgMzkgNjIgNmYgNGEgNjEgMzUgNjE="
        decoded = base64.b64decode(base64_str).decode('utf-8')
        key_bytes = bytes(int(hex_str, 16) for hex_str in decoded.split(" "))
        return key_bytes.decode('utf-8')

    def compute_hash(self, key: str) -> bytes:
        """Compute and cache the SHA-256 digest of key."""
        if key not in self._hash_cache:
            self._hash_cache[key] = hashlib.sha256(key.encode('utf-8')).digest()
        return self._hash_cache[key]

    def decrypt(self, encrypted_b64: str, key: str) -> str:
        # Restore any stripped base64 padding.
        padding = len(encrypted_b64) % 4
        if padding:
            encrypted_b64 += '=' * (4 - padding)
        # XOR the ciphertext against a keystream built by repeating the
        # SHA-256 digest of the key.
        hash_bytes = self.compute_hash(key)
        encrypted_data = base64.b64decode(encrypted_b64)
        decrypted_bytes = bytearray(
            cipher_byte ^ hash_bytes[i % len(hash_bytes)]
            for i, cipher_byte in enumerate(encrypted_data)
        )
        return decrypted_bytes.decode('utf-8')
    def create_session_with_retry(self):
        self.session = requests.Session()
        retry_strategy = Retry(
            total=3,
            backoff_factor=0.3,
            status_forcelist=[429, 500, 502, 503, 504]  # status codes worth retrying
        )
        adapter = requests.adapters.HTTPAdapter(max_retries=retry_strategy)
        self.session.mount("http://", adapter)
        self.session.mount("https://", adapter)

    def fetch(self, url):
        return self.session.get(url, headers=self.headers, timeout=10)
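
# ------------------------------
# Optional offline check of the two tricky pieces above: the master-playlist
# scan in playerContent and the XOR scheme behind decrypt. The sample
# playlist is synthetic (its tag layout is inferred from the parsing code)
# and the sample filename is ours, so treat this as a sketch rather than
# captured traffic; it runs without network access.
if __name__ == "__main__":
    sample = (
        '#EXTM3U\n'
        '#EXT-X-MOUFLON:PSCH:v1:abcdef123456\n'
        '#EXT-X-STREAM-INF:BANDWIDTH=2500000,NAME="720p"\n'
        'https://example.invalid/hls/123/720p.m3u8?token=x\n'
        '#EXT-X-STREAM-INF:BANDWIDTH=800000,NAME="480p"\n'
        'https://example.invalid/hls/123/480p.m3u8?token=x\n'
    )
    lines = sample.strip().split('\n')
    psch = pkey = ''
    pairs = []
    for i, line in enumerate(lines):
        if line.startswith('#EXT-X-MOUFLON:'):
            parts = line.split(':')
            if len(parts) >= 4:
                psch, pkey = parts[2], parts[3]
        if '#EXT-X-STREAM-INF' in line:
            qn = line.split('NAME="')[1].split('"')[0]
            pairs.extend([qn, f"{lines[i + 1]}&psch={psch}&pkey={pkey}"])
    print(pairs)  # ['720p', 'https://...&psch=v1&pkey=abcdef123456', '480p', ...]

    # XOR against a repeated SHA-256 digest is its own inverse, so the same
    # keystream both encrypts and decrypts; decrypt() adds back the stripped
    # base64 padding and applies exactly this operation.
    key = "Zokee2OhPh9kugh4"  # the fallback key used in process_m3u8_content_v2
    digest = hashlib.sha256(key.encode('utf-8')).digest()
    plain = b"segment_0001.mp4"  # sample plaintext, ours
    cipher = bytes(b ^ digest[i % len(digest)] for i, b in enumerate(plain))
    token = base64.b64encode(cipher).decode().rstrip('=')
    padded = token + '=' * (-len(token) % 4)
    recovered = bytes(b ^ digest[i % len(digest)] for i, b in enumerate(base64.b64decode(padded)))
    assert recovered == plain
    print("XOR roundtrip OK:", recovered.decode())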

View File

@@ -0,0 +1,533 @@
import re
import sys
import threading
import time
import urllib.parse

import requests
from pyquery import PyQuery as pq

sys.path.append('..')
from base.spider import Spider


class Spider(Spider):
    def __init__(self):
        # Basic configuration
        self.name = '好色TV'
        self.host = 'https://hsex.icu/'
        self.candidate_hosts = [
            "https://hsex.icu/",
            "https://hsex1.icu/",
            "https://hsex.tv/"
        ]
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Referer': self.host
        }
        self.timeout = 5000
        # Category map. Key fix: the 视频 category uses an empty url_suffix
        # so its pages resolve to the list-{pg}.htm format.
        self.class_map = {
            '视频': {'type_id': 'list', 'url_suffix': ''},  # fix 1: empty suffix for 视频
            '周榜': {'type_id': 'top7', 'url_suffix': 'top7'},
            '月榜': {'type_id': 'top', 'url_suffix': 'top'},
            '5分钟+': {'type_id': '5min', 'url_suffix': '5min'},
            '10分钟+': {'type_id': 'long', 'url_suffix': 'long'}
        }

    def getName(self):
        return self.name

    def init(self, extend=""):
        # Pick the fastest reachable mirror.
        self.host = self.get_fastest_host()
        self.headers['Referer'] = self.host

    def isVideoFormat(self, url):
        if not url:
            return False
        return any(fmt in url.lower() for fmt in ['.mp4', '.m3u8', '.flv', '.avi'])

    def manualVideoCheck(self):
        def check(url):
            if not self.isVideoFormat(url):
                return False
            try:
                resp = self.fetch(url, headers=self.headers, method='HEAD', timeout=3)
                return resp.status_code in (200, 302) and 'video' in resp.headers.get('Content-Type', '')
            except Exception:
                return False
        return check
    def get_fastest_host(self):
        """Probe the candidate mirrors in parallel and return the fastest reachable one."""
        results = {}
        threads = []

        def test_host(url):
            try:
                start_time = time.time()
                # Probe the mirror directly (not through the relay).
                resp = requests.head(url, headers=self.headers, timeout=2, allow_redirects=False)
                if resp.status_code in (200, 301, 302):
                    results[url] = (time.time() - start_time) * 1000  # latency in ms
                else:
                    results[url] = float('inf')
            except Exception:
                results[url] = float('inf')

        for host in self.candidate_hosts:
            t = threading.Thread(target=test_host, args=(host,))
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
        valid_hosts = [(h, d) for h, d in results.items() if d != float('inf')]
        if not valid_hosts:
            return self.candidate_hosts[0]
        # Pick the lowest-latency mirror rather than arbitrary dict order.
        return min(valid_hosts, key=lambda x: x[1])[0]
    def parse_video_items(self, data):
        """Shared parser for the video-card grid used on every listing page."""
        vlist = []
        for item in data('.row .col-xs-6.col-md-3').items():
            try:
                title = item('h5').text().strip()
                if not title:
                    continue
                # The cover image lives in the inline style of .image.
                style = item('.image').attr('style') or ''
                pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', style)
                vod_pic = pic_match.group(1) if pic_match else ''
                if vod_pic and not vod_pic.startswith(('http://', 'https://')):
                    vod_pic = f"{self.host.rstrip('/')}/{vod_pic.lstrip('/')}"
                # Duration badge doubles as the remark.
                desc = item('.duration').text().strip() or '未知'
                href = item('a').attr('href') or ''
                if not href:
                    continue
                vod_id = href.split('/')[-1]
                if not vod_id.endswith('.htm'):
                    vod_id += '.htm'
                vlist.append({
                    'vod_id': vod_id,
                    'vod_name': title,
                    'vod_pic': vod_pic,
                    'vod_remarks': desc
                })
            except Exception as e:
                print(f"Failed to parse a video card: {e}")
                continue
        return vlist

    def parse_pagecount(self, data):
        """Extract the max page number from the pagination widget, default 1."""
        try:
            nums = [int(a.text().strip()) for a in data('.pagination1 li a').items()
                    if a.text().strip().isdigit()]
            return max(nums) if nums else 1
        except Exception:
            return 1

    def homeContent(self, filter):
        result = {}
        # Build the category list.
        result['class'] = [
            {'type_name': name, 'type_id': info['type_id']}
            for name, info in self.class_map.items()
        ]
        try:
            # Fetch and parse the home page.
            html = self.fetch_with_retry(self.host, retry=2, timeout=5).text
            result['list'] = self.parse_video_items(pq(html))
        except Exception as e:
            print(f"Home page parse failed: {e}")
            result['list'] = []
        return result

    def homeVideoContent(self):
        return {'list': []}
    def categoryContent(self, tid, pg, filter, extend):
        result = {}
        try:
            # Look up the category info for this type_id.
            cate_info = next((info for info in self.class_map.values()
                              if info['type_id'] == tid), None)
            if not cate_info:
                result['list'] = []
                return result
            # Key fix: the 视频 category uses a different URL format than the
            # ranking categories.
            if tid == 'list':  # 视频 category
                url = f"{self.host}list-{pg}.htm"  # list-1.htm, list-2.htm, ...
            else:  # 周榜/月榜 etc. use xxx_list-{pg}.htm
                url = f"{self.host}{cate_info['url_suffix']}_list-{pg}.htm"
            # Request and parse the category page.
            html = self.fetch(url, headers=self.headers, timeout=8).text
            html = html.encode('utf-8', errors='ignore').decode('utf-8')
            data = pq(html)
            vlist = self.parse_video_items(data)
            result['list'] = vlist
            result['page'] = pg
            result['pagecount'] = self.parse_pagecount(data)
            result['limit'] = len(vlist)
            result['total'] = 999999
        except Exception as e:
            print(f"Category parse failed: {e}")
            result.update({'list': [], 'page': pg, 'pagecount': 1, 'limit': 0, 'total': 0})
        return result
    def detailContent(self, ids):
        try:
            if not ids or not ids[0]:
                return {'list': []}
            vod_id = ids[0].strip()
            if not vod_id.endswith('.htm'):
                vod_id += '.htm'
            url = f"{self.host}{vod_id.lstrip('/')}"
            html = self.fetch_with_retry(url, retry=2, timeout=8).text
            html = html.encode('utf-8', errors='ignore').decode('utf-8')
            data = pq(html)
            # Title
            title = data('.panel-title, .video-title, h1').text().strip() or '未知标题'
            # Poster: try the player poster style first, then common <img> slots.
            vod_pic = ''
            poster_style = data('.vjs-poster').attr('style') or ''
            pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', poster_style)
            if pic_match:
                vod_pic = pic_match.group(1)
            if not vod_pic:
                vod_pic = data('.video-pic img, .vjs-poster img, .thumbnail img').attr('src') or ''
            if vod_pic and not vod_pic.startswith('http'):
                vod_pic = f"{self.host}{vod_pic.lstrip('/')}"
            # Duration and view count
            duration = '未知'
            views = '未知'
            for item in data('.panel-body .col-md-3, .video-info .info-item, .info p').items():
                text = item.text().strip()
                if '时长' in text or 'duration' in text.lower():
                    duration = text.replace('时长:', '').replace('时长', '').strip()
                elif '观看' in text or 'views' in text.lower():
                    views_match = re.search(r'(\d+\.?\d*[kK]?)次观看', text)
                    if views_match:
                        views = views_match.group(1)
                    else:
                        views = text.replace('观看:', '').replace('观看', '').strip()
            remarks = f"{duration} | {views}"
            # Play URL: inline videoUrl JS variable, then <source> tags, then
            # any bare media link in the page.
            video_url = ''
            m3u8_match = re.search(r'videoUrl\s*=\s*["\']([^"\']+\.m3u8)["\']', html)
            if m3u8_match:
                video_url = m3u8_match.group(1)
            if not video_url:
                source = data('source[src*=".m3u8"], source[src*=".mp4"]')
                video_url = source.attr('src') or ''
            if not video_url:
                js_matches = re.findall(r'(https?://[^\s"\']+\.(?:m3u8|mp4))', html)
                if js_matches:
                    video_url = js_matches[0]
            if video_url and not video_url.startswith('http'):
                video_url = f"{self.host}{video_url.lstrip('/')}"
            vod = {
                'vod_id': vod_id,
                'vod_name': title,
                'vod_pic': vod_pic,
                'vod_remarks': remarks,
                'vod_play_from': '好色TV',
                'vod_play_url': f'正片${video_url}' if video_url else '正片$暂无地址'
            }
            return {'list': [vod]}
        except Exception as e:
            print(f"Detail parse failed: {e}")
            return {'list': []}
    def searchContent(self, key, quick, pg=1):
        empty = {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
        try:
            # Validate the keyword.
            if not key.strip():
                print("Search keyword must not be empty")
                return empty
            # Build the search request. The raw keyword goes into params;
            # fetch() takes care of URL-encoding it exactly once.
            search_url = f"{self.host}search.htm"
            params = {
                'search': key.strip(),
                'page': int(pg)
            }
            resp = self.fetch(
                url=search_url,
                headers=self.headers,
                params=params,
                timeout=8
            )
            if resp.status_code not in (200, 302):
                print(f"Search request failed, URL: {resp.url}, status: {resp.status_code}")
                return empty
            html = resp.text.encode('utf-8', errors='ignore').decode('utf-8')
            data = pq(html)
            # Detect "no results" pages.
            no_result_texts = ['没有找到相关视频', '无搜索结果', 'No results found', '未找到匹配内容']
            if any(data(f'div:contains("{text}"), p:contains("{text}")').text() for text in no_result_texts):
                print(f"Search '{key}' page {pg}: no results")
                return empty
            # Parse results and pagination.
            vlist = self.parse_video_items(data)
            pagecount = self.parse_pagecount(data)
            print(f"Search '{key}' page {pg} done: {len(vlist)} items, {pagecount} pages")
            return {
                'list': vlist,
                'page': int(pg),
                'pagecount': pagecount,
                'limit': len(vlist),
                'total': len(vlist) * pagecount
            }
        except Exception as e:
            print(f"Search failed: {e}")
            return empty
    def playerContent(self, flag, id, vipFlags):
        headers = self.headers.copy()
        headers.update({
            'Referer': self.host,
            'Origin': self.host.rstrip('/'),
            'Host': urllib.parse.urlparse(self.host).netloc,
        })
        return {
            'parse': 1,  # matches the rule's play_parse setting
            'url': id,
            'header': headers,
            'double': True  # matches the rule's double setting
        }

    def localProxy(self, param):
        try:
            url = param['url']
            if url and not url.startswith(('http://', 'https://')):
                url = f"{self.host.rstrip('/')}/{url.lstrip('/')}"
            img_headers = self.headers.copy()
            img_headers.update({'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8'})
            res = self.fetch(url, headers=img_headers, timeout=10)
            content_type = res.headers.get('Content-Type', 'image/jpeg')
            return [200, content_type, res.content]
        except Exception as e:
            print(f"Image proxy failed: {e}")
            return [200, 'image/jpeg', b'']

    def fetch_with_retry(self, url, retry=2, timeout=5):
        for i in range(retry + 1):
            try:
                # fetch() already routes through the relay, so pass the bare
                # URL here instead of wrapping it a second time.
                resp = self.fetch(url, headers=self.headers, timeout=timeout)
                if resp.status_code in (200, 301, 302):
                    return resp
                print(f"Request to {url} returned {resp.status_code}, retrying...")
            except Exception as e:
                print(f"Attempt {i + 1} for {url} failed: {e}")
            if i < retry:
                time.sleep(0.5)
        # Uniform empty response so callers don't crash.
        return type('obj', (object,), {'text': '', 'status_code': 404})()
    def fetch(self, url, headers=None, timeout=5, method='GET', params=None):
        headers = headers or self.headers
        # Fold query params into the target URL before wrapping it in the
        # relay; otherwise requests would attach them to the relay URL, where
        # the relay ignores them.
        if params:
            sep = '&' if '?' in url else '?'
            url = f"{url}{sep}{urllib.parse.urlencode(params)}"
        proxied = f'https://vpsdn.leuse.top/proxy?single=true&url={urllib.parse.quote(url)}'
        try:
            if method.upper() == 'HEAD':
                resp = requests.head(
                    proxied,
                    headers=headers,
                    timeout=timeout,
                    allow_redirects=False
                )
            else:
                # GET and any other method default to a GET request.
                resp = requests.get(
                    proxied,
                    headers=headers,
                    timeout=timeout,
                    allow_redirects=True
                )
            # Pick a sane encoding to avoid mojibake in Chinese pages.
            if 'charset' in resp.headers.get('Content-Type', '').lower():
                resp.encoding = resp.apparent_encoding
            else:
                resp.encoding = 'utf-8'
            return resp
        except Exception as e:
            print(f"Request failed ({url}): {e}")
            # Uniform empty response so downstream logic doesn't crash.
            return type('obj', (object,), {
                'text': '',
                'status_code': 500,
                'headers': {},
                'url': url
            })()
# ------------------------------
# Optional smoke test (comment out or delete in production)
# ------------------------------
if __name__ == "__main__":
    spider = Spider()
    spider.init()
    # Home page
    print("=== Home ===")
    home_data = spider.homeContent(filter='')
    print(f"Categories: {len(home_data['class'])}")
    print(f"Home videos: {len(home_data['list'])}")
    # 视频 category, page 1 (exercises the fixed list-{pg}.htm URL)
    print("\n=== Category 'list', page 1 ===")
    cate_data = spider.categoryContent(tid='list', pg=1, filter='', extend='')
    print(f"Videos: {len(cate_data['list'])}")
    print(f"Page count: {cate_data['pagecount']}")
    # Search
    print("\n=== Search: 测试 ===")
    search_data = spider.searchContent(key="测试", quick=False, pg=1)
    print(f"Results: {len(search_data['list'])}")
    print(f"Page count: {search_data['pagecount']}")
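    # The relay at vpsdn.leuse.top only forwards its url= parameter, so
    # fetch() folds query params into the target URL before quoting it. A
    # quick offline look at the folded form (pure string handling, no
    # network involved):
    target = "https://hsex.icu/search.htm?" + urllib.parse.urlencode({'search': '测试', 'page': 1})
    print("\n=== Relay URL folding ===")
    print(f"https://vpsdn.leuse.top/proxy?single=true&url={urllib.parse.quote(target)}")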

View File

@@ -1198,6 +1198,24 @@
"searchable": 1,
"quickSearch": 1,
"filterable": 1
},
{
"key": "stripchat",
"name": "stripchat",
"type": 3,
"api": "./PyramidStore/plugin/adult/stripchat.py",
"searchable": 1,
"quickSearch": 1,
"filterable": 1
},
{
"key": "好色TV",
"name": "好色TV",
"type": 3,
"api": "./PyramidStore/plugin/adult/好色TV.py",
"searchable": 1,
"quickSearch": 1,
"filterable": 1
}
],
"lives": [

View File

@@ -880,6 +880,12 @@
"type": 3,
"api": "csp_MiSou"
},
{
"key": "派派",
"name": "派派|搜索",
"type": 3,
"api": "csp_PPQPan"
},
{
"key": "短剧大全",
"name": "短剧|大全",
@@ -892,6 +898,12 @@
"type": 3,
"api": "csp_DuanjuHJ"
},
{
"key": "短剧鬼鬼",
"name": "短剧|鬼鬼",
"type": 3,
"api": "csp_DuanjuGG"
},
{
"key": "全盘",
"name": "全盘|搜索",

Binary file not shown.