feat(PyramidStore): initialize the project and add base configuration files

Add .gitignore to ignore the .git directories of sub-repositories
Add a LICENSE file using the GNU General Public License v3.0
Add README.md documentation covering debugging examples, a disclaimer, and configuration notes
Add base/localProxy.py, the base proxy configuration file
Add version-controlled image files (binary diff)
2025-10-23 02:14:43 +08:00
commit 3572e29279
356 changed files with 120993 additions and 0 deletions


@@ -0,0 +1,404 @@
# -*- coding: utf-8 -*-
# 🌈 Love
import json
import random
import re
import sys
import threading
import time
from base64 import b64decode, b64encode
from urllib.parse import urlparse
import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
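# extend arrives as the JSON "ext" from the host app's site config; for this
# spider it is an optional requests-style proxy map such as
# {"http": "http://127.0.0.1:1072", "https": "http://127.0.0.1:1072"}.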
try:self.proxies = json.loads(extend)
except:self.proxies = {}
self.headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Cache-Control': 'no-cache',
}
# Use working dynamic URLs directly
self.host = self.get_working_host()
self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})
self.log(f"使用站点: {self.host}")
print(f"使用站点: {self.host}")
pass
def getName(self):
return "🌈 51吸瓜"
def isVideoFormat(self, url):
# Treat direct media formats as playable without parsing
return any(ext in (url or '') for ext in ['.m3u8', '.mp4', '.ts'])
def manualVideoCheck(self):
return False
def destroy(self):
pass
def homeContent(self, filter):
try:
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'class': [], 'list': []}
data = self.getpq(response.text)
result = {}
classes = []
# Try to get categories from different possible locations
category_selectors = [
'.category-list ul li',
'.nav-menu li',
'.menu li',
'nav ul li'
]
for selector in category_selectors:
for k in data(selector).items():
link = k('a')
href = (link.attr('href') or '').strip()
name = (link.text() or '').strip()
# Skip placeholder or invalid entries
if not href or href == '#' or not name:
continue
classes.append({
'type_name': name,
'type_id': href
})
if classes:
break
# If no categories found, create some default ones
if not classes:
classes = [
{'type_name': '首页', 'type_id': '/'},
{'type_name': '最新', 'type_id': '/latest/'},
{'type_name': '热门', 'type_id': '/hot/'}
]
result['class'] = classes
result['list'] = self.getlist(data('#index article a'))
return result
except Exception as e:
print(f"homeContent error: {e}")
return {'class': [], 'list': []}
def homeVideoContent(self):
try:
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': []}
data = self.getpq(response.text)
return {'list': self.getlist(data('#index article a, #archive article a'))}
except Exception as e:
print(f"homeVideoContent error: {e}")
return {'list': []}
def categoryContent(self, tid, pg, filter, extend):
try:
if '@folder' in tid:
id = tid.replace('@folder', '')
videos = self.getfod(id)
else:
# Build URL properly
if tid.startswith('/'):
if pg and pg != '1':
url = f"{self.host}{tid}page/{pg}/"
else:
url = f"{self.host}{tid}"
else:
url = f"{self.host}/{tid}"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a'), tid)
result = {}
result['list'] = videos
result['page'] = pg
result['pagecount'] = 1 if '@folder' in tid else 99999
result['limit'] = 90
result['total'] = 999999
return result
except Exception as e:
print(f"categoryContent error: {e}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
def detailContent(self, ids):
try:
url = f"{self.host}{ids[0]}" if not ids[0].startswith('http') else ids[0]
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [{'vod_play_from': '51吸瓜', 'vod_play_url': f'页面加载失败${url}'}]}
data = self.getpq(response.text)
vod = {'vod_play_from': '51吸瓜'}
# Get content/description
try:
clist = []
if data('.tags .keywords a'):
for k in data('.tags .keywords a').items():
title = k.text()
href = k.attr('href')
if title and href:
clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')
vod['vod_content'] = ' '.join(clist) if clist else data('.post-title').text()
except:
vod['vod_content'] = data('.post-title').text() or '51吸瓜视频'
# Get video URLs (build episode list when multiple players exist)
try:
plist = []
used_names = set()
if data('.dplayer'):
for c, k in enumerate(data('.dplayer').items(), start=1):
config_attr = k.attr('data-config')
if config_attr:
try:
config = json.loads(config_attr)
video_url = config.get('video', {}).get('url', '')
# Determine a readable episode name from nearby headings if present
ep_name = ''
try:
parent = k.parents().eq(0)
# search up to a few ancestors for a heading text
for _ in range(3):
if not parent: break
heading = parent.find('h2, h3, h4').eq(0).text() or ''
heading = heading.strip()
if heading:
ep_name = heading
break
parent = parent.parents().eq(0)
except Exception:
ep_name = ''
base_name = ep_name if ep_name else f"视频{c}"
name = base_name
count = 2
# Ensure the name is unique
while name in used_names:
name = f"{base_name} {count}"
count += 1
used_names.add(name)
if video_url:
self.log(f"解析到视频: {name} -> {video_url}")
print(f"解析到视频: {name} -> {video_url}")
plist.append(f"{name}${video_url}")
except:
continue
if plist:
self.log(f"拼装播放列表,共{len(plist)}")
print(f"拼装播放列表,共{len(plist)}")
vod['vod_play_url'] = '#'.join(plist)
else:
vod['vod_play_url'] = f"未找到视频源${url}"
except Exception as e:
vod['vod_play_url'] = f"视频解析失败${url}"
return {'list': [vod]}
except Exception as e:
print(f"detailContent error: {e}")
return {'list': [{'vod_play_from': '51吸瓜', 'vod_play_url': f'详情页加载失败${ids[0] if ids else ""}'}]}
def searchContent(self, key, quick, pg="1"):
try:
url = f"{self.host}/search/{key}/{pg}" if pg != "1" else f"{self.host}/search/{key}/"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [], 'page': pg}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a'))
return {'list': videos, 'page': pg}
except Exception as e:
print(f"searchContent error: {e}")
return {'list': [], 'page': pg}
def playerContent(self, flag, id, vipFlags):
url = id
p = 1
if self.isVideoFormat(url):
# m3u8/mp4 direct play; when using proxy setting, wrap to proxy for m3u8
if '.m3u8' in url:
url = self.proxy(url)
p = 0
self.log(f"播放请求: parse={p}, url={url}")
print(f"播放请求: parse={p}, url={url}")
return {'parse': p, 'url': url, 'header': self.headers}
def localProxy(self, param):
if param.get('type') == 'img':
res=requests.get(param['url'], headers=self.headers, proxies=self.proxies, timeout=10)
return [200,res.headers.get('Content-Type'),self.aesimg(res.content)]
elif param.get('type') == 'm3u8':return self.m3Proxy(param['url'])
else:return self.tsProxy(param['url'])
def proxy(self, data, type='m3u8'):
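# Wrap a resource URL so it is fetched back through localProxy: the host
# app's getProxyUrl() prefix plus the base64-encoded target and a type tag.
# Only applied when an upstream proxy is configured (self.proxies non-empty).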
if data and len(self.proxies):return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
else:return data
def m3Proxy(self, url):
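# Decode the proxied playlist URL, fetch it (resolving a single redirect
# manually so relative segment paths resolve against the final URL), then
# rewrite the EXT-X-KEY URI and every segment line to route through proxy().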
url=self.d64(url)
ydata = requests.get(url, headers=self.headers, proxies=self.proxies, allow_redirects=False)
data = ydata.content.decode('utf-8')
if ydata.headers.get('Location'):
url = ydata.headers['Location']
data = requests.get(url, headers=self.headers, proxies=self.proxies).content.decode('utf-8')
lines = data.strip().split('\n')
last_r = url[:url.rfind('/')]
parsed_url = urlparse(url)
durl = parsed_url.scheme + "://" + parsed_url.netloc
iskey=True
for index, string in enumerate(lines):
if iskey and 'URI' in string:
pattern = r'URI="([^"]*)"'
match = re.search(pattern, string)
if match:
lines[index] = re.sub(pattern, f'URI="{self.proxy(match.group(1), "mkey")}"', string)
iskey=False
continue
if '#EXT' not in string:
if 'http' not in string:
domain = last_r if string.count('/') < 2 else durl
string = domain + ('' if string.startswith('/') else '/') + string
lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
data = '\n'.join(lines)
return [200, "application/vnd.apple.mpegur", data]
def tsProxy(self, url):
url = self.d64(url)
data = requests.get(url, headers=self.headers, proxies=self.proxies, stream=True)
return [200, data.headers['Content-Type'], data.content]
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self, encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def get_working_host(self):
"""Get working host from known dynamic URLs"""
# Known working URLs from the dynamic gateway
dynamic_urls = [
'https://artist.vgwtswi.xyz',
'https://ability.vgwtswi.xyz',
'https://am.vgwtswi.xyz'
]
# Test each URL to find a working one
for url in dynamic_urls:
try:
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)
if response.status_code == 200:
# Verify it has the expected content structure
data = self.getpq(response.text)
articles = data('#index article a')
if len(articles) > 0:
self.log(f"选用可用站点: {url}")
print(f"选用可用站点: {url}")
return url
except Exception as e:
continue
# Fallback to first URL if none work (better than crashing)
self.log(f"未检测到可用站点,回退: {dynamic_urls[0]}")
print(f"未检测到可用站点,回退: {dynamic_urls[0]}")
return dynamic_urls[0]
def getlist(self, data, tid=''):
videos = []
l = '/mrdg' in tid
for k in data.items():
a = k.attr('href')
b = k('h2').text()
# Some pages might not include datePublished; use a fallback
c = k('span[itemprop="datePublished"]').text() or k('.post-meta, .entry-meta, time').text()
if a and b:
videos.append({
'vod_id': f"{a}{'@folder' if l else ''}",
'vod_name': b.replace('\n', ' '),
'vod_pic': self.getimg(k('script').text()),
'vod_remarks': c or '',
'vod_tag': 'folder' if l else '',
'style': {"type": "rect", "ratio": 1.33}
})
return videos
def getfod(self, id):
url = f"{self.host}{id}"
data = self.getpq(requests.get(url, headers=self.headers, proxies=self.proxies).text)
vdata=data('.post-content[itemprop="articleBody"]')
r=['.txt-apps','.line','blockquote','.tags','.content-tabs']
for i in r:vdata.remove(i)
p=vdata('p')
videos=[]
for i,x in enumerate(vdata('h2').items()):
c=i*2
videos.append({
'vod_id': p.eq(c)('a').attr('href'),
'vod_name': p.eq(c).text(),
'vod_pic': f"{self.getProxyUrl()}&url={p.eq(c+1)('img').attr('data-xkrkllgl')}&type=img",
'vod_remarks':x.text()
})
return videos
def getimg(self, text):
match = re.search(r"loadBannerDirect\('([^']+)'", text)
if match:
url = match.group(1)
return f"{self.getProxyUrl()}&url={url}&type=img"
else:
return ''
def aesimg(self, word):
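# Cover images are served AES-128-CBC encrypted with a fixed key/iv;
# decrypt and strip the PKCS#7 padding before handing the bytes back.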
key = b'f5d965df75336270'
iv = b'97b60394abc2fbe1'
cipher = AES.new(key, AES.MODE_CBC, iv)
decrypted = unpad(cipher.decrypt(word), AES.block_size)
return decrypted
def getpq(self, data):
try:
return pq(data)
except Exception as e:
print(f"{str(e)}")
return pq(data.encode('utf-8'))


@@ -0,0 +1,165 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import time
import uuid
from base64 import b64decode, b64encode
import json
import sys
from urllib.parse import urlparse, urlunparse
from Crypto.Cipher import AES
from Crypto.Hash import MD5
from Crypto.Util.Padding import unpad, pad
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
pass
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
host = "https://api.230110.xyz"
phost = "https://cdn.230110.xyz"
headers = {
'origin': host,
'referer': f'{host}/',
'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.8 Mobile/15E148 Safari/604.1',
}
def homeContent(self, filter):
data='9XSPkyFMrOOG34JSg//ZosMof45cyBo9hwZMZ5rvI6Yz/ZZlXWIf8/644OzwW+FNIOdJ61R/Lxjy1tqN+ZzokxtiVzb8LjYAkh6GFudwAUXFt9yS1ZjAxC3tDKrQsJQLk3nym0s00DBBzLBntRBDFz7nbba+OOBuQOZpL3CESGL42l4opdoViQLhO/dIizY1kIOk2NxxpDC9Z751gPl1ctHWuLWhuLG/QWgNWi/iHScjKrMHJKcC9GQHst/4Q3dgZ03eQIIVB6jvoV1XXoBCz6fjM/jM3BXpzSttT4Stglwy93gWuNWuZiKypHK2Q0lO10oM0ceRW2a0fPGId+rNYMRO3cR/C0ZueD4cmTAVOuxVr9ZZSP8/nhD0bHyAPONXtchIDJb0O/kdFHk2KTJfQ5q4fHOyzezczc4iQDV/R0S8cGZKM14MF+wytA/iljfj43H0UYqq5pM+MCUGRTdYEtuxCp0+A+DiOhNZwY/Km/TgBoGZQWGbpljJ2LAVnWhxX+ickLH7zuR/FeIwP/R8zOuR+8C8UlT9eHTqtvfNzaGdFxt316atHy8TNjRO7J5a177mqsHs3ziG0toDDzLDCbhRUjFgVA3ktahhXiWaaCo/ZGSJAA8TDO5DYqnJ0JDaX0ILPj8QB5zxrHYmRE8PboIr3RBAjz1sREbaHfjrUjoh29ePhlolLV00EvgoxP5knaqt5Ws/sq5IG57qKCAPgqXzblPLHToJGBtukKhLp8jbGJrkb6PVn4/jysks0NGE'
return {'class':self.aes(data,False)}
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
data = {"q": "", "filter": [f"type_id = {tid}"], "offset": (int(pg)-1) * 24, "limit": 24, "sort": ["video_time:desc"],"lang": "zh-cn", "route": "/videos/search"}
result = {}
if 'skey_' in tid:return self.searchContent(tid.split('_')[-1], True, pg)
result['list'] = self.getl(self.getdata(data))
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
return result
def detailContent(self, ids):
data={"limit":1,"filter":[f"video_id = {ids[0]}"],"lang":"zh-cn","route":"/videos/search"}
res = self.getdata(data)[0]
purl=urlunparse(urlparse(self.phost)._replace(path=urlparse(res.get('video_url')).path))
vod = {
'vod_play_from': 'dsysav',
'vod_play_url': f"{res.get('video_duration')}${purl}"
}
if res.get('video_tag'):
clist = []
tags=res['video_tag'].split(',')
for k in tags:
clist.append('[a=cr:' + json.dumps({'id': f'skey_{k}', 'name': k}) + '/]' + k + '[/a]')
vod['vod_content'] = ' '.join(clist)
return {'list':[vod]}
def searchContent(self, key, quick, pg="1"):
data={"q":key,"filter":[],"offset":(int(pg)-1) * 24,"limit":24,"sort":["video_time:desc"],"lang":"zh-cn","route":"/videos/search"}
return {'list':self.getl(self.getdata(data)),'page':pg}
def playerContent(self, flag, id, vipFlags):
if id.endswith('.mpd'):
id=f"{self.getProxyUrl()}&url={self.e64(id)}&type=mpd"
return {'parse': 0, 'url': id, 'header':self.headers}
def localProxy(self, param):
if param.get('type') and param['type']=='mpd':
url = self.d64(param.get('url'))
ids=url.split('/')
id=f"{ids[-3]}/{ids[-2]}/"
xpu = f"{self.getProxyUrl()}&path=".replace('&', '&amp;')
data = self.fetch(url, headers=self.headers).text
data = data.replace('initialization="', f'initialization="{xpu}{id}').replace('media="',f'media="{xpu}{id}')
return [200,'application/octet-stream',data]
else:
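# CDN path signing: md5-hex of (key + "/mpd/" + path + expiry), packed to
# raw bytes and URL-safe base64-encoded without padding, appended as ?sign=.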
hsign=self.md5(f"AjPuom638LmWfWyeM5YueKuJ9PuWLdRn/mpd/{param.get('path')}1767196800")
bytes_data = bytes.fromhex(hsign)
sign = b64encode(bytes_data).decode('utf-8').replace('=','').replace('+','-').replace('/','_')
url=f"{self.phost}/mpd/{param.get('path')}?sign={sign}&expire=1767196800"
return [302,'text/plain',None,{'Location':url}]
def liveContent(self, url):
pass
def aes(self, text, operation=True):
key = b'OPQT123412FRANME'
iv = b'MRDCQP12QPM13412'
cipher = AES.new(key, AES.MODE_CBC, iv)
if operation:
ct_bytes = cipher.encrypt(pad(json.dumps(text).encode("utf-8"), AES.block_size))
ct = b64encode(ct_bytes).decode("utf-8")
return ct
else:
pt = unpad(cipher.decrypt(b64decode(text)), AES.block_size)
return json.loads(pt.decode("utf-8"))
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self,encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def md5(self, text):
h = MD5.new()
h.update(text.encode('utf-8'))
return h.hexdigest()
def getl(self,data):
videos = []
for i in data:
img = i.get('video_cover')
if img and 'http' in img:img = urlunparse(urlparse(self.phost)._replace(path=urlparse(img).path))
videos.append({
'vod_id': i.get('video_id'),
'vod_name': i.get('video_title'),
'vod_pic': img,
'vod_remarks': i.get('video_duration'),
'style': {"type": "rect", "ratio": 1.33}
})
return videos
def getdata(self,data):
uid = str(uuid.uuid4())
t = int(time.time())
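# API request signing: md5(base64(body JSON) + nonce + timestamp + shared key);
# the body itself travels AES-CBC encrypted with the fixed key/iv from aes().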
json_data = {
'sign': self.md5(f"{self.e64(json.dumps(data))}{uid}{t}AjPuom638LmWfWyeM5YueKuJ9PuWLdRn"),
'nonce': uid,
'timestamp': t,
'data': self.aes(data),
}
res = self.post(f"{self.host}/v1", json=json_data, headers=self.headers).json()
res = self.aes(res['data'], False)
return res


@@ -0,0 +1,351 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import copy
import gzip
import json
import re
import sys
import time
import uuid
from base64 import b64decode
from urllib.parse import urlparse, urlunparse
from Crypto.Hash import SHA1, HMAC
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
'''
{
"": "",
"ext": {
"site": "https://missav.ai",
"cfproxy": ""
}
}
Supply your own Cloudflare-bypass proxy, e.g. https://xx.vvvv.cc/proxy?url=
'''
try:
ext=json.loads(extend)
self.host,self.pcf,self.phost=ext.get('site',''),ext.get('cfproxy',''),''
if self.pcf:
parsed_url=urlparse(self.pcf)
self.phost=parsed_url.scheme + "://" + parsed_url.netloc
except:
pass
self.headers = {
'referer': f'{self.host}',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36'
}
pass
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
xhost='https://client-rapi-missav.recombee.com'
fts = 'H4sIAAAAAAAAA23P30rDMBQG8FeRXM8X8FVGGZk90rA0HU3SMcZgXjn8V6p2BS2KoOiFAwUn2iK+TBP7GBpYXbG9/c6Pc77TnaABjNHOFtojVIDPUQcx7IJJvl9ydX30GwSYSpN0J4iZgTqJiywrPlN1vm/GJiPMJgGxJaZo2qnc3WXDuZIKMqSwUcX7Ui8O1DJRH3Gldh3CgMM2l31BhNGW8euq3PNFrac+PVNZ2NYzjMrbY53c6/Sm2uwDBczB7mGxqaDTWfkV6atXvXiu4FD2KeHOf3nxViahjv8YxwHYtWfyQ3NvFZYP85oSno3HvYDAiNevPqnosWFHAAPahnU6b2DXY8Jp0bO8QdfEmlo/SBd5PPUBAAA='
actfts = 'H4sIAAAAAAAAA5WVS2sUQRRG/0rT6xTcqq5Xiwjm/X6sQxZjbBLRBBeOIEGIIEgWrtwI4lJEQsjGhU6Iv2bGcf6FVUUydW/d1SxT55sDfbpmsn9WP+/e1A+q+rh7dnT8qp6rT3snXTz4N7icXH4OB697L/rxZP+sPo1g+Ot8PPg+vvoyOb+IOJ7Vb+fuqGxkJSrZmMOTexiORDjAGxs3GvDGinCANjp5NPbo4NHYo5PHYI8OHoM9JnkM9pjgMdhjksdijwkeiz02eSz22OCx2GOTx2GPDR6HPS55HPa44HHY45LHY48LHo89Pnk89vjg8djjk6fFHh88bfAcxNXduz/sv0Qvfnz74+/X65lf/OMqfzD9ndF8geYzWijQQkaLBVrMaKlASxktF2g5o5UCrWS0WqDVjNYKtJbReoHWM9oo0EZGmwXazGirQFsZbRdoO6OdAu1ktFug3Yz2CrRH70TvqEN3YvT75+TP+5nvxMNKwf0pCIWur4JwM5spVCAaRJtI9ZQ2IPBPg47UTKkGgb/wJlI7pQYE/ho/QsiCaFv61E+7J338Izj6MJi8+xSefnhzO/PTK1CmGt58G118zM+pDBloPtBk0PBBQwaKDxQZSD6QZAB8QN6UbNlAtmTg+cCTgeMDRwaWDywZ8JKSlJS8pCQlJS8pSUnJS0pSUvKSkpSUvKQkJYGXBFISeEkgJYGXBFISeEkgJYGXBFISeEkgJYGXBFISeEkgJYGXBFISeElI/7QO/gOZ7bAksggAAA=='
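# fts / actfts hold the category filter definitions, stored gzip-compressed
# and base64-encoded; ungzip() below inflates them back to JSON.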
def homeContent(self, filter):
html = self.getpq(f"{self.host}/cn",headers=self.headers)
result = {}
filters = {}
classes=[]
for i in list(html('.mt-4.space-y-4').items())[:2]:
for j in i('ul li').items():
id=j('a').attr('href').split('/')[-1]
classes.append({
'type_name': j.text(),
'type_id': id
})
filters[id] = copy.deepcopy(self.ungzip(self.fts))
if id=='actresses':filters[id].extend(self.ungzip(self.actfts))
result['class'] = classes
result['filters'] = filters
result['list'] = self.getlist(html('.grid-cols-2.md\\:grid-cols-3 .thumbnail.group'))
return result
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
params={
'page':'' if pg=='1' else pg
}
ft = {
'filters': extend.get('filters', ''),
'sort': extend.get('sort', '')
}
if tid in ['makers', 'genres']:
ft = {}
elif tid == 'actresses':
ft = {
'height': extend.get('height', ''),
'cup': extend.get('cup', ''),
'debut': extend.get('debut', ''),
'age': extend.get('age', ''),
'sort': extend.get('sort', '')
}
params.update(ft)
params = {k: v for k, v in params.items() if v != ""}
url=tid if 'http' in tid else f"{self.host}/cn/{tid}"
data=self.getpq(url,headers=self.headers,params=params)
result = {}
if tid in ['makers', 'genres']:
videos = self.gmsca(data)
elif tid == 'actresses':
videos = self.actca(data)
else:
videos = self.getlist(data('.grid-cols-2.md\\:grid-cols-3 .thumbnail.group'))
result['list'] = videos
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
return result
def detailContent(self, ids):
v=self.getpq(ids[0],headers=self.headers)
sctx=v('body script').text()
urls=self.execute_js(sctx)
if not urls:urls=f"嗅探${ids[0]}"
c=v('.space-y-2 .text-secondary')
ac,dt,bq=[],[],[]
for i in c.items():
if re.search(r"导演:|女优:",i.text()):
ac.extend(['[a=cr:' + json.dumps({'id': j.attr('href'), 'name': j.text()}) + '/]' + j.text() + '[/a]' for j in i('a').items()])
elif '发行商:' in i.text():
dt.extend(['[a=cr:' + json.dumps({'id': j.attr('href'), 'name': j.text()}) + '/]' + j.text() + '[/a]' for j in i('a').items()])
elif re.search(r"标籤:|系列:|类型:",i.text()):
bq.extend(['[a=cr:' + json.dumps({'id': j.attr('href'), 'name': j.text()}) + '/]' + j.text() + '[/a]' for j in i('a').items()])
np={'MissAV':urls,'相关视频':self.getfov(ids[0])}
vod = {
'type_name': c.eq(-3)('a').text(),
'vod_year': c.eq(0)('span').text(),
'vod_remarks': ' '.join(bq),
'vod_actor': ' '.join(ac),
'vod_director': ' '.join(dt),
'vod_content': v('.text-secondary.break-all').text()
}
names,plist=[],[]
for i,j in np.items():
if j:
names.append(i)
plist.append(j)
vod['vod_play_from']='$$$'.join(names)
vod['vod_play_url']='$$$'.join(plist)
return {'list': [vod]}
def searchContent(self, key, quick, pg="1"):
data=self.getpq(f"{self.host}/cn/search/{key}",headers=self.headers,params={'page':pg})
return {'list': self.getlist(data('.grid-cols-2.md\\:grid-cols-3 .thumbnail.group')),'page':pg}
def playerContent(self, flag, id, vipFlags):
p = 0  # direct sources need no parsing; the related-video branch below adjusts this
if '相关' in flag:
try:
v = self.getpq(id, headers=self.headers)
sctx = v('body script').text()
urls = self.execute_js(sctx)
if not urls: raise Exception("没有找到地址")
p,id=0,urls.split('#')[0].split('$')[-1]
except:
p=1
return {'parse': p, 'url': id, 'header': self.headers}
def localProxy(self, param):
pass
def json_to_params(self, params, skip_empty=False):
query = []
for k, v in params.items():
if skip_empty and not v:
continue
query.append(f"{k}={v}")
return "&".join(query)
def getpq(self, url, headers=None,params='',min=0,max=3):
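# Fetch through the optional Cloudflare proxy. A 3xx here carries its real
# target inside the body as url='...', so follow up to `max` such hops
# manually before handing the HTML to pyquery.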
if not min and self.phost in url:
url=url.replace(self.phost,self.host)
if params=={}:params=''
if params:
params=f"?{self.josn_to_params(params)}"
response=self.fetch(f"{self.pcf}{url}{params}", headers=headers,verify=False)
res=response.text
if 300 <= response.status_code < 400:
if min >= max:raise Exception(f"重定向次数过多: {res}")
match = re.search(r"url=['\"](https?://[^'\"]+)['\"]", res)
if match:
url = match.group(1).replace(self.phost, self.host)
return self.getpq(url, headers=headers,params='',min=min+1,max=max)
try:
return pq(res)
except Exception as e:
print(f"{str(e)}")
return pq(res.encode('utf-8'))
def getlist(self,data):
videos = []
names,ids=[],[]
for i in data.items():
k = i('.overflow-hidden.shadow-lg a')
id=k.eq(0).attr('href')
name=i('.text-secondary').text()
if id and id not in ids and name not in names:
ids.append(id)
names.append(name)
videos.append({
'vod_id': id,
'vod_name': name,
'vod_pic': k.eq(0)('img').attr('data-src'),
'vod_year': '' if len(list(k.items())) < 3 else k.eq(1).text(),
'vod_remarks': k.eq(-1).text(),
'style': {"type": "rect", "ratio": 1.33}
})
return videos
def gmsca(self,data):
acts=[]
for i in data('.grid.grid-cols-2.md\\:grid-cols-3 div').items():
acts.append({
'vod_id': i('.text-nord13').attr('href'),
'vod_name': i('.text-nord13').text(),
'vod_pic': '',
'vod_remarks': i('.text-nord10').text(),
'vod_tag': 'folder',
'style': {"type": "rect", "ratio": 1.33}
})
return acts
def actca(self,data):
acts=[]
for i in data('.max-w-full ul li').items():
acts.append({
'vod_id': i('a').attr('href'),
'vod_name': i('img').attr('alt'),
'vod_pic': i('img').attr('src'),
'vod_year': i('.text-nord10').eq(-1).text(),
'vod_remarks': i('.text-nord10').eq(0).text(),
'vod_tag': 'folder',
'style': {"type": "oval"}
})
return acts
def getfov(self, url):
try:
h=self.headers.copy()
ids=url.split('/')
h.update({'referer':f'{url}/'})
t=str(int(time.time()))
params = {
'frontend_timestamp': t,
'frontend_sign': self.getsign(f"/missav-default/batch/?frontend_timestamp={t}"),
}
uid=str(uuid.uuid4())
json_data = {
'requests': [
{
'method': 'POST',
'path': f'/recomms/items/{ids[-1]}/items/',
'params': {
'targetUserId': uid,
'count': 13,
'scenario': 'desktop-watch-next-side',
'returnProperties': True,
'includedProperties': [
'title_cn',
'duration',
'has_chinese_subtitle',
'has_english_subtitle',
'is_uncensored_leak',
'dm',
],
'cascadeCreate': True,
},
},
{
'method': 'POST',
'path': f'/recomms/items/{ids[-1]}/items/',
'params': {
'targetUserId': uid,
'count': 12,
'scenario': 'desktop-watch-next-bottom',
'returnProperties': True,
'includedProperties': [
'title_cn',
'duration',
'has_chinese_subtitle',
'has_english_subtitle',
'is_uncensored_leak',
'dm',
],
'cascadeCreate': True,
},
},
],
'distinctRecomms': True,
}
data = self.post(f'{self.xhost}/missav-default/batch/', params=params,headers=h, json=json_data).json()
vdata=[]
for i in data:
for j in i['json']['recomms']:
if j.get('id'):
vdata.append(f"{j['values']['title_cn']}${self.host}/cn/{j['id']}")
return '#'.join(vdata)
except Exception as e:
print(f"获取推荐失败: {e}")
return ''
def getsign(self, text):
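# Recombee batch-API auth: HMAC-SHA1 over the request path (which includes
# frontend_timestamp) with the hardcoded frontend key below.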
message_bytes = text.encode('utf-8')
key_bytes = b'Ikkg568nlM51RHvldlPvc2GzZPE9R4XGzaH9Qj4zK9npbbbTly1gj9K4mgRn0QlV'
h = HMAC.new(key_bytes, digestmod=SHA1)
h.update(message_bytes)
signature = h.hexdigest()
return signature
def ungzip(self, data):
result=gzip.decompress(b64decode(data)).decode('utf-8')
return json.loads(result)
def execute_js(self, jstxt):
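# The page hides its stream URLs behind the classic p,a,c,k,e,d JS packer;
# rather than reimplementing the unpacker, evaluate the packed script in
# QuickJS and read the resulting source* globals back out.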
match = re.search(r"eval\(function\(p,a,c,k,e,d\).*?return p\}(.*?)\)\)", jstxt)
if not match: return None
js_code = match.group(0)
try:
from com.whl.quickjs.wrapper import QuickJSContext
ctx = QuickJSContext.create()
ctx.evaluate(js_code)
result = []
common_vars = ["source", "source842", "source1280"]
for var_name in common_vars:
try:
value = ctx.getGlobalObject().getProperty(var_name)
if value is not None:
if isinstance(value, str):
value_str = value
else:
value_str = value.toString()
if "http" in value_str:
result.append(f"{var_name}${value_str}")
self.log(f"找到变量 {var_name} = {value_str[:50]}...")
except Exception as var_err:
self.log(f"获取变量 {var_name} 失败: {var_err}")
ctx.destroy()
return '#'.join(result)
except Exception as e:
self.log(f"执行失败: {e}")
return None


@@ -0,0 +1,321 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import json
import re
import sys
from base64 import b64decode, b64encode
from urllib.parse import urlparse
import requests
from pyquery import PyQuery as pq
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
'''
Built-in proxy configuration, using the 真心 jar as an example:
{
"key": "Phb",
"name": "Phb",
"type": 3,
"searchable": 1,
"quickSearch": 1,
"filterable": 1,
"api": "./py/Phb.py",
"ext": {
"http": "http://127.0.0.1:1072",
"https": "http://127.0.0.1:1072"
}
},
both the http and https proxy entries use an http:// URL
'''
try:self.proxies = json.loads(extend)
except:self.proxies = {}
self.headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
'pragma': 'no-cache',
'cache-control': 'no-cache',
'sec-ch-ua-platform': '"Windows"',
'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
'dnt': '1',
'sec-ch-ua-mobile': '?0',
'sec-fetch-site': 'cross-site',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
'priority': 'u=1, i',
}
self.host = self.gethost()
self.headers.update({'referer': f'{self.host}/', 'origin': self.host})
self.session = Session()
self.session.proxies.update(self.proxies)
self.session.headers.update(self.headers)
# ====== Parse and inject the pinned cookies here ======
# The user-supplied cookie string
cookie_string = "ss=827590546130942001; sessid=607526310895825838; comp_detect-cookies=57035.100000; fg_afaf12e314c5419a855ddc0bf120670f=89213.100000; fg_7d31324eedb583147b6dcbea0051c868=25322.100000; __s=686AA841-42FE722901BB38AD16-B0A8AB1; __l=686AA841-42FE722901BB38AD16-B0A8AB1; tj_UUID_v2=ChAf6M0hCSZM47qWcst9tIq2EgsIxdCqwwYQp_j6DRgB; _ga=GA1.1.1279613306.1751820360; ua=803dd0debe437cd2610f66cd8235a54c; platform=mobile; cookieConsent=3; d_fs=1; accessAgeDisclaimerPH=1; il=v1JnAJL5n4SJJ8ziiYM4g_WAF6rQvZDAsZWgNYIHsUSg0xNzY3NjMxNjgyREwyTWd1RUVBbnozdHFQV01vUW5leEZ0ajFSM1NvNDdSNkVrQ1BhXw..; bs=e1649232670c3a49db241055d6ccf891; bsdd=e1649232670c3a49db241055d6ccf891; tj_UUID=ChAf6M0hCSZM47qWcst9tIq2EgsIxdCqwwYQp_j6DRgBIiBlMTY0OTIzMjY3MGMzYTQ5ZGIyNDEwNTVkNmNjZjg5MQ==; d_uidb=67be68be-6c63-a0f0-03d0-83e3bd7611c8; d_uid=67be68be-6c63-a0f0-03d0-83e3bd7611c8; d_uidb=67be68be-6c63-a0f0-03d0-83e3bd7611c8; htjf-mobile=4; _ga_B39RFFWGYY=GS2.1.s1751820360$o1$g1$t1751820515$j29$l0$h0"
# Parse the cookie string into a dict
parsed_cookies = {}
for part in cookie_string.split('; '):
if '=' in part:
key, value = part.split('=', 1)  # split only on the first '=', since values may themselves contain '='
parsed_cookies[key.strip()] = value.strip()  # strip() drops any stray whitespace
self.session.cookies.update(parsed_cookies)
# ==================================
pass
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
def homeContent(self, filter):
result = {}
cateManual = {
"推荐": "/recommended",
"视频": "/video",
"片单": "/playlists",
"频道": "/channels",
"分类": "/categories",
"明星": "/pornstars"
}
classes = []
filters = {}
for k in cateManual:
classes.append({
'type_name': k,
'type_id': cateManual[k]
})
result['class'] = classes
result['filters'] = filters
return result
def homeVideoContent(self):
# data = self.getpq('/recommended')
# vhtml = data("#recommendedListings .pcVideoListItem .phimage")
# return {'list': self.getlist(vhtml)}
pass
def categoryContent(self, tid, pg, filter, extend):
vdata = []
result = {}
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
if tid == '/video' or '_this_video' in tid:
pagestr = f'&' if '?' in tid else f'?'
tid = tid.split('_this_video')[0]
data = self.getpq(f'{tid}{pagestr}page={pg}')
vdata = self.getlist(data('#videoCategory .pcVideoListItem'))
elif tid == '/recommended':
data = self.getpq(f'{tid}?page={pg}')
vdata = self.getlist(data('#recommendedListings .pcVideoListItem .phimage'))
elif tid == '/playlists':
data = self.getpq(f'{tid}?page={pg}')
vhtml = data('#playListSection li')
vdata = []
for i in vhtml.items():
vdata.append({
'vod_id': 'playlists_click_' + i('.thumbnail-info-wrapper .display-block a').attr('href'),
'vod_name': i('.thumbnail-info-wrapper .display-block a').attr('title'),
'vod_pic': self.proxy(i('.largeThumb').attr('src')),
'vod_tag': 'folder',
'vod_remarks': i('.playlist-videos .number').text(),
'style': {"type": "rect", "ratio": 1.33}
})
elif tid == '/channels':
data = self.getpq(f'{tid}?o=rk&page={pg}')
vhtml = data('#filterChannelsSection li .description')
vdata = []
for i in vhtml.items():
vdata.append({
'vod_id': 'director_click_' + i('.avatar a').attr('href'),
'vod_name': i('.avatar img').attr('alt'),
'vod_pic': self.proxy(i('.avatar img').attr('src')),
'vod_tag': 'folder',
'vod_remarks': i('.descriptionContainer ul li').eq(-1).text(),
'style': {"type": "rect", "ratio": 1.33}
})
elif tid == '/categories' and pg == '1':
result['pagecount'] = 1
data = self.getpq(f'{tid}')
vhtml = data('.categoriesListSection li .relativeWrapper')
vdata = []
for i in vhtml.items():
vdata.append({
'vod_id': i('a').attr('href') + '_this_video',
'vod_name': i('a').attr('alt'),
'vod_pic': self.proxy(i('a img').attr('src')),
'vod_tag': 'folder',
'style': {"type": "rect", "ratio": 1.33}
})
elif tid == '/pornstars':
data = self.getpq(f'{tid}?o=t&page={pg}')
vhtml = data('#popularPornstars .performerCard .wrap')
vdata = []
for i in vhtml.items():
vdata.append({
'vod_id': 'pornstars_click_' + i('a').attr('href'),
'vod_name': i('.performerCardName').text(),
'vod_pic': self.proxy(i('a img').attr('src')),
'vod_tag': 'folder',
'vod_year': i('.performerVideosViewsCount span').eq(0).text(),
'vod_remarks': i('.performerVideosViewsCount span').eq(-1).text(),
'style': {"type": "rect", "ratio": 1.33}
})
elif 'playlists_click' in tid:
tid = tid.split('click_')[-1]
if pg == '1':
hdata = self.getpq(tid)
self.token = hdata('#searchInput').attr('data-token')
vdata = self.getlist(hdata('#videoPlaylist .pcVideoListItem .phimage'))
else:
tid = tid.split('playlist/')[-1]
data = self.getpq(f'/playlist/viewChunked?id={tid}&token={self.token}&page={pg}')
vdata = self.getlist(data('.pcVideoListItem .phimage'))
elif 'director_click' in tid:
tid = tid.split('click_')[-1]
data = self.getpq(f'{tid}/videos?page={pg}')
vdata = self.getlist(data('#showAllChanelVideos .pcVideoListItem .phimage'))
elif 'pornstars_click' in tid:
tid = tid.split('click_')[-1]
data = self.getpq(f'{tid}/videos?page={pg}')
vdata = self.getlist(data('#mostRecentVideosSection .pcVideoListItem .phimage'))
result['list'] = vdata
return result
def detailContent(self, ids):
url = f"{self.host}{ids[0]}"
data = self.getpq(ids[0])
vn = data('meta[property="og:title"]').attr('content')
dtext = data('.userInfo .usernameWrap a')
pdtitle = '[a=cr:' + json.dumps(
{'id': 'director_click_' + dtext.attr('href'), 'name': dtext.text()}) + '/]' + dtext.text() + '[/a]'
vod = {
'vod_name': vn,
'vod_director': pdtitle,
'vod_remarks': (data('.userInfo').text() + ' / ' + data('.ratingInfo').text()).replace('\n', ' / '),
'vod_play_from': 'Pornhub',
'vod_play_url': ''
}
js_content = data("#player script").eq(0).text()
plist = [f"{vn}${self.e64(f'{1}@@@@{url}')}"]
try:
pattern = r'"mediaDefinitions":\s*(\[.*?\]),\s*"isVertical"'
match = re.search(pattern, js_content, re.DOTALL)
if match:
json_str = match.group(1)
udata = json.loads(json_str)
plist = [
f"{media['height']}${self.e64(f'{0}@@@@{url}')}"
for media in udata[:-1]
if (url := media.get('videoUrl'))
]
except Exception as e:
print(f"提取mediaDefinitions失败: {str(e)}")
vod['vod_play_url'] = '#'.join(plist)
return {'list': [vod]}
def searchContent(self, key, quick, pg="1"):
data = self.getpq(f'/video/search?search={key}&page={pg}')
return {'list': self.getlist(data('#videoSearchResult .pcVideoListItem .phimage')), 'page': pg}
def playerContent(self, flag, id, vipFlags):
ids = self.d64(id).split('@@@@')
if '.m3u8' in ids[1]: ids[1] = self.proxy(ids[1], 'm3u8')
return {'parse': int(ids[0]), 'url': ids[1], 'header': self.headers}
def localProxy(self, param):
url = self.d64(param.get('url'))
if param.get('type') == 'm3u8':
return self.m3Proxy(url)
else:
return self.tsProxy(url)
def m3Proxy(self, url):
ydata = requests.get(url, headers=self.headers, proxies=self.proxies, allow_redirects=False)
data = ydata.content.decode('utf-8')
if ydata.headers.get('Location'):
url = ydata.headers['Location']
data = requests.get(url, headers=self.headers, proxies=self.proxies).content.decode('utf-8')
lines = data.strip().split('\n')
last_r = url[:url.rfind('/')]
parsed_url = urlparse(url)
durl = parsed_url.scheme + "://" + parsed_url.netloc
for index, string in enumerate(lines):
if '#EXT' not in string:
if 'http' not in string:
domain = last_r if string.count('/') < 2 else durl
string = domain + ('' if string.startswith('/') else '/') + string
lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
data = '\n'.join(lines)
return [200, "application/vnd.apple.mpegur", data]
def tsProxy(self, url):
data = requests.get(url, headers=self.headers, proxies=self.proxies, stream=True)
return [200, data.headers['Content-Type'], data.content]
def gethost(self):
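# The root domain 302-redirects to a regional host; take that Location
# header (minus its trailing slash) as the site base, falling back to the
# canonical domain if the probe fails.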
try:
response = requests.get('https://www.pornhub.com', headers=self.headers, proxies=self.proxies,
allow_redirects=False)
return response.headers['Location'][:-1]
except Exception as e:
print(f"获取主页失败: {str(e)}")
return "https://www.pornhub.com"
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self, encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def getlist(self, data):
vlist = []
for i in data.items():
vlist.append({
'vod_id': i('a').attr('href'),
'vod_name': i('a').attr('title'),
'vod_pic': self.proxy(i('img').attr('src')),
'vod_remarks': i('.bgShadeEffect').text() or i('.duration').text(),
'style': {'ratio': 1.33, 'type': 'rect'}
})
return vlist
def getpq(self, path):
try:
response = self.session.get(f'{self.host}{path}').text
return pq(response.encode('utf-8'))
except Exception as e:
print(f"请求失败: , {str(e)}")
return None
def proxy(self, data, type='img'):
if data and len(self.proxies):return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
else:return data


@@ -0,0 +1,270 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import json
import sys
from base64 import b64decode, b64encode
from pyquery import PyQuery as pq
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
self.host = self.gethost()
self.headers['referer'] = f'{self.host}/'
self.session = Session()
self.session.headers.update(self.headers)
pass
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-full-version': '"133.0.6943.98"',
'sec-ch-ua-arch': '"x86"',
'sec-ch-ua-platform': '"Windows"',
'sec-ch-ua-platform-version': '"19.0.0"',
'sec-ch-ua-model': '""',
'sec-ch-ua-full-version-list': '"Not(A:Brand";v="99.0.0.0", "Google Chrome";v="133.0.6943.98", "Chromium";v="133.0.6943.98"',
'dnt': '1',
'upgrade-insecure-requests': '1',
'sec-fetch-site': 'none',
'sec-fetch-mode': 'navigate',
'sec-fetch-user': '?1',
'sec-fetch-dest': 'document',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
'priority': 'u=0, i'
}
def homeContent(self, filter):
result = {}
cateManual = {
"4K": "/4k",
"国产": "two_click_/categories/chinese",
"最新": "/newest",
"最佳": "/best",
"频道": "/channels",
"类别": "/categories",
"明星": "/pornstars"
}
classes = []
filters = {}
for k in cateManual:
classes.append({
'type_name': k,
'type_id': cateManual[k]
})
if k !='4K':filters[cateManual[k]]=[{'key':'type','name':'类型','value':[{'n':'4K','v':'/4k'}]}]
result['class'] = classes
result['filters'] = filters
return result
def homeVideoContent(self):
data = self.getpq()
return {'list': self.getlist(data(".thumb-list--sidebar .thumb-list__item"))}
def categoryContent(self, tid, pg, filter, extend):
vdata = []
result = {}
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
if tid in ['/4k', '/newest', '/best'] or 'two_click_' in tid:
if 'two_click_' in tid: tid = tid.split('click_')[-1]
data = self.getpq(f'{tid}{extend.get("type","")}/{pg}')
vdata = self.getlist(data(".thumb-list--sidebar .thumb-list__item"))
elif tid == '/channels':
data = self.getpq(f'{tid}/{pg}')
jsdata = self.getjsdata(data)
for i in jsdata['channels']:
vdata.append({
'vod_id': f"two_click_" + i.get('channelURL'),
'vod_name': i.get('channelName'),
'vod_pic': i.get('siteLogoURL'),
'vod_year': f'videos:{i.get("videoCount")}',
'vod_tag': 'folder',
'vod_remarks': f'subscribers:{i["subscriptionModel"].get("subscribers")}',
'style': {'ratio': 1.33, 'type': 'rect'}
})
elif tid == '/categories':
result['pagecount'] = pg
data = self.getpq(tid)
self.cdata = self.getjsdata(data)
for i in self.cdata['layoutPage']['store']['popular']['assignable']:
vdata.append({
'vod_id': "one_click_" + i.get('id'),
'vod_name': i.get('name'),
'vod_pic': '',
'vod_tag': 'folder',
'style': {'ratio': 1.33, 'type': 'rect'}
})
elif tid == '/pornstars':
data = self.getpq(f'{tid}/{pg}')
pdata = self.getjsdata(data)
for i in pdata['pagesPornstarsComponent']['pornstarListProps']['pornstars']:
vdata.append({
'vod_id': f"two_click_" + i.get('pageURL'),
'vod_name': i.get('name'),
'vod_pic': i.get('imageThumbUrl'),
'vod_remarks': i.get('translatedCountryName'),
'vod_tag': 'folder',
'style': {'ratio': 1.33, 'type': 'rect'}
})
elif 'one_click' in tid:
result['pagecount'] = pg
tid = tid.split('click_')[-1]
for i in self.cdata['layoutPage']['store']['popular']['assignable']:
if i.get('id') == tid:
for j in i['items']:
vdata.append({
'vod_id': f"two_click_" + j.get('url'),
'vod_name': j.get('name'),
'vod_pic': j.get('thumb'),
'vod_tag': 'folder',
'style': {'ratio': 1.33, 'type': 'rect'}
})
result['list'] = vdata
return result
def detailContent(self, ids):
data = self.getpq(ids[0])
djs = self.getjsdata(data)
vn = data('meta[property="og:title"]').attr('content')
dtext = data('#video-tags-list-container')
href = dtext('a').attr('href')
title = dtext('span[class*="body-bold-"]').eq(0).text()
pdtitle = ''
if href:
pdtitle = '[a=cr:' + json.dumps({'id': 'two_click_' + href, 'name': title}) + '/]' + title + '[/a]'
vod = {
'vod_name': vn,
'vod_director': pdtitle,
'vod_remarks': data('.rb-new__info').text(),
'vod_play_from': 'Xhamster',
'vod_play_url': ''
}
try:
plist = []
d = djs['xplayerSettings']['sources']
f = d.get('standard')
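# Order play entries by numeric quality (2160, 1080, ...) descending; the
# label before the '$' separator carries the resolution.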
def custom_sort_key(url):
quality = url.split('$')[0]
number = ''.join(filter(str.isdigit, quality))
number = int(number) if number else 0
return -number, quality
if f:
for key, value in f.items():
if isinstance(value, list):
for info in value:
id = self.e64(f'{0}@@@@{info.get("url") or info.get("fallback")}')
plist.append(f"{info.get('label') or info.get('quality')}${id}")
plist.sort(key=custom_sort_key)
if d.get('hls'):
for format_type, info in d['hls'].items():
if url := info.get('url'):
encoded = self.e64(f'{0}@@@@{url}')
plist.append(f"{format_type}${encoded}")
except Exception as e:
plist = [f"{vn}${self.e64(f'{1}@@@@{ids[0]}')}"]
print(f"获取视频信息失败: {str(e)}")
vod['vod_play_url'] = '#'.join(plist)
return {'list': [vod]}
def searchContent(self, key, quick, pg="1"):
data = self.getpq(f'/search/{key}?page={pg}')
return {'list': self.getlist(data(".thumb-list--sidebar .thumb-list__item")), 'page': pg}
def playerContent(self, flag, id, vipFlags):
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36',
'pragma': 'no-cache',
'cache-control': 'no-cache',
'sec-ch-ua-platform': '"Windows"',
'sec-ch-ua': '"Not(A:Brand";v="99", "Google Chrome";v="133", "Chromium";v="133"',
'dnt': '1',
'sec-ch-ua-mobile': '?0',
'origin': self.host,
'sec-fetch-site': 'cross-site',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'referer': f'{self.host}/',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
'priority': 'u=1, i',
}
ids = self.d64(id).split('@@@@')
return {'parse': int(ids[0]), 'url': ids[1], 'header': headers}
def localProxy(self, param):
pass
def gethost(self):
try:
response = self.fetch('https://xhamster.com', headers=self.headers, allow_redirects=False)
return response.headers['Location']
except Exception as e:
print(f"获取主页失败: {str(e)}")
return "https://zn.xhamster.com"
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self, encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def getlist(self, data):
vlist = []
for i in data.items():
vlist.append({
'vod_id': i('.role-pop').attr('href'),
'vod_name': i('.video-thumb-info a').text(),
'vod_pic': i('.role-pop img').attr('src'),
'vod_year': i('.video-thumb-info .video-thumb-views').text().split(' ')[0],
'vod_remarks': i('.role-pop div[data-role="video-duration"]').text(),
'style': {'ratio': 1.33, 'type': 'rect'}
})
return vlist
def getpq(self, path=''):
h = '' if path.startswith('http') else self.host
response = self.session.get(f'{h}{path}').text
try:
return pq(response)
except Exception as e:
print(f"{str(e)}")
return pq(response.encode('utf-8'))
def getjsdata(self, data):
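# The page ships its state as "window.initials={...};" inside the
# #initials-script tag; strip the assignment prefix and trailing semicolon
# to recover plain JSON.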
vhtml = data("script[id='initials-script']").text()
jst = json.loads(vhtml.split('initials=')[-1][:-1])
return jst


@@ -0,0 +1,276 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import json
import re
import sys
from urllib.parse import urlparse
import requests
from pyquery import PyQuery as pq
from base64 import b64decode, b64encode
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
try:self.proxies = json.loads(extend)
except:self.proxies = {}
self.session = Session()
self.session.proxies.update(self.proxies)
self.session.headers.update(self.headers)
pass
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
host = "https://www.xvideos.com"
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.5410.0 Safari/537.36",
"pragma": "no-cache",
"cache-control": "no-cache",
"sec-ch-ua-platform": "\"Windows\"",
"sec-ch-ua": "\"Not(A:Brand\";v=\"99\", \"Google Chrome\";v=\"133\", \"Chromium\";v=\"133\"",
"dnt": "1",
"origin":host,
'referer':f'{host}/',
"sec-ch-ua-mobile": "?0",
"sec-fetch-site": "cross-site",
"sec-fetch-mode": "cors",
"sec-fetch-dest": "empty",
"accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
"priority": "u=1, i"
}
def homeContent(self, filter):
result = {}
cateManual = {
"最新": "/new",
"最佳": "/best",
"频道": "/channels-index",
"标签": "/tags",
"明星": "/pornstars-index"
}
classes = []
for k in cateManual:
classes.append({
'type_name': k,
'type_id': cateManual[k]
})
result['class'] = classes
return result
def homeVideoContent(self):
data = self.getpq()
return {'list':self.getlist(data(".mozaique .frame-block"))}
def categoryContent(self, tid, pg, filter, extend):
vdata = []
result = {}
page = f"/{int(pg) - 1}" if pg != '1' else ''
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
if tid=='/new' or 'tags_click' in tid:
if 'tags_click' in tid:tid=tid.split('click_')[-1]
data=self.getpq(f'{tid}/{pg}')
vdata=self.getlist(data(".mozaique .frame-block"))
elif tid=='/best':
if pg=='1':
self.path=self.session.get(f'{self.host}{tid}',allow_redirects=False).headers['Location']
data=self.getpq(f'{self.path}{page}')
vdata=self.getlist(data(".mozaique .frame-block"))
elif tid=='/channels-index' or tid=='/pornstars-index':
data = self.getpq(f'{tid}{page}')
vhtml=data(".mozaique .thumb-block")
for i in vhtml.items():
a = i('.thumb-inside .thumb a')
match = re.search(r'src="([^"]+)"', a('script').text())
img=''
if match:
img = match.group(1).strip()
vdata.append({
'vod_id': f"channels_click_{'/channels'if tid=='/channels-index' else ''}"+a.attr('href'),
'vod_name': a('.profile-name').text() or i('.profile-name').text().replace('\xa0','/'),
'vod_pic': self.proxy(img),
'vod_tag': 'folder',
'vod_remarks': i('.thumb-under .profile-counts').text(),
'style': {'ratio': 1.33, 'type': 'rect'}
})
elif tid=='/tags':
result['pagecount'] = pg
vhtml = self.getpq(tid)
vhtml = vhtml('.tags-list')
for d in vhtml.items():
for i in d('li a').items():
vdata.append({
'vod_id': "tags_click_"+i.attr('href'),
'vod_name': i.attr('title') or i('b').text(),
'vod_pic': '',
'vod_tag': 'folder',
'vod_remarks': i('.navbadge').text(),
'style': {'ratio': 1.33, 'type': 'rect'}
})
elif 'channels_click' in tid:
tid=tid.split('click_')[-1]
vhtml=self.session.post(f'{self.host}{tid}/videos/best/{int(pg)-1}').json()
for i in vhtml['videos']:
vdata.append({
'vod_id': i.get('u'),
'vod_name': i.get('tf'),
'vod_pic': self.proxy(i.get('il')),
'vod_year': i.get('n'),
'vod_remarks': i.get('d'),
'style': {'ratio': 1.33, 'type': 'rect'}
})
result['list'] = vdata
return result
def detailContent(self, ids):
url = f"{self.host}{ids[0]}"
data = self.getpq(ids[0])
vn=data('meta[property="og:title"]').attr('content')
dtext=data('.main-uploader a')
href=dtext.attr('href')
pdtitle=''
if href and href.count('/') < 2:
href=f'/channels{href}'
pdtitle = '[a=cr:' + json.dumps({'id': 'channels_click_'+href, 'name': dtext('.name').text()}) + '/]' + dtext('.name').text() + '[/a]'
vod = {
'vod_name': vn,
'vod_director':pdtitle,
'vod_remarks': data('.page-title').text().replace(vn,''),
'vod_play_from': 'Xvideos',
'vod_play_url': ''
}
js_content = data("#video-player-bg script")
jstr=''
for script in js_content.items():
content = script.text()
if 'setVideoUrlLow' in content and 'html5player' in content:
jstr = content
break
plist = [f"{vn}${self.e64(f'{1}@@@@{url}')}"]
def extract_video_urls(js_content):
try:
low = re.search(r'setVideoUrlLow\([\'"]([^\'"]+)[\'"]\)', js_content)
high = re.search(r'setVideoUrlHigh\([\'"]([^\'"]+)[\'"]\)', js_content)
hls = re.search(r'setVideoHLS\([\'"]([^\'"]+)[\'"]\)', js_content)
return {
'hls': hls.group(1) if hls else None,
'high': high.group(1) if high else None,
'low': low.group(1) if low else None
}
except Exception as e:
print(f"提取视频URL失败: {str(e)}")
return {}
if jstr:
try:
urls = extract_video_urls(jstr)
plist = [
f"{quality}${self.e64(f'{0}@@@@{url}')}"
for quality, url in urls.items()
if url
]
except Exception as e:
print(f"提取url失败: {str(e)}")
vod['vod_play_url'] = '#'.join(plist)
return {'list':[vod]}
def searchContent(self, key, quick, pg="1"):
data=self.getpq(f'/?k={key}&p={int(pg)-1}')
return {'list':self.getlist(data(".mozaique .frame-block")),'page':pg}
def playerContent(self, flag, id, vipFlags):
ids=self.d64(id).split('@@@@')
if '.m3u8' in ids[1]: ids[1] = self.proxy(ids[1], 'm3u8')
return {'parse': int(ids[0]), 'url': ids[1], 'header': self.headers}
def localProxy(self, param):
url=self.d64(param['url'])
if param.get('type') == 'm3u8':
return self.m3Proxy(url)
else:
return self.tsProxy(url)
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self,encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def getlist(self, data):
vlist=[]
for i in data.items():
a=i('.thumb-inside .thumb a')
b=i('.thumb-under .title a')
vlist.append({
'vod_id': a.attr('href'),
'vod_name': b('a').attr('title'),
'vod_pic': self.proxy(a('img').attr('data-src')),
'vod_year': a('.video-hd-mark').text(),
'vod_remarks': b('.duration').text(),
'style': {'ratio': 1.33, 'type': 'rect'}
})
return vlist
def getpq(self, path=''):
response = self.session.get(f'{self.host}{path}').text
try:
return pq(response)
except Exception as e:
print(f"{str(e)}")
return pq(response.encode('utf-8'))
def m3Proxy(self, url):
ydata = requests.get(url, headers=self.headers, proxies=self.proxies, allow_redirects=False)
data = ydata.content.decode('utf-8')
if ydata.headers.get('Location'):
url = ydata.headers['Location']
data = requests.get(url, headers=self.headers, proxies=self.proxies).content.decode('utf-8')
lines = data.strip().split('\n')
last_r = url[:url.rfind('/')]
parsed_url = urlparse(url)
durl = parsed_url.scheme + "://" + parsed_url.netloc
for index, string in enumerate(lines):
if '#EXT' not in string:
if 'http' not in string:
domain=last_r if string.count('/') < 2 else durl
string = domain + ('' if string.startswith('/') else '/') + string
lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
data = '\n'.join(lines)
return [200, "application/vnd.apple.mpegur", data]
def tsProxy(self, url):
data = requests.get(url, headers=self.headers, proxies=self.proxies, stream=True)
return [200, data.headers['Content-Type'], data.content]
def proxy(self, data, type='img'):
if data and len(self.proxies):return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
else:return data
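
# Illustrative sketch (not wired into the spider; example.com is a placeholder).
# It mirrors the branch in m3Proxy above that resolves relative playlist
# entries: paths with fewer than two '/' join the playlist directory, anything
# else joins the scheme+netloc of the playlist URL.
if __name__ == '__main__':
    def resolve(entry, playlist_url):
        if 'http' in entry:
            return entry
        last_r = playlist_url[:playlist_url.rfind('/')]
        p = urlparse(playlist_url)
        domain = last_r if entry.count('/') < 2 else f"{p.scheme}://{p.netloc}"
        return domain + ('' if entry.startswith('/') else '/') + entry

    assert resolve('seg1.ts', 'https://cdn.example.com/a/b/index.m3u8') == \
        'https://cdn.example.com/a/b/seg1.ts'
    assert resolve('/x/y/seg1.ts', 'https://cdn.example.com/a/b/index.m3u8') == \
        'https://cdn.example.com/x/y/seg1.ts'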

View File

@@ -0,0 +1,212 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import sys
from base64 import b64encode, b64decode
from Crypto.Hash import MD5, SHA256
sys.path.append('..')
from base.spider import Spider
from Crypto.Cipher import AES
import json
import time
class Spider(Spider):
def getName(self):
return "lav"
def init(self, extend=""):
self.id = self.ms(str(int(time.time() * 1000)))[:16]
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def action(self, action):
pass
def destroy(self):
pass
host = "http://sir_new.tiansexyl.tv"
t = str(int(time.time() * 1000))
headers = {'User-Agent': 'okhttp-okgo/jeasonlzy', 'Connection': 'Keep-Alive',
'Content-Type': 'application/x-www-form-urlencoded'}
def homeContent(self, filter):
cateManual = {"演员": "actor", "分类": "avsearch", }
classes = []
for k in cateManual:
classes.append({'type_name': k, 'type_id': cateManual[k]})
j = {'code': 'homePage', 'mod': 'down', 'channel': 'self', 'via': 'agent', 'bundleId': 'com.tvlutv',
'app_type': 'rn', 'os_version': '12.0.5', 'version': '3.2.3', 'oauth_type': 'android_rn',
'oauth_id': self.id}
body = self.aes(j)
data = self.post(f'{self.host}/api.php?t={str(int(time.time() * 1000))}', data=body, headers=self.headers).json()['data']
data1 = self.aes(data, False)['data']
self.r = data1['r']
for i, d in enumerate(data1['avTag']):
# if i == 4:
# break
classes.append({'type_name': d['name'], 'type_id': d['tag']})
        result = {}
        result["class"] = classes
        return result
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
id = tid.split("@@")
result = {}
result["page"] = pg
result["pagecount"] = 9999
result["limit"] = 90
result["total"] = 999999
if id[0] == 'avsearch':
if pg == '1':
j = {'code': 'avsearch', 'mod': 'search', 'channel': 'self', 'via': 'agent', 'bundleId': 'com.tvlutv',
'app_type': 'rn', 'os_version': '12.0.5', 'version': '3.2.3', 'oauth_type': 'android_rn',
'oauth_id': self.id}
if len(id) > 1:
j = {'code': 'find', 'mod': 'tag', 'channel': 'self', 'via': 'agent', 'bundleId': 'com.tvlutv',
'app_type': 'rn', 'os_version': '12.0.5', 'version': '3.2.3', 'oauth_type': 'android_rn',
'oauth_id': self.id, 'type': 'av', 'dis': 'new', 'page': str(pg), 'tag': id[1]}
elif id[0] == 'actor':
j = {'mod': 'actor', 'channel': 'self', 'via': 'agent', 'bundleId': 'com.tvlutv', 'app_type': 'rn',
'os_version': '12.0.5', 'version': '3.2.3', 'oauth_type': 'android_rn', 'oauth_id': self.id,
'page': str(pg), 'filter': ''}
if len(id) > 1:
j = {'code': 'eq', 'mod': 'actor', 'channel': 'self', 'via': 'agent', 'bundleId': 'com.tvlutv',
'app_type': 'rn', 'os_version': '12.0.5', 'version': '3.2.3', 'oauth_type': 'android_rn',
'oauth_id': self.id, 'page': str(pg), 'id': id[1], 'actor': id[2]}
else:
j = {'code': 'search', 'mod': 'av', 'channel': 'self', 'via': 'agent', 'bundleId': 'com.tvlutv',
'app_type': 'rn', 'os_version': '12.0.5', 'version': '3.2.3', 'oauth_type': 'android_rn',
'oauth_id': self.id, 'page': str(pg), 'tag': id[0]}
body = self.aes(j)
data = self.post(f'{self.host}/api.php?t={str(int(time.time() * 1000))}', data=body, headers=self.headers).json()['data']
data1 = self.aes(data, False)['data']
videos = []
if tid == 'avsearch' and len(id) == 1:
for item in data1:
videos.append({"vod_id": id[0] + "@@" + str(item.get('tags')), 'vod_name': item.get('name'),
'vod_pic': self.imgs(item.get('ico')), 'vod_tag': 'folder',
'style': {"type": "rect", "ratio": 1.33}})
elif tid == 'actor' and len(id) == 1:
for item in data1:
videos.append({"vod_id": id[0] + "@@" + str(item.get('id')) + "@@" + item.get('name'),
'vod_name': item.get('name'), 'vod_pic': self.imgs(item.get('cover')),
'vod_tag': 'folder', 'style': {"type": "oval"}})
else:
for item in data1:
if item.get('_id'):
videos.append({"vod_id": str(item.get('id')), 'vod_name': item.get('title'),
'vod_pic': self.imgs(item.get('cover_thumb') or item.get('cover_full')),
'vod_remarks': item.get('good'), 'style': {"type": "rect", "ratio": 1.33}})
result["list"] = videos
return result
def detailContent(self, ids):
id = ids[0]
j = {'code': 'detail', 'mod': 'av', 'channel': 'self', 'via': 'agent', 'bundleId': 'com.tvlutv',
'app_type': 'rn', 'os_version': '12.0.5', 'version': '3.2.3', 'oauth_type': 'android_rn',
'oauth_id': self.id, 'id': id}
body = self.aes(j)
data = self.post(f'{self.host}/api.php?t={str(int(time.time() * 1000))}', data=body, headers=self.headers).json()['data']
data1 = self.aes(data, False)['line']
vod = {}
play = []
for itt in data1:
a = itt['line'].get('s720')
if a:
b = a.split('.')
b[0] = 'https://m3u8'
a = '.'.join(b)
play.append(itt['info']['tips'] + "$" + a)
break
vod["vod_play_from"] = 'LAV'
vod["vod_play_url"] = "#".join(play)
result = {"list": [vod]}
return result
def searchContent(self, key, quick, pg="1"):
pass
def playerContent(self, flag, id, vipFlags):
url = self.getProxyUrl() + "&url=" + b64encode(id.encode('utf-8')).decode('utf-8') + "&type=m3u8"
self.hh = {'User-Agent': 'dd', 'Connection': 'Keep-Alive', 'Referer': self.r}
result = {}
result["parse"] = 0
result["url"] = url
result["header"] = self.hh
return result
def localProxy(self, param):
url = param["url"]
if param.get('type') == "m3u8":
return self.vod(b64decode(url).decode('utf-8'))
else:
return self.img(url)
def vod(self, url):
data = self.fetch(url, headers=self.hh).text
key = bytes.fromhex("13d47399bda541b85e55830528d4e66f1791585b2d2216f23215c4c63ebace31")
iv = bytes.fromhex(data[:32])
data = data[32:]
cipher = AES.new(key, AES.MODE_CFB, iv, segment_size=128)
data_bytes = bytes.fromhex(data)
decrypted = cipher.decrypt(data_bytes)
encoded = decrypted.decode("utf-8").replace("\x08", "")
return [200, "application/vnd.apple.mpegur", encoded]
def imgs(self, url):
return self.getProxyUrl() + '&url=' + url
def img(self, url):
type = url.split('.')[-1]
data = self.fetch(url).text
key = bytes.fromhex("ba78f184208d775e1553550f2037f4af22cdcf1d263a65b4d5c74536f084a4b2")
iv = bytes.fromhex(data[:32])
data = data[32:]
cipher = AES.new(key, AES.MODE_CFB, iv, segment_size=128)
data_bytes = bytes.fromhex(data)
decrypted = cipher.decrypt(data_bytes)
return [200, f"image/{type}", decrypted]
def ms(self, data, m=False):
h = MD5.new()
if m:
h = SHA256.new()
h.update(data.encode('utf-8'))
return h.hexdigest()
def aes(self, data, operation=True):
key = bytes.fromhex("620f15cfdb5c79c34b3940537b21eda072e22f5d7151456dec3932d7a2b22c53")
t = str(int(time.time()))
ivt = self.ms(t)
if operation:
data = json.dumps(data, separators=(',', ':'))
iv = bytes.fromhex(ivt)
else:
iv = bytes.fromhex(data[:32])
data = data[32:]
cipher = AES.new(key, AES.MODE_CFB, iv, segment_size=128)
if operation:
data_bytes = data.encode('utf-8')
encrypted = cipher.encrypt(data_bytes)
ep = f'{ivt}{encrypted.hex()}'
edata = f"data={ep}&timestamp={t}0d27dfacef1338483561a46b246bf36d"
sign = self.ms(self.ms(edata, True))
edata = f"timestamp={t}&data={ep}&sign={sign}"
return edata
else:
data_bytes = bytes.fromhex(data)
decrypted = cipher.decrypt(data_bytes)
return json.loads(decrypted.decode('utf-8'))
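
# Illustrative sketch (dummy key/IV; the real key is the hard-coded hex in
# aes() above): the transport envelope is hex(iv) + hex(AES-CFB ciphertext),
# so the receiver recovers the IV from the first 32 hex characters.
if __name__ == '__main__':
    import os
    demo_key = os.urandom(32)
    demo_iv = os.urandom(16)
    body = json.dumps({'mod': 'demo'}, separators=(',', ':')).encode('utf-8')
    enc = AES.new(demo_key, AES.MODE_CFB, demo_iv, segment_size=128).encrypt(body)
    envelope = demo_iv.hex() + enc.hex()
    # receiver side, mirroring aes(data, operation=False)
    riv = bytes.fromhex(envelope[:32])
    rdata = bytes.fromhex(envelope[32:])
    plain = AES.new(demo_key, AES.MODE_CFB, riv, segment_size=128).decrypt(rdata)
    assert json.loads(plain) == {'mod': 'demo'}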

View File

@@ -0,0 +1,362 @@
# -*- coding: utf-8 -*-
#author:嗷呜群fans&claude4⚡
import json
import sys
import re
import time
from base64 import b64encode
from urllib.parse import urljoin, urlencode
import requests
from pyquery import PyQuery as pq
from requests import Session
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
try:
self.proxies = json.loads(extend) if extend else {}
except:
self.proxies = {}
if isinstance(self.proxies, dict) and 'proxy' in self.proxies and isinstance(self.proxies['proxy'], dict):
self.proxies = self.proxies['proxy']
fixed = {}
for k, v in (self.proxies or {}).items():
if isinstance(v, str) and not v.startswith('http'):
fixed[k] = f'http://{v}'
else:
fixed[k] = v
self.proxies = fixed
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:142.0) Gecko/20100101 Firefox/142.0',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.3,en;q=0.2',
'Referer': 'https://gayvidsclub.com/',
'Origin': 'https://gayvidsclub.com',
}
self.host = "https://gayvidsclub.com"
self.session = Session()
self.session.proxies.update(self.proxies)
self.session.headers.update(self.headers)
def getName(self):
return "GayVidsClub"
def isVideoFormat(self, url):
return '.m3u8' in url or '.mp4' in url
def manualVideoCheck(self):
return True
def destroy(self):
pass
def homeContent(self, filter):
result = {}
cateManual = {
"最新": "/all-gay-porn/",
"COAT": "/all-gay-porn/coat/",
"MEN'S RUSH.TV": "/all-gay-porn/mens-rush-tv/",
"HUNK CHANNEL": "/all-gay-porn/hunk-channel/",
"KO": "/all-gay-porn/ko/",
"EXFEED": "/all-gay-porn/exfeed/",
"BRAVO!": "/all-gay-porn/bravo/",
"STR8BOYS": "/all-gay-porn/str8boys/",
"G-BOT": "/all-gay-porn/g-bot/"
}
classes = []
filters = {}
for k in cateManual:
classes.append({
'type_name': k,
'type_id': cateManual[k]
})
result['class'] = classes
result['filters'] = filters
return result
def homeVideoContent(self):
data = self.fetchPage("/")
vlist = self.getlist(data("article"))
if not vlist:
data = self.fetchPage('/all-gay-porn/')
vlist = self.getlist(data("article"))
return {'list': vlist}
def categoryContent(self, tid, pg, filter, extend):
result = {}
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
        if int(pg) == 1:
url = tid
else:
url = f"{tid}page/{pg}/"
data = self.fetchPage(url)
vdata = self.getlist(data("article"))
result['list'] = vdata
return result
def detailContent(self, ids):
data = self.fetchPage(ids[0])
title = data('h1').text().strip()
iframe_src = None
iframe_elem = data('iframe')
if iframe_elem:
iframe_src = iframe_elem.attr('src')
if not iframe_src:
scripts = data('script')
for script in scripts.items():
script_text = script.text()
if 'iframe' in script_text and 'src' in script_text:
matches = re.findall(r'iframe.*?src=[\'"](https?://[^\'"]+)[\'"]', script_text)
if matches:
iframe_src = matches[0]
break
        # fetch the poster image - prefer a landscape version
vod_pic = ""
img_elem = data('img')
if img_elem:
vod_pic = img_elem.attr('src')
            # make sure a landscape poster is used
if vod_pic and ('poster' in vod_pic or 'cover' in vod_pic):
                # already a landscape image, leave it unchanged
pass
elif vod_pic:
                # try to convert it to a landscape variant
vod_pic = self.ensure_horizontal_poster(vod_pic)
vod = {
'vod_name': title,
'vod_content': 'GayVidsClub视频',
'vod_tag': 'GayVidsClub',
            'vod_pic': vod_pic,  # attach the poster image
'vod_play_from': 'GayVidsClub',
'vod_play_url': ''
}
play_lines = []
if iframe_src:
if not iframe_src.startswith('http'):
iframe_src = urljoin(self.host, iframe_src)
play_lines.append(f"直连${self.e64(iframe_src)}")
play_lines.append(f"嗅探${self.e64(ids[0])}")
if iframe_src:
play_lines.append(f"阿里云盘解析${self.e64(iframe_src)}")
play_lines.append(f"夸克网盘解析${self.e64(iframe_src)}")
play_lines.append(f"115网盘解析${self.e64(iframe_src)}")
play_lines.append(f"迅雷解析${self.e64(iframe_src)}")
play_lines.append(f"PikPak解析${self.e64(iframe_src)}")
play_lines.append(f"手机推送${iframe_src}")
else:
fallback_url = ids[0]
play_lines.append(f"阿里云盘解析${self.e64(fallback_url)}")
play_lines.append(f"夸克网盘解析${self.e64(fallback_url)}")
play_lines.append(f"115网盘解析${self.e64(fallback_url)}")
play_lines.append(f"迅雷解析${self.e64(fallback_url)}")
play_lines.append(f"PikPak解析${self.e64(fallback_url)}")
play_lines.append(f"手机推送${fallback_url}")
vod['vod_play_url'] = '#'.join(play_lines)
return {'list': [vod]}
def searchContent(self, key, quick, pg="1"):
        if int(pg) == 1:
url = f"/?s={key}"
else:
url = f"/page/{pg}/?s={key}"
data = self.fetchPage(url)
return {'list': self.getlist(data("article")), 'page': pg}
def playerContent(self, flag, id, vipFlags):
url = self.d64(id)
if "直连" in flag:
return {'parse': 0, 'url': url, 'header': self.headers}
elif "嗅探" in flag:
return {'parse': 1, 'url': url, 'header': self.headers}
elif "阿里云盘解析" in flag:
return self.parse_with_aliyun(url)
elif "夸克网盘解析" in flag:
return self.parse_with_quark(url)
elif "115网盘解析" in flag:
return self.parse_with_115(url)
elif "迅雷解析" in flag:
return self.parse_with_thunder(url)
elif "PikPak解析" in flag:
return self.parse_with_pikpak(url)
elif "手机推送" in flag:
return {'parse': 1, 'url': url, 'header': self.headers}
else:
return {'parse': 1, 'url': url, 'header': self.headers}
def fetchPage(self, url):
if not url.startswith('http'):
url = urljoin(self.host, url)
response = self.session.get(url)
return pq(response.text)
def getlist(self, items):
vlist = []
for item in items.items():
vid = item.find('a').attr('href')
img = item.find('img').attr('src')
name = item.find('h2').text()
if not name:
name = item.find('h3').text()
            # prefer a landscape poster image
if img:
if '?' in img:
img = img.split('?')[0]
                # normalise to a landscape variant where possible
img = self.ensure_horizontal_poster(img)
vlist.append({
'vod_id': vid,
'vod_name': name,
'vod_pic': img,
'vod_remarks': '',
                'style': {'type': 'rect', 'ratio': 1.33}  # landscape card style
})
return vlist
    def ensure_horizontal_poster(self, img_url):
        """
        Ensure a landscape (horizontal) poster image is used.
        """
        if not img_url:
            return img_url
        # already a landscape image - return it unchanged
        if 'poster' in img_url or 'cover' in img_url:
            return img_url
        # otherwise try to rewrite the URL to a landscape variant
        # common markers for portrait images
        vertical_indicators = ['thumb', 'vertical', 'portrait', 'square']
        # common markers for landscape images
        horizontal_indicators = ['poster', 'cover', 'horizontal', 'landscape']
        # does this look like a portrait image?
        is_vertical = any(indicator in img_url for indicator in vertical_indicators)
        if is_vertical:
            # try replacing each portrait marker with a landscape one
            for v_indicator in vertical_indicators:
                for h_indicator in horizontal_indicators:
                    if v_indicator in img_url:
                        # swap the portrait marker for a landscape marker
                        new_url = img_url.replace(v_indicator, h_indicator)
                        # probe whether the rewritten URL actually exists
                        try:
                            response = self.session.head(new_url, timeout=3)
                            if response.status_code == 200:
                                return new_url
                        except:
                            continue
            # if rewriting failed, fall back to appending a landscape hint
            if '?' in img_url:
                new_url = img_url + '&type=horizontal'
            else:
                new_url = img_url + '?type=horizontal'
            return new_url
        return img_url
def e64(self, data):
return b64encode(data.encode()).decode()
def d64(self, data):
from base64 import b64decode
return b64decode(data).decode()
def parse_with_aliyun(self, url):
try:
parse_result = {
'parse': 1,
'url': url,
'header': self.headers,
'parse_type': 'aliyun',
'message': '使用阿里云盘解析服务'
}
return parse_result
except Exception as e:
return {'parse': 1, 'url': url, 'header': self.headers}
def parse_with_quark(self, url):
try:
parse_result = {
'parse': 1,
'url': url,
'header': self.headers,
'parse_type': 'quark',
'message': '使用夸克网盘解析服务'
}
return parse_result
except Exception as e:
return {'parse': 1, 'url': url, 'header': self.headers}
def parse_with_115(self, url):
try:
parse_result = {
'parse': 1,
'url': url,
'header': self.headers,
'parse_type': '115',
'message': '使用115网盘解析服务'
}
return parse_result
except Exception as e:
return {'parse': 1, 'url': url, 'header': self.headers}
def parse_with_thunder(self, url):
try:
parse_result = {
'parse': 1,
'url': url,
'header': self.headers,
'parse_type': 'thunder',
'message': '使用迅雷解析服务'
}
return parse_result
except Exception as e:
return {'parse': 1, 'url': url, 'header': self.headers}
def parse_with_pikpak(self, url):
try:
parse_result = {
'parse': 1,
'url': url,
'header': self.headers,
'parse_type': 'pikpak',
'message': '使用PikPak解析服务'
}
return parse_result
except Exception as e:
return {'parse': 1, 'url': url, 'header': self.headers}
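
# Illustrative sketch (not called by the spider; the URL is a placeholder):
# vod_play_url above packs "<label>$<base64 payload>" entries joined by '#',
# and playerContent routes on the label after d64-decoding the payload.
if __name__ == '__main__':
    from base64 import b64decode, b64encode
    src = 'https://gayvidsclub.com/embed/demo'
    play_url = '#'.join([f"直连${b64encode(src.encode()).decode()}",
                         f"嗅探${b64encode(src.encode()).decode()}"])
    label, payload = play_url.split('#')[0].split('$')
    assert label == '直连' and b64decode(payload).decode() == src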

View File

@@ -0,0 +1,243 @@
# coding=utf-8
# !/usr/bin/python
import sys, re
import base64
import hashlib
import requests
from typing import Tuple
from base.spider import Spider
from datetime import datetime, timedelta
from urllib.parse import quote, unquote
from urllib3.util.retry import Retry
sys.path.append('..')
# Search by username. Keyword format: "category + space + keyword".
# The category letter is shown in the tab name, e.g. "女主播g" means category "g".
# Searching "g per" looks for "per" among "女主播"; keywords are case-insensitive
# but must be at least 3 characters, otherwise the result is empty.
class Spider(Spider):
def init(self, extend="{}"):
origin = 'https://zh.stripchat.com'
self.host = origin
self.headers = {
'Origin': origin,
'Referer': f"{origin}/",
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:144.0) Gecko/20100101 Firefox/144.0'
}
self.stripchat_key = self.decode_key_compact()
        # cache of computed key hashes
self._hash_cache = {}
self.create_session_with_retry()
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
def homeContent(self, filter):
CLASSES = [{'type_name': '女主播g', 'type_id': 'girls'}, {'type_name': '情侣c', 'type_id': 'couples'}, {'type_name': '男主播m', 'type_id': 'men'}, {'type_name': '跨性别t', 'type_id': 'trans'}]
VALUE = ({'n': '中国', 'v': 'tagLanguageChinese'}, {'n': '亚洲', 'v': 'ethnicityAsian'}, {'n': '白人', 'v': 'ethnicityWhite'}, {'n': '拉丁', 'v': 'ethnicityLatino'}, {'n': '混血', 'v': 'ethnicityMultiracial'}, {'n': '印度', 'v': 'ethnicityIndian'}, {'n': '阿拉伯', 'v': 'ethnicityMiddleEastern'}, {'n': '黑人', 'v': 'ethnicityEbony'})
VALUE_MEN = ({'n': '情侣', 'v': 'sexGayCouples'}, {'n': '直男', 'v': 'orientationStraight'})
TIDS = ('girls', 'couples', 'men', 'trans')
filters = {
tid: [{'key': 'tag', 'value': VALUE_MEN + VALUE if tid == 'men' else VALUE}]
for tid in TIDS
}
return {
'class': CLASSES,
'filters': filters
}
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
limit = 60
offset = limit * (int(pg) - 1)
url = f"{self.host}/api/front/models?improveTs=false&removeShows=false&limit={limit}&offset={offset}&primaryTag={tid}&sortBy=stripRanking&rcmGrp=A&rbCnGr=true&prxCnGr=false&nic=false"
if 'tag' in extend:
url += "&filterGroupTags=%5B%5B%22" + extend['tag'] + "%22%5D%5D"
rsp = self.fetch(url).json()
videos = [
{
"vod_id": str(vod['username']).strip(),
"vod_name": f"{self.country_code_to_flag(str(vod['country']).strip())}{str(vod['username']).strip()}",
"vod_pic": f"https://img.doppiocdn.net/thumbs/{vod['snapshotTimestamp']}/{vod['id']}",
"vod_remarks": "" if vod.get('status') == "public" else "🎫"
}
for vod in rsp.get('models', [])
]
total = int(rsp.get('filteredCount', 0))
return {
"list": videos,
"page": pg,
"pagecount": (total + limit - 1) // limit,
"limit": limit,
"total": total
}
def detailContent(self, array):
username = array[0]
rsp = self.fetch(f"{self.host}/api/front/v2/models/username/{username}/cam").json()
info = rsp['cam']
user = rsp['user']['user']
id = str(user['id'])
country = str(user['country']).strip()
isLive = "" if user['isLive'] else " 已下播"
flag = self.country_code_to_flag(country)
remark, startAt = '', ''
if show := info.get('show'):
startAt = show.get('createdAt')
elif show := info.get('groupShowAnnouncement'):
startAt = show.get('startAt')
if startAt:
BJtime = (datetime.strptime(startAt, "%Y-%m-%dT%H:%M:%SZ") + timedelta(hours=8)).strftime("%m月%d%H:%M")
remark = f"🎫 始于 {BJtime}"
vod = {
"vod_id": id,
"vod_name": str(info['topic']).strip(),
"vod_pic": str(user['avatarUrl']),
"vod_director": f"{flag}{username}{isLive}",
"vod_remarks": remark,
'vod_play_from': 'StripChat',
'vod_play_url': f"{id}${id}"
}
return {'list': [vod]}
def process_key(self, key: str) -> Tuple[str, str]:
tags = {'G': 'girls', 'C': 'couples', 'M': 'men', 'T': 'trans'}
        parts = key.split(maxsplit=1)  # split on the first space only
if len(parts) > 1 and (tag := tags.get(parts[0].upper())):
return tag, parts[1].strip()
return 'girls', key.strip()
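    # e.g. process_key("g per") -> ('girls', 'per'); process_key("perla") -> ('girls', 'perla')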
def searchContent(self, key, quick, pg="1"):
result = {}
if int(pg) > 1:
return result
tag, key = self.process_key(key)
url = f"{self.host}/api/front/v4/models/search/group/username?query={key}&limit=900&primaryTag={tag}"
rsp = self.fetch(url).json()
result['list'] = [
{
"vod_id": str(user['username']).strip(),
"vod_name": f"{self.country_code_to_flag(str(user['country']).strip())}{user['username']}",
"vod_pic": f"https://img.doppiocdn.net/thumbs/{user['snapshotTimestamp']}/{user['id']}",
"vod_remarks": "" if user['status'] == "public" else "🎫"
}
for user in rsp.get('models', [])
            if user['isLive']  # keep only live models
]
return result
def playerContent(self, flag, id, vipFlags):
url = f"https://edge-hls.doppiocdn.net/hls/{id}/master/{id}_auto.m3u8?playlistType=lowLatency"
rsp = self.fetch(url)
lines = rsp.text.strip().split('\n')
psch, pkey = '', ''
url = []
for i, line in enumerate(lines):
if line.startswith('#EXT-X-MOUFLON:'):
if parts := line.split(':'):
if len(parts) >= 4:
psch, pkey = parts[2], parts[3]
if '#EXT-X-STREAM-INF' in line:
name_start = line.find('NAME="') + 6
name_end = line.find('"', name_start)
qn = line[name_start:name_end]
                # the stream URL is on the next line
url_base = lines[i + 1]
                # build the final URL, appending the psch and pkey parameters
full_url = f"{url_base}&psch={psch}&pkey={pkey}"
proxy_url = f"{self.getProxyUrl()}&url={quote(full_url)}"
                # collect the quality name and its proxied URL
url.extend([qn, proxy_url])
return {
"url": url,
"parse": '0',
"contentType": '',
"header": self.headers
}
def localProxy(self, param):
url = unquote(param['url'])
data = self.fetch(url)
if data.status_code == 403:
data = self.fetch(re.sub(r'\d+p\d*\.m3u8', '160p_blurred.m3u8', url))
if data.status_code != 200:
return [404, "text/plain", ""]
data = data.text
if "#EXT-X-MOUFLON:FILE" in data:
data = self.process_m3u8_content_v2(data)
return [200, "application/vnd.apple.mpegur", data]
def process_m3u8_content_v2(self, m3u8_content):
lines = m3u8_content.strip().split('\n')
for i, line in enumerate(lines):
            if (line.startswith('#EXT-X-MOUFLON:FILE:')
                    and i + 1 < len(lines) and 'media.mp4' in lines[i + 1]):
encrypted_data = line.split(':', 2)[2].strip()
try:
decrypted_data = self.decrypt(encrypted_data, self.stripchat_key)
except Exception as e:
decrypted_data = self.decrypt(encrypted_data, "Zokee2OhPh9kugh4")
lines[i + 1] = lines[i + 1].replace('media.mp4', decrypted_data)
return '\n'.join(lines)
def country_code_to_flag(self, country_code):
if len(country_code) != 2 or not country_code.isalpha():
return country_code
flag_emoji = ''.join([chr(ord(c.upper()) - ord('A') + 0x1F1E6) for c in country_code])
return flag_emoji
def decode_key_compact(self):
base64_str = "NTEgNzUgNjUgNjEgNmUgMzQgNjMgNjEgNjkgMzkgNjIgNmYgNGEgNjEgMzUgNjE="
decoded = base64.b64decode(base64_str).decode('utf-8')
key_bytes = bytes(int(hex_str, 16) for hex_str in decoded.split(" "))
return key_bytes.decode('utf-8')
def compute_hash(self, key: str) -> bytes:
"""计算并缓存SHA-256哈希"""
if key not in self._hash_cache:
sha256 = hashlib.sha256()
sha256.update(key.encode('utf-8'))
self._hash_cache[key] = sha256.digest()
return self._hash_cache[key]
def decrypt(self, encrypted_b64: str, key: str) -> str:
        # fix Base64 padding
padding = len(encrypted_b64) % 4
if padding:
encrypted_b64 += '=' * (4 - padding)
        # hash the key, then decrypt
hash_bytes = self.compute_hash(key)
encrypted_data = base64.b64decode(encrypted_b64)
        # XOR decryption against the repeating digest keystream
decrypted_bytes = bytearray()
for i, cipher_byte in enumerate(encrypted_data):
key_byte = hash_bytes[i % len(hash_bytes)]
decrypted_bytes.append(cipher_byte ^ key_byte)
return decrypted_bytes.decode('utf-8')
def create_session_with_retry(self):
self.session = requests.Session()
retry_strategy = Retry(
total = 3,
backoff_factor = 0.3,
            status_forcelist = [429, 500, 502, 503, 504]  # status codes worth retrying
)
adapter = requests.adapters.HTTPAdapter(max_retries=retry_strategy)
self.session.mount("http://", adapter)
self.session.mount("https://", adapter)
def fetch(self, url):
return self.session.get(url, headers=self.headers, timeout=10)
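
# Illustrative sketch (dummy key; the real one comes from decode_key_compact):
# decrypt() above is a symmetric XOR against the repeating SHA-256 digest of
# the key, so applying the same operation twice round-trips the data.
if __name__ == '__main__':
    digest = hashlib.sha256(b'demo-key').digest()
    plain = b'media_1080p.mp4'
    cipher = bytes(b ^ digest[i % len(digest)] for i, b in enumerate(plain))
    token = base64.b64encode(cipher).decode().rstrip('=')  # tokens may arrive unpadded
    token += '=' * (-len(token) % 4)  # re-pad, as decrypt() does
    out = bytes(b ^ digest[i % len(digest)] for i, b in enumerate(base64.b64decode(token)))
    assert out == plain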

View File

@@ -0,0 +1,716 @@
# -*- coding: utf-8 -*-
# 🌈 Love
import json
import random
import re
import sys
import threading
import time
from base64 import b64decode, b64encode
from urllib.parse import urlparse, quote
import requests
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
try:self.proxies = json.loads(extend)
except:self.proxies = {}
self.headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Cache-Control': 'no-cache',
}
# Use working dynamic URLs directly
self.host = self.get_working_host()
self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})
self.log(f"使用站点: {self.host}")
print(f"使用站点: {self.host}")
pass
def getName(self):
return "🌈 今日看料"
def isVideoFormat(self, url):
# Treat direct media formats as playable without parsing
return any(ext in (url or '') for ext in ['.m3u8', '.mp4', '.ts'])
def manualVideoCheck(self):
return False
def destroy(self):
pass
def homeContent(self, filter):
try:
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'class': [], 'list': []}
data = self.getpq(response.text)
result = {}
classes = []
            # prefer pulling categories from the navbar
nav_selectors = [
'#navbarCollapse .navbar-nav .nav-item .nav-link',
'.navbar-nav .nav-item .nav-link',
'#nav .menu-item a',
'.menu .menu-item a'
]
found_categories = False
for selector in nav_selectors:
for item in data(selector).items():
href = item.attr('href') or ''
name = item.text().strip()
                    # skip links that are not categories
if (not href or not name or
href == '#' or
href.startswith('http') or
'about' in href.lower() or
'contact' in href.lower() or
'tags' in href.lower() or
'top' in href.lower() or
'start' in href.lower() or
'time' in href.lower()):
continue
                    # only accept clear category links (containing 'category' or a known path)
                    if '/category/' in href or any(cat in href for cat in ['/dy/', '/ks/', '/douyu/', '/hy/', '/hj/', '/tt/', '/wh/', '/asmr/', '/xb/', '/xsp/', '/rdgz/']):
                        # normalise relative paths
if href.startswith('/'):
type_id = href
else:
type_id = f'/{href}'
classes.append({
'type_name': name,
'type_id': type_id
})
found_categories = True
            # fall back to the category dropdown if the navbar yielded nothing
if not found_categories:
category_selectors = [
'.category-list a',
'.slide-toggle + .category-list a',
'.menu .category-list a'
]
for selector in category_selectors:
for item in data(selector).items():
href = item.attr('href') or ''
name = item.text().strip()
if href and name and href != '#':
if href.startswith('/'):
type_id = href
else:
type_id = f'/{href}'
classes.append({
'type_name': name,
'type_id': type_id
})
found_categories = True
            # deduplicate
unique_classes = []
seen_ids = set()
for cls in classes:
if cls['type_id'] not in seen_ids:
unique_classes.append(cls)
seen_ids.add(cls['type_id'])
            # fall back to default categories when none were found
if not unique_classes:
unique_classes = [
{'type_name': '热点关注', 'type_id': '/category/rdgz/'},
{'type_name': '抖音', 'type_id': '/category/dy/'},
{'type_name': '快手', 'type_id': '/category/ks/'},
{'type_name': '斗鱼', 'type_id': '/category/douyu/'},
{'type_name': '虎牙', 'type_id': '/category/hy/'},
{'type_name': '花椒', 'type_id': '/category/hj/'},
{'type_name': '推特', 'type_id': '/category/tt/'},
{'type_name': '网红', 'type_id': '/category/wh/'},
{'type_name': 'ASMR', 'type_id': '/category/asmr/'},
{'type_name': 'X播', 'type_id': '/category/xb/'},
{'type_name': '小视频', 'type_id': '/category/xsp/'}
]
result['class'] = unique_classes
result['list'] = self.getlist(data('#index article a, #archive article a'))
return result
except Exception as e:
print(f"homeContent error: {e}")
return {'class': [], 'list': []}
def homeVideoContent(self):
try:
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': []}
data = self.getpq(response.text)
return {'list': self.getlist(data('#index article a, #archive article a'))}
except Exception as e:
print(f"homeVideoContent error: {e}")
return {'list': []}
def categoryContent(self, tid, pg, filter, extend):
try:
            # build the URL, trimming redundant slashes
base_url = tid.lstrip('/').rstrip('/')
if pg and pg != '1':
url = f"{self.host}{base_url}/{pg}/"
else:
url = f"{self.host}{base_url}/"
print(f"分类页面URL: {url}")
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
print(f"分类页面请求失败: {response.status_code}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a, .post-card'), tid)
            # fall back to broader selectors when nothing matched
if not videos:
videos = self.getlist(data('article a, .post a, .entry-title a'), tid)
print(f"找到 {len(videos)} 个视频")
            # improved page-count detection
pagecount = self.detect_page_count(data, pg)
result = {}
result['list'] = videos
result['page'] = pg
result['pagecount'] = pagecount
result['limit'] = 90
result['total'] = 999999
return result
except Exception as e:
print(f"categoryContent error: {e}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
def tagContent(self, tid, pg, filter, extend):
"""标签页面内容"""
try:
            # build the URL, trimming redundant slashes
base_url = tid.lstrip('/').rstrip('/')
if pg and pg != '1':
url = f"{self.host}{base_url}/{pg}/"
else:
url = f"{self.host}{base_url}/"
print(f"标签页面URL: {url}")
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
print(f"标签页面请求失败: {response.status_code}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a, .post-card'), tid)
            # fall back to broader selectors when nothing matched
if not videos:
videos = self.getlist(data('article a, .post a, .entry-title a'), tid)
print(f"找到 {len(videos)} 个标签相关视频")
            # page-count detection
pagecount = self.detect_page_count(data, pg)
result = {}
result['list'] = videos
result['page'] = pg
result['pagecount'] = pagecount
result['limit'] = 90
result['total'] = 999999
return result
except Exception as e:
print(f"tagContent error: {e}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
def detect_page_count(self, data, current_page):
"""改进的页数检测方法"""
pagecount = 99999 # 默认大数字,允许无限翻页
# 方法1: 检查分页器中的所有页码链接
page_numbers = []
# 查找所有可能的页码链接
page_selectors = [
'.page-navigator a',
'.pagination a',
'.pages a',
'.page-numbers a'
]
for selector in page_selectors:
for page_link in data(selector).items():
href = page_link.attr('href') or ''
text = page_link.text().strip()
                # pull the page number out of the href
if href:
                    # matches paths like /category/dy/2/
match = re.search(r'/(\d+)/?$', href.rstrip('/'))
if match:
page_num = int(match.group(1))
if page_num not in page_numbers:
page_numbers.append(page_num)
                # pull a numeric page number from the link text
if text and text.isdigit():
page_num = int(text)
if page_num not in page_numbers:
page_numbers.append(page_num)
        # if any page numbers were found, use the largest
if page_numbers:
max_page = max(page_numbers)
print(f"从分页器检测到最大页码: {max_page}")
return max_page
        # Method 2: check for a "next page" button
next_selectors = [
'.page-navigator .next',
'.pagination .next',
'.next-page',
'a:contains("下一页")'
]
for selector in next_selectors:
if data(selector):
print("检测到下一页按钮,允许继续翻页")
return 99999
        # Method 3: very few items on the page usually means there is no next page
if len(data('#archive article, #index article, .post-card')) < 5:
print("当前页内容较少,可能没有下一页")
return int(current_page)
print("使用默认页数: 99999")
return 99999
def detailContent(self, ids):
try:
url = f"{self.host}{ids[0]}" if not ids[0].startswith('http') else ids[0]
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [{'vod_play_from': '今日看料', 'vod_play_url': f'页面加载失败${url}'}]}
data = self.getpq(response.text)
vod = {'vod_play_from': '今日看料'}
            # extract the title
title_selectors = ['.post-title', 'h1.entry-title', 'h1', '.post-card-title']
for selector in title_selectors:
title_elem = data(selector)
if title_elem:
vod['vod_name'] = title_elem.text().strip()
break
if 'vod_name' not in vod:
vod['vod_name'] = '今日看料视频'
            # extract the content/description
try:
clist = []
if data('.tags .keywords a'):
for k in data('.tags .keywords a').items():
title = k.text()
href = k.attr('href')
if title and href:
                        # make href relative
if href.startswith(self.host):
href = href.replace(self.host, '')
clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')
vod['vod_content'] = ' '.join(clist) if clist else data('.post-content').text() or vod['vod_name']
except:
vod['vod_content'] = vod['vod_name']
            # collect the video URLs
try:
plist = []
used_names = set()
                # look for DPlayer embeds
if data('.dplayer'):
for c, k in enumerate(data('.dplayer').items(), start=1):
config_attr = k.attr('data-config')
if config_attr:
try:
config = json.loads(config_attr)
video_url = config.get('video', {}).get('url', '')
if video_url:
name = f"视频{c}"
count = 2
while name in used_names:
name = f"视频{c}_{count}"
count += 1
used_names.add(name)
self.log(f"解析到视频: {name} -> {video_url}")
print(f"解析到视频: {name} -> {video_url}")
plist.append(f"{name}${video_url}")
except:
continue
                # look for plain video tags
if not plist:
video_selectors = ['video source', 'video', 'iframe[src*="video"]', 'a[href*=".m3u8"]', 'a[href*=".mp4"]']
for selector in video_selectors:
for c, elem in enumerate(data(selector).items(), start=1):
src = elem.attr('src') or elem.attr('href') or ''
if src and any(ext in src for ext in ['.m3u8', '.mp4', 'video']):
name = f"视频{c}"
count = 2
while name in used_names:
name = f"视频{c}_{count}"
count += 1
used_names.add(name)
plist.append(f"{name}${src}")
if plist:
self.log(f"拼装播放列表,共{len(plist)}")
print(f"拼装播放列表,共{len(plist)}")
vod['vod_play_url'] = '#'.join(plist)
else:
vod['vod_play_url'] = f"正片${url}"
except Exception as e:
print(f"视频解析错误: {e}")
vod['vod_play_url'] = f"正片${url}"
return {'list': [vod]}
except Exception as e:
print(f"detailContent error: {e}")
return {'list': [{'vod_play_from': '今日看料', 'vod_play_url': f'详情页加载失败${ids[0] if ids else ""}'}]}
def searchContent(self, key, quick, pg="1"):
try:
            # prefer tag search
encoded_key = quote(key)
url = f"{self.host}/tag/{encoded_key}/{pg}" if pg != "1" else f"{self.host}/tag/{encoded_key}/"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
                # fall back to the search page
url = f"{self.host}/search/{encoded_key}/{pg}" if pg != "1" else f"{self.host}/search/{encoded_key}/"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [], 'page': pg}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a, .post-card'))
            # use the improved page-count detection
pagecount = self.detect_page_count(data, pg)
return {'list': videos, 'page': pg, 'pagecount': pagecount}
except Exception as e:
print(f"searchContent error: {e}")
return {'list': [], 'page': pg}
def getTagsContent(self, pg="1"):
"""获取标签页面内容"""
try:
url = f"{self.host}/tags.html"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [], 'page': pg}
data = self.getpq(response.text)
tags = []
            # extract every tag from the page - use a permissive selector
for tag_elem in data('a[href*="/tag/"]').items():
tag_name = tag_elem.text().strip()
tag_href = tag_elem.attr('href') or ''
                if tag_name and tag_href and '/tag/' in tag_href and tag_name != '全部标签':  # skip the heading link
                    # normalise to a relative path
tag_id = tag_href.replace(self.host, '')
if not tag_id.startswith('/'):
tag_id = '/' + tag_id
tags.append({
'vod_id': tag_id,
'vod_name': f"🏷️ {tag_name}",
'vod_pic': '',
'vod_remarks': '标签',
'vod_tag': 'tag',
'style': {"type": "rect", "ratio": 1.33}
})
print(f"找到 {len(tags)} 个标签")
            # pagination - the tags page normally fits on one page
result = {}
result['list'] = tags
result['page'] = pg
            result['pagecount'] = 1  # the tags page usually has only one page
result['limit'] = 999
result['total'] = len(tags)
return result
except Exception as e:
print(f"getTagsContent error: {e}")
return {'list': [], 'page': pg}
def playerContent(self, flag, id, vipFlags):
url = id
p = 1
if self.isVideoFormat(url):
if '.m3u8' in url:
url = self.proxy(url)
p = 0
self.log(f"播放请求: parse={p}, url={url}")
print(f"播放请求: parse={p}, url={url}")
return {'parse': p, 'url': url, 'header': self.headers}
def localProxy(self, param):
try:
if param.get('type') == 'img':
img_url = self.d64(param['url'])
if not img_url.startswith(('http://', 'https://')):
if img_url.startswith('/'):
img_url = f"{self.host}{img_url}"
else:
img_url = f"{self.host}/{img_url}"
res = requests.get(img_url, headers=self.headers, proxies=self.proxies, timeout=10)
return [200, res.headers.get('Content-Type', 'image/jpeg'), res.content]
elif param.get('type') == 'm3u8':
return self.m3Proxy(param['url'])
else:
return self.tsProxy(param['url'])
except Exception as e:
print(f"localProxy error: {e}")
return [500, "text/plain", f"Proxy error: {str(e)}".encode()]
def proxy(self, data, type='m3u8'):
if data and len(self.proxies):
return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
else:
return data
def m3Proxy(self, url):
try:
url = self.d64(url)
ydata = requests.get(url, headers=self.headers, proxies=self.proxies, allow_redirects=False)
data = ydata.content.decode('utf-8')
if ydata.headers.get('Location'):
url = ydata.headers['Location']
data = requests.get(url, headers=self.headers, proxies=self.proxies).content.decode('utf-8')
lines = data.strip().split('\n')
last_r = url[:url.rfind('/')]
parsed_url = urlparse(url)
durl = parsed_url.scheme + "://" + parsed_url.netloc
iskey = True
for index, string in enumerate(lines):
if iskey and 'URI' in string:
pattern = r'URI="([^"]*)"'
match = re.search(pattern, string)
if match:
lines[index] = re.sub(pattern, f'URI="{self.proxy(match.group(1), "mkey")}"', string)
iskey = False
continue
if '#EXT' not in string:
if 'http' not in string:
domain = last_r if string.count('/') < 2 else durl
string = domain + ('' if string.startswith('/') else '/') + string
lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
data = '\n'.join(lines)
return [200, "application/vnd.apple.mpegur", data]
except Exception as e:
print(f"m3Proxy error: {e}")
return [500, "text/plain", f"m3u8 proxy error: {str(e)}".encode()]
def tsProxy(self, url):
try:
url = self.d64(url)
data = requests.get(url, headers=self.headers, proxies=self.proxies, stream=True)
return [200, data.headers.get('Content-Type', 'video/mp2t'), data.content]
except Exception as e:
print(f"tsProxy error: {e}")
return [500, "text/plain", f"ts proxy error: {str(e)}".encode()]
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self, encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def get_working_host(self):
"""Get working host from known dynamic URLs"""
dynamic_urls = [
'https://kanliao2.one/',
'https://kanliao7.org/',
'https://kanliao7.net/',
'https://kanliao14.com/'
]
for url in dynamic_urls:
try:
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)
if response.status_code == 200:
data = self.getpq(response.text)
articles = data('#index article a, #archive article a')
if len(articles) > 0:
self.log(f"选用可用站点: {url}")
print(f"选用可用站点: {url}")
return url
except Exception as e:
continue
self.log(f"未检测到可用站点,回退: {dynamic_urls[0]}")
print(f"未检测到可用站点,回退: {dynamic_urls[0]}")
return dynamic_urls[0]
def getlist(self, data, tid=''):
videos = []
for k in data.items():
a = k.attr('href')
b = k('h2').text() or k('.post-card-title').text() or k('.entry-title').text() or k.text()
c = k('span[itemprop="datePublished"]').text() or k('.post-meta, .entry-meta, time, .post-card-info').text()
            # filter ads: check for the "热搜HOT" badge
if self.is_advertisement(k):
print(f"过滤广告: {b}")
continue
if a and b and b.strip():
                # normalise relative paths
if not a.startswith('http'):
if a.startswith('/'):
vod_id = a
else:
vod_id = f'/{a}'
else:
vod_id = a
videos.append({
'vod_id': vod_id,
'vod_name': b.replace('\n', ' ').strip(),
'vod_pic': self.get_article_img(k),
'vod_remarks': c.strip() if c else '',
'vod_tag': '',
'style': {"type": "rect", "ratio": 1.33}
})
return videos
def is_advertisement(self, article_elem):
"""判断是否为广告包含热搜HOT标志"""
# 检查.wraps元素是否包含"热搜HOT"文本
hot_elements = article_elem.find('.wraps')
for elem in hot_elements.items():
if '热搜HOT' in elem.text():
return True
        # check the title for ad keywords
title = article_elem('h2').text() or article_elem('.post-card-title').text() or ''
ad_keywords = ['热搜HOT', '手机链接', 'DNS设置', '修改DNS', 'WIFI设置']
if any(keyword in title for keyword in ad_keywords):
return True
        # check for the gradient backgrounds peculiar to ads
style = article_elem.attr('style') or ''
if 'background:' in style and any(gradient in style for gradient in ['-webkit-linear-gradient', 'linear-gradient']):
            # narrow down to the specific ad colour combinations
ad_gradients = ['#ec008c,#fc6767', '#ffe259,#ffa751']
if any(gradient in style for gradient in ad_gradients):
return True
return False
def get_article_img(self, article_elem):
"""从文章元素中提取图片,多种方式尝试"""
# 方式1: 从script标签中提取loadBannerDirect
script_text = article_elem('script').text()
if script_text:
match = re.search(r"loadBannerDirect\('([^']+)'", script_text)
if match:
url = match.group(1)
if not url.startswith(('http://', 'https://')):
if url.startswith('/'):
url = f"{self.host}{url}"
else:
url = f"{self.host}/{url}"
return f"{self.getProxyUrl()}&url={self.e64(url)}&type=img"
        # strategy 2: the background-image style
bg_elem = article_elem.find('.blog-background')
if bg_elem:
style = bg_elem.attr('style') or ''
bg_match = re.search(r'background-image:\s*url\(["\']?([^"\'\)]+)["\']?\)', style)
if bg_match:
img_url = bg_match.group(1)
if img_url and not img_url.startswith('data:'):
if not img_url.startswith(('http://', 'https://')):
if img_url.startswith('/'):
img_url = f"{self.host}{img_url}"
else:
img_url = f"{self.host}/{img_url}"
return f"{self.getProxyUrl()}&url={self.e64(img_url)}&type=img"
        # strategy 3: img tags
img_elem = article_elem.find('img')
if img_elem:
data_src = img_elem.attr('data-src')
if data_src:
if not data_src.startswith(('http://', 'https://')):
if data_src.startswith('/'):
data_src = f"{self.host}{data_src}"
else:
data_src = f"{self.host}/{data_src}"
return f"{self.getProxyUrl()}&url={self.e64(data_src)}&type=img"
src = img_elem.attr('src')
if src:
if not src.startswith(('http://', 'https://')):
if src.startswith('/'):
src = f"{self.host}{src}"
else:
src = f"{self.host}/{src}"
return f"{self.getProxyUrl()}&url={self.e64(src)}&type=img"
return ''
def getpq(self, data):
try:
return pq(data)
except Exception as e:
print(f"{str(e)}")
return pq(data.encode('utf-8'))
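
# Illustrative sketch (not called by the spider): detect_page_count() above
# pulls trailing page numbers such as /category/dy/12/ with this same regex.
if __name__ == '__main__':
    hrefs = ['/category/dy/2/', '/category/dy/12/', '/category/dy/']
    nums = [int(m.group(1)) for h in hrefs if (m := re.search(r'/(\d+)/?$', h.rstrip('/')))]
    assert max(nums) == 12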

View File

@@ -0,0 +1,533 @@
import re
import sys
import urllib.parse
import threading
import time
import requests
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def __init__(self):
        # basic configuration
self.name = '好色TV'
self.host = 'https://hsex.icu/'
self.candidate_hosts = [
"https://hsex.icu/",
"https://hsex1.icu/",
"https://hsex.tv/"
]
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Referer': self.host
}
self.timeout = 5000
        # Category map. Key fix: the '视频' category uses an empty url_suffix so
        # its pages follow the list-{pg}.htm format.
        self.class_map = {
            '视频': {'type_id': 'list', 'url_suffix': ''},  # fix #1: empty suffix for the video category
'周榜': {'type_id': 'top7', 'url_suffix': 'top7'},
'月榜': {'type_id': 'top', 'url_suffix': 'top'},
'5分钟+': {'type_id': '5min', 'url_suffix': '5min'},
'10分钟+': {'type_id': 'long', 'url_suffix': 'long'}
}
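        # e.g. page 2 of '视频' -> list-2.htm, page 2 of '周榜' -> top7_list-2.htm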
def getName(self):
return self.name
def init(self, extend=""):
        # pick the fastest reachable mirror
self.host = self.get_fastest_host()
self.headers['Referer'] = self.host
def isVideoFormat(self, url):
if not url:
return False
return any(fmt in url.lower() for fmt in ['.mp4', '.m3u8', '.flv', '.avi'])
def manualVideoCheck(self):
def check(url):
if not self.isVideoFormat(url):
return False
try:
resp = self.fetch(url, headers=self.headers, method='HEAD', timeout=3)
return resp.status_code in (200, 302) and 'video' in resp.headers.get('Content-Type', '')
except:
return False
return check
def get_fastest_host(self):
"""测试候选域名,返回最快可用的"""
results = {}
threads = []
def test_host(url):
try:
start_time = time.time()
resp = requests.head(url, headers=self.headers, timeout=2, allow_redirects=False)
if resp.status_code in (200, 301, 302):
delay = (time.time() - start_time) * 1000
results[url] = delay
else:
results[url] = float('inf')
except:
results[url] = float('inf')
for host in self.candidate_hosts:
t = threading.Thread(target=test_host, args=(host,))
threads.append(t)
t.start()
for t in threads:
t.join()
valid_hosts = [(h, d) for h, d in results.items() if d != float('inf')]
return valid_hosts[0][0] if valid_hosts else self.candidate_hosts[0]
def homeContent(self, filter):
result = {}
        # build the category list
classes = []
for name, info in self.class_map.items():
classes.append({
'type_name': name,
'type_id': info['type_id']
})
result['class'] = classes
try:
            # fetch the home page
html = self.fetch_with_retry(self.host, retry=2, timeout=5).text
data = pq(html)
            # extract the video list
vlist = []
items = data('.row .col-xs-6.col-md-3')
for item in items.items():
try:
title = item('h5').text().strip()
if not title:
continue
                    # extract the image URL
style = item('.image').attr('style') or ''
pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', style)
vod_pic = pic_match.group(1) if pic_match else ''
if vod_pic and not vod_pic.startswith('http'):
vod_pic = f"{self.host.rstrip('/')}/{vod_pic.lstrip('/')}"
                    # extract the duration remark
desc = item('.duration').text().strip() or '未知'
                    # extract the video id
href = item('a').attr('href') or ''
if not href:
continue
vod_id = href.split('/')[-1]
if not vod_id.endswith('.htm'):
vod_id += '.htm'
vlist.append({
'vod_id': vod_id,
'vod_name': title,
'vod_pic': vod_pic,
'vod_remarks': desc
})
except Exception as e:
print(f"解析首页视频项失败: {e}")
continue
result['list'] = vlist
except Exception as e:
print(f"首页解析失败: {e}")
result['list'] = []
return result
    def homeVideoContent(self):
        return {'list': []}
def categoryContent(self, tid, pg, filter, extend):
result = {}
try:
            # look up the category info
cate_info = None
for name, info in self.class_map.items():
if info['type_id'] == tid:
cate_info = info
break
if not cate_info:
result['list'] = []
return result
            # key fix: the video category uses a different URL format from the others
            if tid == 'list':  # the '视频' category (type_id 'list')
                url = f"{self.host}list-{pg}.htm"  # format: list-1.htm, list-2.htm, ...
            else:  # other categories (weekly/monthly ranks etc.): xxx_list-{pg}.htm
                url = f"{self.host}{cate_info['url_suffix']}_list-{pg}.htm"
            # request the category page
html = self.fetch(url, headers=self.headers, timeout=8).text
html = html.encode('utf-8', errors='ignore').decode('utf-8')
data = pq(html)
            # extract the video list
vlist = []
items = data('.row .col-xs-6.col-md-3')
for item in items.items():
try:
title = item('h5').text().strip()
if not title:
continue
style = item('.image').attr('style') or ''
pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', style)
vod_pic = pic_match.group(1) if pic_match else ''
if vod_pic and not vod_pic.startswith('http'):
vod_pic = f"{self.host.rstrip('/')}/{vod_pic.lstrip('/')}"
desc = item('.duration').text().strip() or '未知'
href = item('a').attr('href') or ''
if not href:
continue
vod_id = href.split('/')[-1]
if not vod_id.endswith('.htm'):
vod_id += '.htm'
vlist.append({
'vod_id': vod_id,
'vod_name': title,
'vod_pic': vod_pic,
'vod_remarks': desc
})
except Exception as e:
print(f"解析分类视频项失败: {e}")
continue
            # extract the total page count
pagecount = 1
try:
pagination = data('.pagination1 li a')
page_nums = []
for a in pagination.items():
text = a.text().strip()
if text.isdigit():
page_nums.append(int(text))
if page_nums:
pagecount = max(page_nums)
except:
pagecount = 1
result['list'] = vlist
result['page'] = pg
result['pagecount'] = pagecount
result['limit'] = len(vlist)
result['total'] = 999999
except Exception as e:
print(f"分类解析失败: {e}")
result['list'] = []
result['page'] = pg
result['pagecount'] = 1
result['limit'] = 0
result['total'] = 0
return result
def detailContent(self, ids):
try:
if not ids or not ids[0]:
return {'list': []}
vod_id = ids[0].strip()
if not vod_id.endswith('.htm'):
vod_id += '.htm'
url = f"{self.host}{vod_id.lstrip('/')}"
html = self.fetch_with_retry(url, retry=2, timeout=8).text
html = html.encode('utf-8', errors='ignore').decode('utf-8')
data = pq(html)
            # extract the title
title = data('.panel-title, .video-title, h1').text().strip() or '未知标题'
            # extract the cover image
vod_pic = ''
poster_style = data('.vjs-poster').attr('style') or ''
pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', poster_style)
if pic_match:
vod_pic = pic_match.group(1)
if not vod_pic:
vod_pic = data('.video-pic img, .vjs-poster img, .thumbnail img').attr('src') or ''
if vod_pic and not vod_pic.startswith('http'):
vod_pic = f"{self.host}{vod_pic.lstrip('/')}"
            # extract duration and view count
duration = '未知'
views = '未知'
info_items = data('.panel-body .col-md-3, .video-info .info-item, .info p')
for item in info_items.items():
text = item.text().strip()
if '时长' in text or 'duration' in text.lower():
duration = text.replace('时长:', '').replace('时长', '').strip()
elif '观看' in text or 'views' in text.lower():
views_match = re.search(r'(\d+\.?\d*[kK]?)次观看', text)
if views_match:
views = views_match.group(1)
else:
views = text.replace('观看:', '').replace('观看', '').strip()
remarks = f"{duration} | {views}"
            # extract the play URL
video_url = ''
m3u8_match = re.search(r'videoUrl\s*=\s*["\']([^"\']+\.m3u8)["\']', html)
if m3u8_match:
video_url = m3u8_match.group(1)
if not video_url:
source = data('source[src*=".m3u8"], source[src*=".mp4"]')
video_url = source.attr('src') or ''
if not video_url:
js_matches = re.findall(r'(https?://[^\s"\']+\.(?:m3u8|mp4))', html)
if js_matches:
video_url = js_matches[0]
if video_url and not video_url.startswith('http'):
video_url = f"{self.host}{video_url.lstrip('/')}"
vod = {
'vod_id': vod_id,
'vod_name': title,
'vod_pic': vod_pic,
'vod_remarks': remarks,
'vod_play_from': '好色TV',
'vod_play_url': f'正片${video_url}' if video_url else '正片$暂无地址'
}
return {'list': [vod]}
except Exception as e:
print(f"详情解析失败: {e}")
return {'list': []}
def searchContent(self, key, quick, pg=1):
try:
            # validate the keyword
if not key.strip():
print("搜索关键词不能为空")
return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
            # encode the keyword
encoded_key = urllib.parse.quote(key.strip(), encoding='utf-8', errors='replace')
            # build the search URL
search_url = f"{self.host}search.htm"
params = {
'search': encoded_key,
'page': int(pg)
}
            # issue the request
resp = self.fetch(
url=search_url,
headers=self.headers,
params=params,
timeout=8
)
if resp.status_code not in (200, 302):
print(f"搜索页面请求失败URL{resp.url},状态码:{resp.status_code}")
return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
# 处理页面内容
html = resp.text.encode('utf-8', errors='ignore').decode('utf-8')
data = pq(html)
# 检测无结果场景
no_result_texts = ['没有找到相关视频', '无搜索结果', 'No results found', '未找到匹配内容']
no_result = any(data(f'div:contains("{text}"), p:contains("{text}")').text() for text in no_result_texts)
if no_result:
print(f"搜索关键词「{key}」第{pg}页无结果")
return {'list': [], 'page': int(pg), 'pagecount': 1, 'limit': 0, 'total': 0}
# 解析搜索结果
vlist = []
items = data('.row .col-xs-6.col-md-3')
for item in items.items():
try:
title = item('h5').text().strip()
if not title:
continue
style = item('.image').attr('style') or ''
pic_match = re.search(r'url\(["\']?([^"\']+)["\']?\)', style)
vod_pic = pic_match.group(1) if pic_match else ''
if vod_pic and not vod_pic.startswith(('http://', 'https://')):
vod_pic = f"{self.host.rstrip('/')}/{vod_pic.lstrip('/')}"
desc = item('.duration').text().strip() or '未知时长'
href = item('a').attr('href') or ''
if not href:
continue
vod_id = href.split('/')[-1]
if not vod_id.endswith('.htm'):
vod_id += '.htm'
vlist.append({
'vod_id': vod_id,
'vod_name': title,
'vod_pic': vod_pic,
'vod_remarks': desc
})
except Exception as e:
print(f"解析单条搜索结果失败:{e}(跳过该条)")
continue
# 解析总页数
pagecount = 1
try:
pagination = data('.pagination1 li a')
page_nums = []
for a in pagination.items():
text = a.text().strip()
if text.isdigit():
page_nums.append(int(text))
if page_nums:
pagecount = max(page_nums)
print(f"搜索关键词「{key}」分页解析完成,共{pagecount}")
except Exception as e:
print(f"解析分页失败(默认单页):{e}")
pagecount = 1
# 返回结果
total = len(vlist) * pagecount
print(f"搜索关键词「{key}」第{pg}页处理完成,结果{len(vlist)}条,总页数{pagecount}")
return {
'list': vlist,
'page': int(pg),
'pagecount': pagecount,
'limit': len(vlist),
'total': total
}
except Exception as e:
print(f"搜索功能整体异常:{e}")
return {
'list': [],
'page': int(pg), 'pagecount': 1,
'limit': 0,
'total': 0
}
def playerContent(self, flag, id, vipFlags):
headers = self.headers.copy()
headers.update({
'Referer': self.host,
'Origin': self.host.rstrip('/'),
'Host': urlparse(self.host).netloc,
})
# parse=1:交由播放器/壳子对该页面嗅探真实流(对应源 rule 的 play_parse);parse=0 时 url 须为直链
return {
'parse': 1,
'url': id,
'header': headers,
'double': True # 对应源 rule 的 double 设置
}
def localProxy(self, param):
try:
url = param['url']
if url and not url.startswith(('http://', 'https://')):
url = f"{self.host.rstrip('/')}/{url.lstrip('/')}"
img_headers = self.headers.copy()
img_headers.update({'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8'})
res = self.fetch(url, headers=img_headers, timeout=10)
content_type = res.headers.get('Content-Type', 'image/jpeg')
return [200, content_type, res.content]
except Exception as e:
print(f"图片代理失败: {e}")
return [200, 'image/jpeg', b'']
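# 下面两个方法(fetch_with_retry / fetch)的请求都经 vpsdn.leuse.top 中转:
# 目标地址整体 quote 后作为 url 参数拼入中转地址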
def fetch_with_retry(self, url, retry=2, timeout=5):
for i in range(retry + 1):
try:
# fetch 内部已统一走中转代理,这里直接传原始地址,避免二次包装
resp = self.fetch(url, headers=self.headers, timeout=timeout)
if resp.status_code in (200, 301, 302):
return resp
print(f"请求{url}返回状态码{resp.status_code},重试中...")
except Exception as e:
print(f"{i+1}次请求{url}失败: {e}")
if i < retry:
time.sleep(0.5)
return type('obj', (object,), {'text': '', 'status_code': 404})
def fetch(self, url, headers=None, timeout=5, method='GET', params=None):
from urllib.parse import quote # 模块顶部只导入了 urlparse,这里局部导入 quote
headers = headers or self.headers
params = params or {}
try:
if method.upper() == 'GET':
resp = requests.get(
f'https://vpsdn.leuse.top/proxy?single=true&url={quote(url)}',
headers=headers,
timeout=timeout,
allow_redirects=True,
params=params # 支持GET请求带参数适配搜索分页
)
elif method.upper() == 'HEAD':
resp = requests.head(
f'https://vpsdn.leuse.top/proxy?single=true&url={quote(url)}',
headers=headers,
timeout=timeout,
allow_redirects=False,
params=params
)
else:
resp = requests.get( # 默认GET请求兼容其他方法调用
f'https://vpsdn.leuse.top/proxy?single=true&url={quote(url)}',
headers=headers,
timeout=timeout,
allow_redirects=True,
params=params
)
# 自动适配编码:头部已声明 charset 时沿用 requests 的解析,未声明时才猜测,避免中文乱码
if 'charset' not in resp.headers.get('Content-Type', '').lower():
resp.encoding = resp.apparent_encoding or 'utf-8'
return resp
except Exception as e:
print(f"网络请求失败({url}): {e}")
# 返回统一格式空响应,避免后续逻辑崩溃
return type('obj', (object,), {
'text': '',
'status_code': 500,
'headers': {},
'url': url
})
# ------------------------------
# 可选测试代码(部署前注释或删除,仅用于本地验证)
# ------------------------------
if __name__ == "__main__":
# 初始化爬虫
spider = Spider()
spider.init()
# 测试首页内容
print("=== 测试首页 ===")
home_data = spider.homeContent(filter='')
print(f"首页分类数:{len(home_data['class'])}")
print(f"首页视频数:{len(home_data['list'])}")
# 测试视频分类
print("\n=== 测试视频分类第1页 ===")
cate_data = spider.categoryContent(tid='list', pg='1', filter='', extend='')
print(f"视频分类第1页视频数{len(cate_data['list'])}")
print(f"视频分类总页数:{cate_data['pagecount']}")
# 测试搜索功能
print("\n=== 测试搜索(关键词:测试) ===")
search_data = spider.searchContent(key="测试", quick=False, pg=1)
print(f"搜索结果数:{len(search_data['list'])}")
print(f"搜索总页数:{search_data['pagecount']}")

View File

@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import json
import random
import string
import sys
import time
from base64 import b64decode
from Crypto.Cipher import AES
from Crypto.Hash import MD5
from Crypto.Util.Padding import unpad
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
self.did = self.getdid()
self.token,self.phost,self.host = self.gettoken()
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
hs = ['fhoumpjjih', 'dyfcbkggxn', 'rggwiyhqtg', 'bpbbmplfxc']
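# hs:备用 apex 域名池;gettoken 用「随机子域 + 池内 apex + .work」拼出入口逐个尝试,直到拿到 token 与图床域名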
def homeContent(self, filter):
data = self.fetch(f'{self.host}/api/video/queryClassifyList?mark=4', headers=self.headers()).json()['encData']
data1 = self.aes(data)
result = {}
classes = []
for k in data1['data']:
classes.append({'type_name': k['classifyTitle'], 'type_id': k['classifyId']})
result['class'] = classes
return result
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
path=f'/api/short/video/getShortVideos?classifyId={tid}&videoMark=4&page={pg}&pageSize=20'
result = {}
videos = []
data=self.fetch(f'{self.host}{path}', headers=self.headers()).json()['encData']
vdata=self.aes(data)
for k in vdata['data']:
videos.append({"vod_id": k['videoId'], 'vod_name': k.get('title'), 'vod_pic': self.getProxyUrl() + '&url=' + k['coverImg'],
'vod_remarks': self.dtim(k.get('playTime')),'style': {"type": "rect", "ratio": 1.33}})
result["list"] = videos
result["page"] = pg
result["pagecount"] = 9999
result["limit"] = 90
result["total"] = 999999
return result
def detailContent(self, ids):
path = f'/api/video/getVideoById?videoId={ids[0]}'
data = self.fetch(f'{self.host}{path}', headers=self.headers()).json()['encData']
v = self.aes(data)
d=f'{v["title"]}$auth_key={v["authKey"]}&path={v["videoUrl"]}'
vod = {'vod_name': v["title"], 'type_name': ''.join(v.get('tagTitles',[])),'vod_play_from': v.get('nickName') or "小红书官方", 'vod_play_url': d}
result = {"list": [vod]}
return result
def searchContent(self, key, quick, pg='1'):
pass
def playerContent(self, flag, id, vipFlags):
h=self.headers()
h['Authorization'] = h.pop('aut')
del h['deviceid']
result = {"parse": 0, "url": f"{self.host}/api/m3u8/decode/authPath?{id}", "header": h}
return result
def localProxy(self, param):
return self.action(param)
def md5(self, text):
h = MD5.new()
h.update(text.encode('utf-8'))
return h.hexdigest()
def aes(self, word):
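# 接口返回的 encData 为 base64(AES-128-CBC(JSON)),16 字节固定密钥同时用作 IV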
key = b64decode("SmhiR2NpT2lKSVV6STFOaQ==")
iv = key
cipher = AES.new(key, AES.MODE_CBC, iv)
decrypted = unpad(cipher.decrypt(b64decode(word)), AES.block_size)
return json.loads(decrypted.decode('utf-8'))
def dtim(self, seconds):
try:
seconds = int(seconds)
hours = seconds // 3600
remaining_seconds = seconds % 3600
minutes = remaining_seconds // 60
remaining_seconds = remaining_seconds % 60
formatted_minutes = str(minutes).zfill(2)
formatted_seconds = str(remaining_seconds).zfill(2)
if hours > 0:
formatted_hours = str(hours).zfill(2)
return f"{formatted_hours}:{formatted_minutes}:{formatted_seconds}"
else:
return f"{formatted_minutes}:{formatted_seconds}"
except:
return ''
def getdid(self):
did = self.getCache('did')
if not did:
t = str(int(time.time()))
did = self.md5(t)
self.setCache('did', did)
return did
def getsign(self):
t=str(int(time.time() * 1000))
return self.md5(t[3:8]),t
def gettoken(self, i=0, max_attempts=10):
if i >= len(self.hs) or i >= max_attempts:
return '', '', '' # 与调用处的三元组解包保持一致
current_domain = f"https://{''.join(random.choices(string.ascii_lowercase + string.digits, k=random.randint(5, 10)))}.{self.hs[i]}.work"
try:
sign,t=self.getsign()
url = f'{current_domain}/api/user/traveler'
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36;SuiRui/xhs/ver=1.2.6',
'deviceid': self.did, 't': t, 's': sign, }
data = {'deviceId': self.did, 'tt': 'U', 'code': '', 'chCode': 'dafe13'}
data1 = self.post(url, json=data, headers=headers)
data1.raise_for_status()
data2 = data1.json()['data']
return data2['token'], data2['imgDomain'],current_domain
except:
return self.gettoken(i+1, max_attempts)
def headers(self):
sign,t=self.getsign()
henda = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36;SuiRui/xhs/ver=1.2.6',
'deviceid': self.did, 't': t, 's': sign, 'aut': self.token}
return henda
def action(self, param):
headers = {
'User-Agent': 'Dalvik/2.1.0 (Linux; U; Android 11; M2012K10C Build/RP1A.200720.011)'}
data = self.fetch(f'{self.phost}{param["url"]}', headers=headers)
type = (data.headers.get('Content-Type') or 'image/jpeg').split(';')[0]
base64_data = self.img(data.content, 100, '2020-zq3-888')
return [200, type, base64_data]
def img(self, data: bytes, length: int, key: str):
GIF = b'\x47\x49\x46'
JPG = b'\xFF\xD8\xFF'
PNG = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
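# 图床做了简单混淆:若字节头已是 GIF/JPG/PNG 魔数则原样返回,
# 否则认为前 length 字节被 key 循环异或过,按位还原即可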
def is_dont_need_decode_for_gif(data):
return len(data) > 2 and data[:3] == GIF
def is_dont_need_decode_for_jpg(data):
return len(data) > 7 and data[:3] == JPG
def is_dont_need_decode_for_png(data):
return len(data) > 7 and data[1:8] == PNG[1:8]
if is_dont_need_decode_for_png(data):
return data
elif is_dont_need_decode_for_gif(data):
return data
elif is_dont_need_decode_for_jpg(data):
return data
else:
key_bytes = key.encode('utf-8')
result = bytearray(data)
for i in range(length):
result[i] ^= key_bytes[i % len(key_bytes)]
return bytes(result)

View File

@@ -0,0 +1,246 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import json
import random
import string
import sys
import time
from base64 import b64decode
from urllib.parse import quote
from Crypto.Cipher import AES
from Crypto.Hash import MD5
from Crypto.Util.Padding import unpad
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
self.did = self.getdid()
self.token,self.phost,self.host = self.gettoken()
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def action(self, action):
pass
def destroy(self):
pass
hs=['wcyfhknomg','pdcqllfomw','alxhzjvean','bqeaaxzplt','hfbtpixjso']
ua='Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36;SuiRui/twitter/ver=1.4.4'
def homeContent(self, filter):
data = self.fetch(f'{self.host}/api/video/classifyList', headers=self.headers()).json()['encData']
data1 = self.aes(data)
# 分类 1~7 共用同一组排序筛选,精选(jx)用榜单筛选;用推导式生成,避免手写七份重复字典
fl_filter = [{"key": "fl", "name": "分类",
"value": [{"n": "最近更新", "v": "1"}, {"n": "最多播放", "v": "2"}, {"n": "好评榜", "v": "3"}]}]
jx_filter = [{"key": "type", "name": "精选",
"value": [{"n": "日榜", "v": "1"}, {"n": "周榜", "v": "2"}, {"n": "月榜", "v": "3"}, {"n": "总榜", "v": "4"}]}]
result = {'filters': {str(i): fl_filter for i in range(1, 8)}}
result['filters']['jx'] = jx_filter
classes = [{'type_name': "精选", 'type_id': "jx"}]
for k in data1['data']:
classes.append({'type_name': k['classifyTitle'], 'type_id': k['classifyId']})
result['class'] = classes
return result
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
path = f'/api/video/queryVideoByClassifyId?pageSize=20&page={pg}&classifyId={tid}&sortType={extend.get("fl", "1")}'
if 'click' in tid:
path = f'/api/video/queryPersonVideoByType?pageSize=20&page={pg}&userId={tid.replace("click", "")}'
if tid == 'jx':
path = f'/api/video/getRankVideos?pageSize=20&page={pg}&type={extend.get("type", "1")}'
data = self.fetch(f'{self.host}{path}', headers=self.headers()).json()['encData']
data1 = self.aes(data)['data']
result = {}
videos = []
for k in data1:
id = f'{k.get("videoId")}?{k.get("userId")}?{k.get("nickName")}'
if 'click' in tid:
id = id + 'click'
videos.append({"vod_id": id, 'vod_name': k.get('title'), 'vod_pic': self.getProxyUrl() + f"&url={k.get('coverImg')[0]}",
'vod_remarks': self.dtim(k.get('playTime')),'style': {"type": "rect", "ratio": 1.33}})
result["list"] = videos
result["page"] = pg
result["pagecount"] = 9999
result["limit"] = 90
result["total"] = 999999
return result
def detailContent(self, ids):
vid = ids[0].replace('click', '').split('?')
path = f'/api/video/can/watch?videoId={vid[0]}'
data = self.fetch(f'{self.host}{path}', headers=self.headers()).json()['encData']
data1 = self.aes(data)['playPath']
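# [a=cr:{json}/]文本[/a] 为壳子约定的富文本跳转标记:点击导演名跳到该作者(id 尾部加 'click')的视频列表;
# 此处推测为 TVBox 系通用写法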
clj = '[a=cr:' + json.dumps({'id': vid[1] + 'click', 'name': vid[2]}) + '/]' + vid[2] + '[/a]'
if 'click' in ids[0]:
clj = vid[2]
vod = {'vod_director': clj, 'vod_play_from': "推特", 'vod_play_url': vid[2] + "$" + data1}
result = {"list": [vod]}
return result
def searchContent(self, key, quick, pg='1'):
path = f'/api/search/keyWord?pageSize=20&page={pg}&searchWord={quote(key)}&searchType=1'
data = self.fetch(f'{self.host}{path}', headers=self.headers()).json()['encData']
data1 = self.aes(data)['videoList']
result = {}
videos = []
for k in data1:
id = f'{k.get("videoId")}?{k.get("userId")}?{k.get("nickName")}'
videos.append({"vod_id": id, 'vod_name': k.get('title'), 'vod_pic': self.getProxyUrl() + f"&url={k.get('coverImg')[0]}",
'vod_remarks': self.dtim(k.get('playTime')), 'style': {"type": "rect", "ratio": 1.33}})
result["list"] = videos
result["page"] = pg
result["pagecount"] = 9999
result["limit"] = 90
result["total"] = 999999
return result
def playerContent(self, flag, id, vipFlags):
return {"parse": 0, "url": id, "header": self.headers()}
def localProxy(self, param):
return self.imgs(param)
def getsign(self):
t = str(int(time.time() * 1000))
sign = self.md5(t)
return sign, t
def headers(self):
sign, t = self.getsign()
return {'User-Agent': self.ua,'deviceid': self.did, 't': t, 's': sign, 'aut': self.token}
def aes(self, word):
key = b64decode("SmhiR2NpT2lKSVV6STFOaQ==")
iv = key
cipher = AES.new(key, AES.MODE_CBC, iv)
decrypted = unpad(cipher.decrypt(b64decode(word)), AES.block_size)
return json.loads(decrypted.decode('utf-8'))
def dtim(self, seconds):
try:
seconds = int(seconds)
hours = seconds // 3600
remaining_seconds = seconds % 3600
minutes = remaining_seconds // 60
remaining_seconds = remaining_seconds % 60
formatted_minutes = str(minutes).zfill(2)
formatted_seconds = str(remaining_seconds).zfill(2)
if hours > 0:
formatted_hours = str(hours).zfill(2)
return f"{formatted_hours}:{formatted_minutes}:{formatted_seconds}"
else:
return f"{formatted_minutes}:{formatted_seconds}"
except:
return "666"
def gettoken(self, i=0, max_attempts=10):
if i >= len(self.hs) or i >= max_attempts:
return '', '', '' # 与调用处的三元组解包保持一致
current_domain = f"https://{''.join(random.choices(string.ascii_lowercase + string.digits, k=random.randint(5, 10)))}.{self.hs[i]}.work"
try:
url = f'{current_domain}/api/user/traveler'
sign, t = self.getsign()
headers = {
'User-Agent': self.ua,
'Accept': 'application/json',
'deviceid': self.did,
't': t,
's': sign,
}
data = {
'deviceId': self.did,
'tt': 'U',
'code': '##X-4m6Goo4zzPi1hF##',
'chCode': 'tt09'
}
response = self.post(url, json=data, headers=headers)
response.raise_for_status()
data1 = response.json()['data']
return data1['token'], data1['imgDomain'], current_domain
except Exception as e:
return self.gettoken(i + 1, max_attempts)
def getdid(self):
did = self.getCache('did')
if not did:
t = str(int(time.time()))
did = self.md5(t)
self.setCache('did', did)
return did
def md5(self, text):
h = MD5.new()
h.update(text.encode('utf-8'))
return h.hexdigest()
def imgs(self, param):
headers = {'User-Agent': self.ua}
url = param['url']
data = self.fetch(f"{self.phost}{url}",headers=headers)
bdata = self.img(data.content, 100, '2020-zq3-888')
return [200, data.headers.get('Content-Type'), bdata]
def img(self, data: bytes, length: int, key: str):
GIF = b'\x47\x49\x46'
JPG = b'\xFF\xD8\xFF'
PNG = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
def is_dont_need_decode_for_gif(data):
return len(data) > 2 and data[:3] == GIF
def is_dont_need_decode_for_jpg(data):
return len(data) > 7 and data[:3] == JPG
def is_dont_need_decode_for_png(data):
return len(data) > 7 and data[1:8] == PNG[1:8]
if is_dont_need_decode_for_png(data):
return data
elif is_dont_need_decode_for_gif(data):
return data
elif is_dont_need_decode_for_jpg(data):
return data
else:
key_bytes = key.encode('utf-8')
result = bytearray(data)
for i in range(length):
result[i] ^= key_bytes[i % len(key_bytes)]
return bytes(result)

View File

@@ -0,0 +1,349 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import json
import re
import sys
import threading
import time
from base64 import b64decode, b64encode
import requests
from Crypto.Cipher import AES
from Crypto.Hash import MD5
from Crypto.Util.Padding import unpad
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
self.did = self.getdid()
self.token=self.gettoken()
domain=self.domain()
self.phost=self.host_late(domain['domain_preview'])
self.bhost=domain['domain_original']
self.names=domain['name_original']
pass
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
host = 'https://lulu-api-92mizw.jcdwn.com'
headers = {
'User-Agent': 'okhttp/4.11.0',
'referer': 'https://app.nova-traffic-1688.com',
}
def homeContent(self, filter):
BASE_CATEGORIES = [
{'type_name': '片商', 'type_id': 'makers'},
{'type_name': '演员', 'type_id': 'actor'}
]
SORT_OPTIONS = {
'key': 'sortby',
'name': 'sortby',
'value': [
{'n': '最新', 'v': 'on_shelf_at'},
{'n': '最热', 'v': 'hot'}
]
}
tags = self.getdata('/api/v1/video/tag?current=1&pageSize=100&level=1')
producers = self.getdata('/api/v1/video/producer?current=1&pageSize=100&status=1')
regions = self.getdata('/api/v1/video/region?current=1&pageSize=100')
result = {'class': [], 'filters': {}}
result['class'].extend(BASE_CATEGORIES)
for category in BASE_CATEGORIES:
result['filters'][category['type_id']] = [SORT_OPTIONS]
if tags.get('data'):
main_tag = tags['data'][0]
result['class'].append({
'type_name': '发现',
'type_id': f'{main_tag["id"]}_tag'
})
tag_values = [
{'n': tag['name'], 'v': f"{tag['id']}_tag"}
for tag in tags['data'][1:]
if tag.get('id')
]
result['filters'][f'{main_tag["id"]}_tag'] = [
{'key': 'tagtype', 'name': 'tagtype', 'value': tag_values},
SORT_OPTIONS
]
region_filter = {
'key': 'region_ids',
'name': 'region_ids',
'value': [
{'n': region['name'], 'v': region['id']}
for region in regions['data'][1:]
if region.get('id')
]
}
self.aid=regions['data'][0]['id']
result['filters']['actor'].append({
'key': 'region_id',
'name': 'region_id',
'value': region_filter['value'][:2]
})
complex_sort = {
'key': 'sortby',
'name': 'sortby',
'value': [
{'n': '综合', 'v': 'complex'},
*SORT_OPTIONS['value']
]
}
producer_filters = [region_filter, complex_sort]
for producer in producers['data']:
result['class'].append({
'type_name': producer['name'],
'type_id': f'{producer["id"]}_sx'
})
result['filters'][f'{producer["id"]}_sx'] = producer_filters
return result
def homeVideoContent(self):
data=self.getdata('/api/v1/video?current=1&pageSize=60&region_ids=&sortby=complex')
return {'list':self.getlist(data)}
def categoryContent(self, tid, pg, filter, extend):
if 'act' in tid:
data=self.getact(tid, pg, filter, extend)
elif 'tag' in tid:
data=self.gettag(tid, pg, filter, extend)
elif 'sx' in tid:
data=self.getsx(tid, pg, filter, extend)
elif 'make' in tid:
data=self.getmake(tid, pg, filter, extend)
result = {}
result['list'] = data
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
return result
def detailContent(self, ids):
v=self.getdata(f'/api/v1/video?current=1&pageSize=1&id={ids[0]}&detail=1')
v=v['data'][0]
vod = {
'vod_name': v.get('title'),
'type_name': '/'.join(v.get('tag_names',[])),
'vod_play_from': '浴火社',
'vod_play_url': ''
}
p=[]
for i,j in enumerate(self.bhost):
p.append(f'{self.names[i]}${j}{v.get("highres_url") or v.get("preview_url")}@@@{v["id"]}')
vod['vod_play_url'] = '#'.join(p)
return {'list':[vod]}
def searchContent(self, key, quick, pg="1"):
data=self.getdata(f'/api/v1/video?current={pg}&pageSize=30&title={key}')
return {'list':self.getlist(data),'page':pg}
def playerContent(self, flag, id, vipFlags):
url=f'{self.getProxyUrl()}&url={self.e64(id)}&type=m3u8'
return {'parse': 0, 'url': url, 'header': self.headers}
def localProxy(self, param):
if param.get('type')=='image':
data=self.fetch(param.get('url'), headers=self.headers).text
content=b64decode(data.encode('utf-8'))
return [200, 'image/png', content]
if param.get('type')=='m3u8':
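# 反代 m3u8:把 EXT-X-KEY 的 URI 改写为本地代理(type=mkey,取密钥时携带 authdog),并把相对分片补成绝对地址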
ids=self.d64(param.get('url')).split('@@@')
data=self.fetch(ids[0], headers=self.headers).text
lines = data.strip().split('\n')
for index, string in enumerate(lines):
if 'URI=' in string:
replacement = f'URI="{self.getProxyUrl()}&id={ids[1]}&type=mkey"'
lines[index]=re.sub(r'URI="[^"]+"', replacement, string)
continue
if '#EXT' not in string and 'http' not in string:
last_slash_index = ids[0].rfind('/')
lpath = ids[0][:last_slash_index + 1]
lines[index] = f'{lpath}{string}'
data = '\n'.join(lines)
return [200, 'audio/x-mpegurl', data]
if param.get('type')=='mkey':
id=param.get('id')
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36',
'authdog': self.token
}
response = self.fetch(f'{self.host}/api/v1/video/key/{id}', headers=headers)
type=response.headers.get('Content-Type')
return [200, type, response.content]
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self,encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def getdid(self):
did = self.md5(str(int(time.time() * 1000)))
try:
if self.getCache('did'):
return self.getCache('did')
else:
self.setCache('did', did)
return did
except Exception as e:
self.setCache('did', did)
return did
def host_late(self, url_list):
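# 并发对候选域名发 HEAD 请求测延迟,取最快者;请求失败记为无穷大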
if isinstance(url_list, str):
urls = [u.strip() for u in url_list.split(',')]
else:
urls = url_list
if len(urls) <= 1:
return urls[0] if urls else ''
results = {}
threads = []
def test_host(url):
try:
start_time = time.time()
response = requests.head(url, timeout=1.0, allow_redirects=False)
delay = (time.time() - start_time) * 1000
results[url] = delay
except Exception as e:
results[url] = float('inf')
for url in urls:
t = threading.Thread(target=test_host, args=(url,))
threads.append(t)
t.start()
for t in threads:
t.join()
return min(results.items(), key=lambda x: x[1])[0]
def domain(self):
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36',
}
response = self.fetch(f'{self.host}/api/v1/system/domain', headers=headers)
return self.aes(response.content)
def aes(self, word):
key = b64decode("amtvaWc5ZnJ2Ym5taml1eQ==")
iv = b64decode("AAEFAwQFCQcICQoLDA0ODw==")
cipher = AES.new(key, AES.MODE_CBC, iv)
decrypted = unpad(cipher.decrypt(word), AES.block_size)
return json.loads(decrypted.decode('utf-8'))
def md5(self, text):
h = MD5.new()
h.update(text.encode('utf-8'))
return h.hexdigest()
def gettoken(self):
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36',
'cookei': self.md5(f'{self.did}+android'),
'siteid': '11',
'siteauthority': 'lls888.tv'
}
json_data = {
'app_id': 'jukjoe.zqgpi.hfzvde.sdot',
'phone_device': 'Redmi M2012K10C',
'device_id': self.did,
'device_type': 'android',
'invite_code': 'oi1o',
'is_first': 1,
'os_version': '11',
'version': '8.59',
}
response = self.post(f'{self.host}/api/v1/member/device', headers=headers, json=json_data)
tdata = self.aes(response.content)
return f'{tdata["token_type"]} {tdata["access_token"]}'
def getdata(self, path):
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36',
'authdog': self.token
}
response = self.fetch(f'{self.host}{path}', headers=headers)
return self.aes(response.content)
def getimg(self, path):
if not path:
return ''
if not path.startswith('/'):
path = f'/{path}'
return f'{self.getProxyUrl()}&url={self.phost}{path}&type=image'
def getlist(self,data):
videos = []
for i in data['data']:
videos.append({
'vod_id': i['id'],
'vod_name': i['title'],
'vod_pic': self.getimg(i.get('coverphoto_h') or i.get('coverphoto_v')),
'style': {"type": "rect", "ratio": 1.33}})
return videos
def geticon(self, data, st='',style=None):
if style is None:style = {"type": "oval"}
videos = []
for i in data['data']:
videos.append({
'vod_id': f'{i["id"]}{st}',
'vod_name': i['name'],
'vod_pic': self.getimg(i.get('icon_path')),
'vod_tag': 'folder',
'style': style})
return videos
def getact(self, tid, pg, filter, extend):
if tid == 'actor' and pg=='1':
data = self.getdata(f'/api/v1/video/actor?current=1&pageSize=999&region_id={extend.get("region_id",self.aid)}&discover_page={pg}')
return self.geticon(data, '_act')
elif '_act' in tid:
data = self.getdata(f'/api/v1/video?current={pg}&pageSize=50&actor_ids={tid.split("_")[0]}&sortby={extend.get("sortby","on_shelf_at")}')
return self.getlist(data)
def gettag(self, tid, pg, filter, extend):
if '_tag' in tid:
tid=extend.get('tagtype',tid)
data=self.getdata(f'/api/v1/video/tag?current={pg}&pageSize=100&level=2&parent_id={tid.split("_")[0]}')
return self.geticon(data, '_stag',{"type": "rect", "ratio": 1.33})
elif '_stag' in tid:
data = self.getdata(f'/api/v1/video?current={pg}&pageSize=50&tag_ids={tid.split("_")[0]}&sortby={extend.get("sortby","on_shelf_at")}')
return self.getlist(data)
def getsx(self, tid, pg, filter, extend):
data=self.getdata(f'/api/v1/video?current={pg}&pageSize=20&producer_ids={tid.split("_")[0]}&region_ids={extend.get("region_ids","")}&sortby={extend.get("sortby","complex")}')
return self.getlist(data)
def getmake(self, tid, pg, filter, extend):
if pg=='1':
data=self.getdata('/api/v1/video/producer?current=1&pageSize=100&status=1')
return self.geticon(data, '_sx',{"type": "rect", "ratio": 1.33})

View File

@@ -0,0 +1,242 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import json
import re
import sys
import threading
import time
from base64 import b64encode, b64decode
from urllib.parse import urlparse
import requests
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
'''
如果一直访问不了,手动访问导航页 https://a.hdys.top 获取可用域名,
并替换下方的 self.host,例如:
self.host = 'https://xxx.xxx.xxx'
'''
self.session = requests.Session()
self.headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
'sec-ch-ua-platform': '"Android"',
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="130", "Google Chrome";v="130"',
'dnt': '1',
'sec-ch-ua-mobile': '?1',
'sec-fetch-site': 'same-origin',
'sec-fetch-mode': 'no-cors',
'sec-fetch-dest': 'script',
'accept-language': 'zh-CN,zh;q=0.9',
'priority': 'u=2',
}
try:self.proxies = json.loads(extend)
except:self.proxies = {}
self.host = self.gethost()
# self.host = 'https://hd.hdys2.com'
self.headers.update({'referer': f"{self.host}/"})
self.session.proxies.update(self.proxies)
self.session.headers.update(self.headers)
pass
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
pheader={
'User-Agent': 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
'sec-ch-ua-platform': '"Android"',
'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="130", "Google Chrome";v="130"',
'dnt': '1',
'sec-ch-ua-mobile': '?1',
'origin': 'https://jx.8852.top',
'sec-fetch-site': 'cross-site',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'accept-language': 'zh-CN,zh;q=0.9',
'priority': 'u=1, i',
}
def homeContent(self, filter):
data=self.getpq(self.session.get(self.host))
cdata=data('.stui-header__menu.type-slide li')
ldata=data('.stui-vodlist.clearfix li')
result = {}
classes = []
for k in cdata.items():
i=k('a').attr('href')
if i and 'type' in i:
classes.append({
'type_name': k.text(),
'type_id': re.search(r'\d+', i).group(0)
})
result['class'] = classes
result['list'] = self.getlist(ldata)
return result
def homeVideoContent(self):
return {'list': []}
def categoryContent(self, tid, pg, filter, extend):
data=self.getpq(self.session.get(f"{self.hsot}/vodshow/{tid}--------{pg}---.html"))
result = {}
result['list'] = self.getlist(data('.stui-vodlist.clearfix li'))
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
return result
def detailContent(self, ids):
data=self.getpq(self.session.get(f"{self.hsot}{ids[0]}"))
v=data('.stui-vodlist__box a')
vod = {
'vod_play_from': '花都影视',
'vod_play_url': f"{v('img').attr('alt')}${v.attr('href')}"
}
return {'list':[vod]}
def searchContent(self, key, quick, pg="1"):
data=self.getpq(self.session.get(f"{self.hsot}/vodsearch/{key}----------{pg}---.html"))
return {'list':self.getlist(data('.stui-vodlist.clearfix li')),'page':pg}
def playerContent(self, flag, id, vipFlags):
try:
data=self.getpq(self.session.get(f"{self.hsot}{id}"))
jstr=data('.stui-player.col-pd script').eq(0).text()
jsdata=json.loads(jstr.split("=", maxsplit=1)[-1])
p,url=0,jsdata['url']
if '.m3u8' in url:url=self.proxy(url,'m3u8')
except Exception as e:
print(f"{str(e)}")
p,url=1,f"{self.hsot}{id}"
return {'parse': p, 'url': url, 'header': self.pheader}
def liveContent(self, url):
pass
def localProxy(self, param):
url = self.d64(param['url'])
if param.get('type') == 'm3u8':
return self.m3Proxy(url)
else:
return self.tsProxy(url,param['type'])
def gethost(self):
params = {
'v': '1',
}
self.headers.update({'referer': 'https://a.hdys.top/'})
response = self.session.get('https://a.hdys.top/assets/js/config.js',proxies=self.proxies, params=params, headers=self.headers)
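# config.js 内容为分号分隔的赋值语句,候选域名包在引号中;
# 按该脚本现有格式,末尾 4 段不是候选域名,剔除后交给 host_late 测速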
return self.host_late(response.text.split(';')[:-4])
def getlist(self,data):
videos=[]
for i in data.items():
videos.append({
'vod_id': i('a').attr('href'),
'vod_name': i('img').attr('alt'),
'vod_pic': self.proxy(i('img').attr('data-original')),
'vod_year': i('.pic-tag-t').text(),
'vod_remarks': i('.pic-tag-b').text()
})
return videos
def getpq(self, data):
try:
return pq(data.text)
except Exception as e:
print(f"{str(e)}")
return pq(data.text.encode('utf-8'))
def host_late(self, url_list):
if isinstance(url_list, str):
urls = [u.strip() for u in url_list.split(',')]
else:
urls = url_list
if len(urls) <= 1:
return urls[0] if urls else ''
results = {}
threads = []
def test_host(url):
try:
url=re.findall(r'"([^"]*)"', url)[0]
start_time = time.time()
self.headers.update({'referer': f'{url}/'})
response = requests.head(url,proxies=self.proxies,headers=self.headers,timeout=1.0, allow_redirects=False)
delay = (time.time() - start_time) * 1000
results[url] = delay
except Exception as e:
results[url] = float('inf')
for url in urls:
t = threading.Thread(target=test_host, args=(url,))
threads.append(t)
t.start()
for t in threads:
t.join()
return min(results.items(), key=lambda x: x[1])[0]
def m3Proxy(self, url):
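# 反代 m3u8:必要时手动跟随一次 302,再把清单里的相对分片与嵌套清单改写为本地代理地址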
ydata = requests.get(url, headers=self.pheader, proxies=self.proxies, allow_redirects=False)
data = ydata.content.decode('utf-8')
if ydata.headers.get('Location'):
url = ydata.headers['Location']
data = requests.get(url, headers=self.pheader, proxies=self.proxies).content.decode('utf-8')
lines = data.strip().split('\n')
last_r = url[:url.rfind('/')]
parsed_url = urlparse(url)
durl = parsed_url.scheme + "://" + parsed_url.netloc
for index, string in enumerate(lines):
if '#EXT' not in string:
if 'http' not in string:
domain=last_r if string.count('/') < 2 else durl
string = domain + ('' if string.startswith('/') else '/') + string
lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
data = '\n'.join(lines)
return [200, "application/vnd.apple.mpegur", data]
def tsProxy(self, url,type):
h=self.pheader.copy()
if type=='img':h=self.headers.copy()
data = requests.get(url, headers=h, proxies=self.proxies, stream=True)
return [200, data.headers['Content-Type'], data.content]
def proxy(self, data, type='img'):
if data and len(self.proxies):return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
else:return data
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self,encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""

View File

@@ -0,0 +1,132 @@
# -*- coding: utf-8 -*-
# by @嗷呜
import random
import string
import sys
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
self.host,self.headers = self.getat()
pass
def getName(self):
pass
def isVideoFormat(self, url):
pass
def manualVideoCheck(self):
pass
def destroy(self):
pass
def homeContent(self, filter):
data=self.fetch(f'{self.host}/vod/listing-0-0-0-0-0-0-0-0-0-0',headers=self.headers).json()
result = {}
classes = [{
'type_name': '全部',
'type_id': '0'
}]
filters = {}
ft=[]
filter_keys = ['orders', 'areas', 'years', 'definitions', 'durations', 'mosaics', 'langvoices']
for key in filter_keys:
if key in data['data']:
filter_item = {
'key': key,
'name': key,
'value': []
}
for item in data['data'][key]:
first_two = dict(list(item.items())[:2])
filter_item['value'].append({
'v': list(first_two.values())[0],
'n': list(first_two.values())[1]
})
ft.append(filter_item)
filters['0']=ft
for k in data['data']['categories']:
classes.append({
'type_name': k['catename'],
'type_id': k['cateid']
})
filters[k['cateid']]=ft
result['class'] = classes
result['filters'] =filters
result['list'] = self.getlist(data['data']['vodrows'])
return result
def homeVideoContent(self):
pass
def categoryContent(self, tid, pg, filter, extend):
data=self.fetch(f'{self.host}/vod/listing-{tid}-{extend.get("areas","0")}-{extend.get("years","0")}-1-{extend.get("definitions","0")}-{extend.get("durations","0")}-{extend.get("mosaics","0")}-{extend.get("langvoices","0")}-{extend.get("orders","0")}-{pg}',headers=self.headers).json()
result = {}
result['list'] = self.getlist(data['data']['vodrows'])
result['page'] = pg
result['pagecount'] = 9999
result['limit'] = 90
result['total'] = 999999
return result
def detailContent(self, ids):
data=self.fetch(f'{self.host}/vod/reqplay/{ids[0]}',headers=self.headers).json()
vod = {
'vod_play_from': data['errmsg'],
'vod_play_url': '#'.join([f"{i['hdtype']}${i['httpurl']}" for i in data['data']['httpurls']]),
}
return {'list':[vod]}
def searchContent(self, key, quick, pg="1"):
data=self.fetch(f'{self.host}/search?page={pg}&wd={key}',headers=self.headers).json()
return {'list':self.getlist(data['data']['vodrows']),'page':pg}
def playerContent(self, flag, id, vipFlags):
return {'parse': 0, 'url': id, 'header': {'User-Agent':'ExoPlayer'}}
def localProxy(self, param):
pass
def getlist(self,data):
vlist=[]
for i in data:
if i['isvip'] !='1':
vlist.append({
'vod_id': i['vodid'],
'vod_name': i['title'],
'vod_pic': i['coverpic'],
'vod_year': i.get('duration'),
'vod_remarks': i.get('catename'),
'style': {"type": "rect", "ratio": 1.33}
})
return vlist
def getat(self):
headers = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 11; M2012K10C Build/RP1A.200720.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36',
'Accept': 'application/json, text/plain, */*',
'x-auth-uuid': self.random_str(32),
'x-system': 'Android',
'x-version': '5.0.5',
'x-channel': 'xj2',
'x-requested-with': 'com.uyvzkv.pnjzdv',
'sec-fetch-site': 'cross-site',
'sec-fetch-mode': 'cors',
'sec-fetch-dest': 'empty',
'accept-language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
}
host=f'https://{self.random_str(6)}.bjhpz.com'
data=self.fetch(f'{host}/init',headers=headers).json()
headers.update({'x-cookie-auth': data['data']['globalData'].get('xxx_api_auth')})
return host,headers
def random_str(self,length=16):
chars = string.ascii_lowercase + string.digits
return ''.join(random.choice(chars) for _ in range(length))

View File

@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
import json,re,sys,base64,requests
from Crypto.Cipher import AES
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
SELECTORS=['.video-item','.video-list .item','.list-item','.post-item']
def getName(self):return"黑料不打烊"
def init(self,extend=""):pass
def homeContent(self,filter):
cateManual={"最新黑料":"hlcg","今日热瓜":"jrrs","每日TOP10":"mrrb","周报精选":"zbjx","月榜热瓜":"ybrg","反差女友":"fczq","校园黑料":"xycg","网红黑料":"whhl","明星丑闻":"mxcw","原创社区":"ycsq","推特社区":"ttsq","社会新闻":"shxw","官场爆料":"gchl","影视短剧":"ysdj","全球奇闻":"qqqw","黑料课堂":"hlkt","每日大赛":"mrds","激情小说":"jqxs","桃图杂志":"ttzz","深夜综艺":"syzy","独家爆料":"djbl"}
return{'class':[{'type_name':k,'type_id':v}for k,v in cateManual.items()]}
def homeVideoContent(self):return{}
def categoryContent(self,tid,pg,filter,extend):
url=f'https://heiliao.com/{tid}/'if int(pg)==1 else f'https://heiliao.com/{tid}/page/{pg}/'
videos=self.get_list(url)
return{'list':videos,'page':pg,'pagecount':9999,'limit':90,'total':999999}
def fetch_and_decrypt_image(self,url):
try:
if url.startswith('//'):url='https:'+url
elif url.startswith('/'):url='https://heiliao.com'+url
r=requests.get(url,headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36','Referer':'https://heiliao.com/'},timeout=15,verify=False)
if r.status_code!=200:return b''
return AES.new(b'f5d965df75336270',AES.MODE_CBC,b'97b60394abc2fbe1').decrypt(r.content)
except: return b''
def _extract_img_from_onload(self,node):
try:
m=re.search(r"load(?:Share)?Img\s*\([^,]+,\s*['\"]([^'\"]+)['\"]",(node.attr('onload')or''))
return m.group(1)if m else''
except:return''
def _should_decrypt(self,url:str)->bool:
u=(url or'').lower();return any(x in u for x in['pic.gylhaa.cn','new.slfpld.cn','/upload_01/','/upload/'])
def _abs(self,u:str)->str:
if not u:return''
if u.startswith('//'):return'https:'+u
if u.startswith('/'):return'https://heiliao.com'+u
return u
def e64(self,s:str)->str:
try:return base64.b64encode((s or'').encode()).decode()
except:return''
def d64(self,s:str)->str:
try:return base64.b64decode((s or'').encode()).decode()
except:return''
def _img(self,img_node):
u=''if img_node is None else(img_node.attr('src')or img_node.attr('data-src')or'')
enc=''if img_node is None else self._extract_img_from_onload(img_node)
t=enc or u
return f"{self.getProxyUrl()}&url={self.e64(t)}&type=hlimg"if t and(enc or self._should_decrypt(t))else self._abs(t)
def _parse_items(self,root):
vids=[]
for sel in self.SELECTORS:
for it in root(sel).items():
title=it.find('.title, h3, h4, .video-title').text()
if not title:continue
link=it.find('a').attr('href')
if not link:continue
vids.append({'vod_id':self._abs(link),'vod_name':title,'vod_pic':self._img(it.find('img')),'vod_remarks':it.find('.date, .time, .remarks, .duration').text()or''})
if vids:break
return vids
def detailContent(self,array):
tid=array[0];url=tid if tid.startswith('http')else f'https://heiliao.com{tid}'
rsp=self.fetch(url)
if not rsp:return{'list':[]}
rsp.encoding='utf-8';html_text=rsp.text
try:root_text=pq(html_text)
except:root_text=None
try:root_content=pq(rsp.content)
except:root_content=None
title=(root_text('title').text()if root_text else'')or''
if' - 黑料网'in title:title=title.replace(' - 黑料网','')
pic=''
if root_text:
og=root_text('meta[property="og:image"]').attr('content')
if og and(og.endswith('.png')or og.endswith('.jpg')or og.endswith('.jpeg')):pic=og
else:pic=self._img(root_text('.video-item-img img'))
detail=''
if root_text:
detail=root_text('meta[name="description"]').attr('content')or''
if not detail:detail=root_text('.content').text()[:200]
play_from,play_url=[],[]
if root_content:
for i,p in enumerate(root_content('.dplayer').items()):
c=p.attr('config')
if not c:continue
try:s=(c.replace('&quot;','"').replace('&#34;','"').replace('&amp;','&').replace('&#38;','&').replace('&lt;','<').replace('&#60;','<').replace('&gt;','>').replace('&#62;','>'));u=(json.loads(s).get('video',{})or{}).get('url','')
except:m=re.search(r'"url"\s*:\s*"([^"]+)"',c);u=m.group(1)if m else''
if u:
u=u.replace('\\/','/');u=self._abs(u)
play_from.append(f'视频{i+1}');play_url.append(u)
if not play_url:
for pat in[r'https://hls\.[^"\']+\.m3u8[^"\']*',r'https://[^"\']+\.m3u8\?auth_key=[^"\']+',r'//hls\.[^"\']+\.m3u8[^"\']*']:
for u in re.findall(pat,html_text):
u=self._abs(u);play_from.append(f'视频{len(play_from)+1}');play_url.append(u)
if len(play_url)>=3:break
if play_url:break
if not play_url:
js_patterns=[r'video[\s\S]{0,500}?url[\s"\'`:=]+([^"\'`\s]+)',r'videoUrl[\s"\'`:=]+([^"\'`\s]+)',r'src[\s"\'`:=]+([^"\'`\s]+\.m3u8[^"\'`\s]*)']
for pattern in js_patterns:
js_urls=re.findall(pattern,html_text)
for js_url in js_urls:
if'.m3u8'in js_url:
if js_url.startswith('//'):js_url='https:'+js_url
elif js_url.startswith('/'):js_url='https://heiliao.com'+js_url
elif not js_url.startswith('http'):js_url='https://'+js_url
play_from.append(f'视频{len(play_from)+1}');play_url.append(js_url)
if len(play_url)>=3:break
if play_url:break
if not play_url:
play_from.append('示例视频');play_url.append("https://hls.obmoti.cn/videos5/b9699667fbbffcd464f8874395b91c81/b9699667fbbffcd464f8874395b91c81.m3u8?auth_key=1760372539-68ed273b94e7a-0-3a53bc0df110c5f149b7d374122ef1ed&v=2")
return{'list':[{'vod_id':tid,'vod_name':title,'vod_pic':pic,'vod_content':detail,'vod_play_from':'$$$'.join(play_from),'vod_play_url':'$$$'.join(play_url)}]}
def searchContent(self,key,quick,pg="1"):
rsp=self.fetch(f'https://heiliao.com/index/search?word={key}')
if not rsp:return{'list':[]}
return{'list':self._parse_items(pq(rsp.text))}
def playerContent(self,flag,id,vipFlags):
return{"parse":0,"playUrl":"","url":id,"header":{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36","Referer":"https://heiliao.com/"}}
def get_list(self,url):
rsp=self.fetch(url)
return[]if not rsp else self._parse_items(pq(rsp.text))
def fetch(self,url,params=None,cookies=None,headers=None,timeout=5,verify=True,stream=False,allow_redirects=True):
h=headers or{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36","Referer":"https://heiliao.com/"}
return super().fetch(url,params=params,cookies=cookies,headers=h,timeout=timeout,verify=verify,stream=stream,allow_redirects=allow_redirects)
def localProxy(self,param):
try:
if param.get('type')=='hlimg':
url=self.d64(param.get('url'))
if url.startswith('//'):url='https:'+url
elif url.startswith('/'):url='https://heiliao.com'+url
r=requests.get(url,headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36","Referer":"https://heiliao.com/"},timeout=15,verify=False)
if r.status_code!=200:return[404,'text/plain','']
b=AES.new(b'f5d965df75336270',AES.MODE_CBC,b'97b60394abc2fbe1').decrypt(r.content)
ct='image/jpeg'
if b.startswith(b'\x89PNG'):ct='image/png'
elif b.startswith(b'GIF8'):ct='image/gif'
return[200,ct,b]
except:pass
return[404,'text/plain','']

View File

@@ -0,0 +1,444 @@
# -*- coding: utf-8 -*-
import json, re, sys, base64, requests, threading, time, random, colorsys
from Crypto.Cipher import AES
from pyquery import PyQuery as pq
from urllib.parse import quote, unquote
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
SELECTORS = ['.video-item', '.video-list .item', '.list-item', '.post-item']
def init(self, extend='{}'):
"""初始化配置(支持代理)"""
try:
config = json.loads(extend) if extend else {}
except Exception:
config = {}
self.proxies = config.get('proxy', {}) # 示例:{"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}
self.plp = config.get('plp', '')
pass
def getName(self):
return "黑料不打烊"
def homeContent(self, filter):
cateManual = {
"最新黑料": "hlcg", "今日热瓜": "jrrs", "每日TOP10": "mrrb", "反差女友": "fczq",
"校园黑料": "xycg", "网红黑料": "whhl", "明星丑闻": "mxcw", "原创社区": "ycsq",
"推特社区": "ttsq", "社会新闻": "shxw", "官场爆料": "gchl", "影视短剧": "ysdj",
"全球奇闻": "qqqw", "黑料课堂": "hlkt", "每日大赛": "mrds", "激情小说": "jqxs",
"桃图杂志": "ttzz", "深夜综艺": "syzy", "独家爆料": "djbl"
}
return {'class': [{'type_name': k, 'type_id': v} for k, v in cateManual.items()]}
def homeVideoContent(self):
return {}
def categoryContent(self, tid, pg, filter, extend):
url = f'https://heiliao.com/{tid}/' if int(pg) == 1 else f'https://heiliao.com/{tid}/page/{pg}/'
videos = self.get_list(url)
return {'list': videos, 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}
def fetch_and_decrypt_image(self, url):
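# 封面图在源站整体经 AES-128-CBC(固定 key/iv)加密,取回后需先解密才是图片字节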
try:
if url.startswith('//'):
url = 'https:' + url
elif url.startswith('/'):
url = 'https://heiliao.com' + url
r = requests.get(
url,
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36',
'Referer': 'https://heiliao.com/'
},
timeout=15,
verify=False,
proxies=self.proxies # ✅ 使用代理
)
if r.status_code != 200:
return b''
return AES.new(b'f5d965df75336270', AES.MODE_CBC, b'97b60394abc2fbe1').decrypt(r.content)
except Exception as e:
print(f'[ERROR] fetch_and_decrypt_image: {e}')
return b''
def _extract_img_from_onload(self, node):
try:
m = re.search(r"load(?:Share)?Img\s*\([^,]+,\s*['\"]([^'\"]+)['\"]", (node.attr('onload') or ''))
return m.group(1) if m else ''
except:
return ''
def _should_decrypt(self, url: str) -> bool:
u = (url or '').lower()
return any(x in u for x in ['pic.gylhaa.cn', 'new.slfpld.cn', '/upload_01/', '/upload/'])
def _abs(self, u: str) -> str:
if not u:
return ''
if u.startswith('//'):
return 'https:' + u
if u.startswith('/'):
return 'https://heiliao.com' + u
return u
def e64(self, s: str) -> str:
try:
return base64.b64encode((s or '').encode()).decode()
except:
return ''
def d64(self, s: str) -> str:
try:
return base64.b64decode((s or '').encode()).decode()
except:
return ''
def _img(self, img_node):
u = '' if img_node is None else (img_node.attr('src') or img_node.attr('data-src') or '')
enc = '' if img_node is None else self._extract_img_from_onload(img_node)
t = enc or u
return f"{self.getProxyUrl()}&url={self.e64(t)}&type=hlimg" if t and (enc or self._should_decrypt(t)) else self._abs(t)
def _parse_items(self, root):
vids = []
for sel in self.SELECTORS:
for it in root(sel).items():
title = it.find('.title, h3, h4, .video-title').text()
if not title:
continue
link = it.find('a').attr('href')
if not link:
continue
vids.append({
'vod_id': self._abs(link),
'vod_name': title,
'vod_pic': self._img(it.find('img')),
'vod_remarks': it.find('.date, .time, .remarks, .duration').text() or ''
})
if vids:
break
return vids
def detailContent(self, array):
tid = array[0]
url = tid if tid.startswith('http') else f'https://heiliao.com{tid}'
rsp = self.fetch(url)
if not rsp:
return {'list': []}
rsp.encoding = 'utf-8'
html_text = rsp.text
try:
root_text = pq(html_text)
except:
root_text = None
try:
root_content = pq(rsp.content)
except:
root_content = None
title = (root_text('title').text() if root_text else '') or ''
if ' - 黑料网' in title:
title = title.replace(' - 黑料网', '')
pic = ''
if root_text:
og = root_text('meta[property="og:image"]').attr('content')
if og and (og.endswith('.png') or og.endswith('.jpg') or og.endswith('.jpeg')):
pic = og
else:
pic = self._img(root_text('.video-item-img img'))
detail = ''
if root_text:
detail = root_text('meta[name="description"]').attr('content') or ''
if not detail:
detail = root_text('.content').text()[:200]
play_from, play_url = [], []
if root_content:
for i, p in enumerate(root_content('.dplayer').items()):
c = p.attr('config')
if not c:
continue
try:
s = (c.replace('&quot;', '"')
.replace('&#34;', '"')
.replace('&amp;', '&')
.replace('&#38;', '&')
.replace('&lt;', '<')
.replace('&#60;', '<')
.replace('&gt;', '>')
.replace('&#62;', '>'))
u = (json.loads(s).get('video', {}) or {}).get('url', '')
except:
m = re.search(r'"url"\s*:\s*"([^"]+)"', c)
u = m.group(1) if m else ''
if u:
u = u.replace('\\/', '/')
u = self._abs(u)
article_id = self._extract_article_id(tid)
if article_id:
play_from.append(f'视频{i + 1}')
play_url.append(f"{article_id}_dm_{u}")
else:
play_from.append(f'视频{i + 1}')
play_url.append(u)
if not play_url:
for pat in [
r'https://hls\.[^"\']+\.m3u8[^"\']*',
r'https://[^"\']+\.m3u8\?auth_key=[^"\']+',
r'//hls\.[^"\']+\.m3u8[^"\']*'
]:
for u in re.findall(pat, html_text):
u = self._abs(u)
article_id = self._extract_article_id(tid)
if article_id:
play_from.append(f'视频{len(play_from) + 1}')
play_url.append(f"{article_id}_dm_{u}")
else:
play_from.append(f'视频{len(play_from) + 1}')
play_url.append(u)
if len(play_url) >= 3:
break
if play_url:
break
if not play_url:
js_patterns = [
r'video[\s\S]{0,500}?url[\s"\'`:=]+([^"\'`\s]+)',
r'videoUrl[\s"\'`:=]+([^"\'`\s]+)',
r'src[\s"\'`:=]+([^"\'`\s]+\.m3u8[^"\'`\s]*)'
]
for pattern in js_patterns:
js_urls = re.findall(pattern, html_text)
for js_url in js_urls:
if '.m3u8' in js_url:
if js_url.startswith('//'):
js_url = 'https:' + js_url
elif js_url.startswith('/'):
js_url = 'https://heiliao.com' + js_url
elif not js_url.startswith('http'):
js_url = 'https://' + js_url
article_id = self._extract_article_id(tid)
if article_id:
play_from.append(f'视频{len(play_from) + 1}')
play_url.append(f"{article_id}_dm_{js_url}")
else:
play_from.append(f'视频{len(play_from) + 1}')
play_url.append(js_url)
if len(play_url) >= 3:
break
if play_url:
break
if not play_url:
article_id = self._extract_article_id(tid)
example_url = "https://hls.obmoti.cn/videos5/b9699667fbbffcd464f8874395b91c81/b9699667fbbffcd464f8874395b91c81.m3u8"
if article_id:
play_from.append('示例视频')
play_url.append(f"{article_id}_dm_{example_url}")
else:
play_from.append('示例视频')
play_url.append(example_url)
return {
'list': [{
'vod_id': tid,
'vod_name': title,
'vod_pic': pic,
'vod_content': detail,
'vod_play_from': '$$$'.join(play_from),
'vod_play_url': '$$$'.join(play_url)
}]
}
def searchContent(self, key, quick, pg="1"):
rsp = self.fetch(f'https://heiliao.com/index/search?word={key}')
if not rsp:
return {'list': []}
return {'list': self._parse_items(pq(rsp.text))}
def playerContent(self, flag, id, vipFlags):
if '_dm_' in id:
aid, pid = id.split('_dm_', 1)
p = 0 if re.search(r'\.(m3u8|mp4|flv|ts|mkv|mov|avi|webm)', pid) else 1
if not p:
pid = f"{self.getProxyUrl()}&pdid={quote(id)}&type=m3u8"
return {
'parse': p,
'url': pid,
'header': {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36",
"Referer": "https://heiliao.com/"
}
}
else:
return {
"parse": 0,
"playUrl": "",
"url": id,
"header": {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36",
"Referer": "https://heiliao.com/"
}
}
def get_list(self, url):
rsp = self.fetch(url)
return [] if not rsp else self._parse_items(pq(rsp.text))
def fetch(self, url, params=None, cookies=None, headers=None, timeout=5, verify=True,
stream=False, allow_redirects=True):
h = headers or {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36",
"Referer": "https://heiliao.com/"
}
try:
return requests.get(
url,
params=params,
cookies=cookies,
headers=h,
timeout=timeout,
verify=verify,
allow_redirects=allow_redirects,
proxies=self.proxies # ✅ 全局代理生效
)
except Exception as e:
print(f"[ERROR] fetch: {e}")
return None
# --------------------------- localProxy 与弹幕 --------------------------- #
def localProxy(self, param):
try:
xtype = param.get('type', '')
if xtype == 'hlimg':
url = self.d64(param.get('url'))
if url.startswith('//'):
url = 'https:' + url
elif url.startswith('/'):
url = 'https://heiliao.com' + url
r = requests.get(
url,
headers={"User-Agent": "Mozilla/5.0", "Referer": "https://heiliao.com/"},
timeout=15,
verify=False,
proxies=self.proxies
)
if r.status_code != 200:
return [404, 'text/plain', '']
b = AES.new(b'f5d965df75336270', AES.MODE_CBC, b'97b60394abc2fbe1').decrypt(r.content)
ct = 'image/jpeg'
if b.startswith(b'\x89PNG'):
ct = 'image/png'
elif b.startswith(b'GIF8'):
ct = 'image/gif'
return [200, ct, b]
elif xtype == 'm3u8':
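# 反代 m3u8 主要为累计 #EXTINF 求出视频总时长,
# 再起后台线程按「文章 id + 时长」触发弹幕(hlxdm)刷新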
path, url = unquote(param['pdid']).split('_dm_', 1)
data = requests.get(
url,
headers={"User-Agent": "Mozilla/5.0", "Referer": "https://heiliao.com/"},
timeout=10,
proxies=self.proxies
).text
lines = data.strip().split('\n')
times = 0.0
for i in lines:
if i.startswith('#EXTINF:'):
times += float(i.split(':')[-1].replace(',', ''))
thread = threading.Thread(target=self.some_background_task, args=(path, int(times)))
thread.start()
print('[INFO] 获取视频时长成功', times)
return [200, 'text/plain', data]
elif xtype == 'hlxdm':
article_id = param.get('path', '')
times = int(param.get('times', 0))
comments = self._fetch_heiliao_comments(article_id)
return self._generate_danmaku_xml(comments, times)
except Exception as e:
print(f'[ERROR] localProxy: {e}')
return [404, 'text/plain', '']
def _extract_article_id(self, url):
try:
if '/archives/' in url:
match = re.search(r'/archives/(\d+)/?', url)
return match.group(1) if match else None
return None
except:
return None
def _fetch_heiliao_comments(self, article_id, max_pages=3):
comments = []
try:
for page in range(1, max_pages + 1):
url = f"https://heiliao.com/comments/1/{article_id}/{page}.json"
resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10, proxies=self.proxies)
if resp.status_code == 200:
data = resp.json()
if 'data' in data and 'list' in data['data'] and data['data']['list']:
for comment in data['data']['list']:
text = comment.get('content', '').strip()
if text and len(text) <= 100:
comments.append(text)
if 'comments' in comment and 'list' in comment['comments'] and comment['comments']['list']:
for reply in comment['comments']['list']:
reply_text = reply.get('content', '').strip()
if reply_text and len(reply_text) <= 100:
comments.append(reply_text)
if not data['data'].get('next', False):
break
else:
break
else:
break
except Exception as e:
print(f'[ERROR] _fetch_heiliao_comments: {e}')
return comments[:50]
def _generate_danmaku_xml(self, comments, video_duration):
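# 生成 B 站风格弹幕 XML;<d> 的 p 属性依次为:出现时间,模式(1 滚动/5 顶端),字号,十进制颜色,末位固定 0
# 例如:<d p="12.5,1,25,16777215,0">弹幕内容</d>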
try:
total_comments = len(comments)
tsrt = f'共有{total_comments}条弹幕来袭!!!'
danmu_xml = f'<?xml version="1.0" encoding="UTF-8"?>\n<i>\n'
danmu_xml += '\t<chatserver>chat.heiliao.com</chatserver>\n\t<chatid>88888888</chatid>\n'
danmu_xml += '\t<mission>0</mission>\n\t<maxlimit>99999</maxlimit>\n\t<state>0</state>\n'
danmu_xml += '\t<real_name>0</real_name>\n\t<source>heiliao</source>\n'
danmu_xml += f'\t<d p="0,5,25,16711680,0">{tsrt}</d>\n'
for i, comment in enumerate(comments):
base_time = (i / total_comments) * video_duration if total_comments > 0 else 0
dm_time = round(max(0, min(base_time + random.uniform(-3, 3), video_duration)), 1)
dm_color = self._get_danmaku_color()
dm_text = re.sub(r'[<>&\u0000\b]', '', comment)
danmu_xml += f'\t<d p="{dm_time},1,25,{dm_color},0">{dm_text}</d>\n'
danmu_xml += '</i>'
return [200, "text/xml", danmu_xml]
except Exception as e:
print(f'[ERROR] _generate_danmaku_xml: {e}')
return [500, 'text/html', '']
def _get_danmaku_color(self):
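# 约 10% 概率返回随机高饱和 HSV 彩色(转十进制 RGB),其余返回白色 16777215(0xFFFFFF)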
if random.random() < 0.1:
h = random.random()
s = random.uniform(0.7, 1.0)
v = random.uniform(0.8, 1.0)
r, g, b = colorsys.hsv_to_rgb(h, s, v)
r = int(r * 255)
g = int(g * 255)
b = int(b * 255)
return str((r << 16) + (g << 8) + b)
else:
return '16777215'
def some_background_task(self, article_id, video_duration):
try:
time.sleep(1)
danmaku_url = f"{self.getProxyUrl()}&path={quote(article_id)}&times={video_duration}&type=hlxdm"
self.fetch(f"http://127.0.0.1:9978/action?do=refresh&type=danmaku&path={quote(danmaku_url)}")
print(f'[INFO] 弹幕刷新成功: {article_id}')
except Exception as e:
print(f'[ERROR] some_background_task: {e}')