Since the "code differences" provided are empty, I cannot generate a specific commit message. Please supply the actual code changes so that I can produce a Chinese commit message that follows the Angular convention.

A commit message following the Angular convention has this structure:
- `<type>`: the kind of change (e.g. feat, fix, docs, style, refactor, test, chore)
- `<scope>`: the area affected (e.g. component, service, module)
- `<subject>`: a short summary (in Chinese)
- `<body>`: a detailed description (in Chinese)
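For example, a hypothetical message in this format (illustrative only, not tied to any real change):

fix(component): 修复列表组件空数据时的渲染异常

当接口返回空数组时显示占位提示,避免页面渲染报错。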

Please provide the code change details and I will generate the complete commit message for you.
Wang.Luo 2025-10-17 01:31:47 +08:00
parent 14075ea74a
commit 44ae06c5ee
5 changed files with 1721 additions and 0 deletions

View File

@@ -0,0 +1,404 @@
# -*- coding: utf-8 -*-
# 🌈 Love
import json
import random
import re
import sys
import threading
import time
from base64 import b64decode, b64encode
from urllib.parse import urlparse
import requests
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
        try:
            self.proxies = json.loads(extend)
        except:
            self.proxies = {}
self.headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Cache-Control': 'no-cache',
}
# Use working dynamic URLs directly
self.host = self.get_working_host()
self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})
self.log(f"使用站点: {self.host}")
print(f"使用站点: {self.host}")
pass
def getName(self):
return "🌈 51吸瓜"
def isVideoFormat(self, url):
# Treat direct media formats as playable without parsing
return any(ext in (url or '') for ext in ['.m3u8', '.mp4', '.ts'])
def manualVideoCheck(self):
return False
def destroy(self):
pass
def homeContent(self, filter):
try:
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'class': [], 'list': []}
data = self.getpq(response.text)
result = {}
classes = []
# Try to get categories from different possible locations
category_selectors = [
'.category-list ul li',
'.nav-menu li',
'.menu li',
'nav ul li'
]
for selector in category_selectors:
for k in data(selector).items():
link = k('a')
href = (link.attr('href') or '').strip()
name = (link.text() or '').strip()
# Skip placeholder or invalid entries
if not href or href == '#' or not name:
continue
classes.append({
'type_name': name,
'type_id': href
})
if classes:
break
# If no categories found, create some default ones
if not classes:
classes = [
{'type_name': '首页', 'type_id': '/'},
{'type_name': '最新', 'type_id': '/latest/'},
{'type_name': '热门', 'type_id': '/hot/'}
]
result['class'] = classes
result['list'] = self.getlist(data('#index article a'))
return result
except Exception as e:
print(f"homeContent error: {e}")
return {'class': [], 'list': []}
def homeVideoContent(self):
try:
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': []}
data = self.getpq(response.text)
return {'list': self.getlist(data('#index article a, #archive article a'))}
except Exception as e:
print(f"homeVideoContent error: {e}")
return {'list': []}
def categoryContent(self, tid, pg, filter, extend):
try:
if '@folder' in tid:
id = tid.replace('@folder', '')
videos = self.getfod(id)
else:
# Build URL properly
if tid.startswith('/'):
if pg and pg != '1':
url = f"{self.host}{tid}page/{pg}/"
else:
url = f"{self.host}{tid}"
else:
url = f"{self.host}/{tid}"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a'), tid)
result = {}
result['list'] = videos
result['page'] = pg
result['pagecount'] = 1 if '@folder' in tid else 99999
result['limit'] = 90
result['total'] = 999999
return result
except Exception as e:
print(f"categoryContent error: {e}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
def detailContent(self, ids):
try:
url = f"{self.host}{ids[0]}" if not ids[0].startswith('http') else ids[0]
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [{'vod_play_from': '51吸瓜', 'vod_play_url': f'页面加载失败${url}'}]}
data = self.getpq(response.text)
vod = {'vod_play_from': '51吸瓜'}
# Get content/description
try:
clist = []
if data('.tags .keywords a'):
for k in data('.tags .keywords a').items():
title = k.text()
href = k.attr('href')
if title and href:
clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')
vod['vod_content'] = ' '.join(clist) if clist else data('.post-title').text()
except:
vod['vod_content'] = data('.post-title').text() or '51吸瓜视频'
# Get video URLs (build episode list when multiple players exist)
try:
plist = []
used_names = set()
if data('.dplayer'):
for c, k in enumerate(data('.dplayer').items(), start=1):
config_attr = k.attr('data-config')
if config_attr:
try:
config = json.loads(config_attr)
video_url = config.get('video', {}).get('url', '')
# Determine a readable episode name from nearby headings if present
ep_name = ''
try:
parent = k.parents().eq(0)
# search up to a few ancestors for a heading text
for _ in range(3):
if not parent: break
heading = parent.find('h2, h3, h4').eq(0).text() or ''
heading = heading.strip()
if heading:
ep_name = heading
break
parent = parent.parents().eq(0)
except Exception:
ep_name = ''
base_name = ep_name if ep_name else f"视频{c}"
name = base_name
count = 2
# Ensure the name is unique
while name in used_names:
name = f"{base_name} {count}"
count += 1
used_names.add(name)
if video_url:
self.log(f"解析到视频: {name} -> {video_url}")
print(f"解析到视频: {name} -> {video_url}")
plist.append(f"{name}${video_url}")
except:
continue
if plist:
self.log(f"拼装播放列表,共{len(plist)}")
print(f"拼装播放列表,共{len(plist)}")
vod['vod_play_url'] = '#'.join(plist)
else:
vod['vod_play_url'] = f"未找到视频源${url}"
except Exception as e:
vod['vod_play_url'] = f"视频解析失败${url}"
return {'list': [vod]}
except Exception as e:
print(f"detailContent error: {e}")
return {'list': [{'vod_play_from': '51吸瓜', 'vod_play_url': f'详情页加载失败${ids[0] if ids else ""}'}]}
def searchContent(self, key, quick, pg="1"):
try:
url = f"{self.host}/search/{key}/{pg}" if pg != "1" else f"{self.host}/search/{key}/"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [], 'page': pg}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a'))
return {'list': videos, 'page': pg}
except Exception as e:
print(f"searchContent error: {e}")
return {'list': [], 'page': pg}
def playerContent(self, flag, id, vipFlags):
url = id
p = 1
if self.isVideoFormat(url):
# m3u8/mp4 direct play; when using proxy setting, wrap to proxy for m3u8
if '.m3u8' in url:
url = self.proxy(url)
p = 0
self.log(f"播放请求: parse={p}, url={url}")
print(f"播放请求: parse={p}, url={url}")
return {'parse': p, 'url': url, 'header': self.headers}
    def localProxy(self, param):
        if param.get('type') == 'img':
            res = requests.get(param['url'], headers=self.headers, proxies=self.proxies, timeout=10)
            return [200, res.headers.get('Content-Type'), self.aesimg(res.content)]
        elif param.get('type') == 'm3u8':
            return self.m3Proxy(param['url'])
        else:
            return self.tsProxy(param['url'])
    def proxy(self, data, type='m3u8'):
        # Only wrap the URL through the local proxy when an upstream proxy is configured; otherwise return it untouched
        if data and len(self.proxies):
            return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
        else:
            return data
def m3Proxy(self, url):
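        # Rewrite the fetched m3u8: follow a single redirect, resolve relative key/segment URIs against the playlist location, and route them back through the local proxy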
url=self.d64(url)
ydata = requests.get(url, headers=self.headers, proxies=self.proxies, allow_redirects=False)
data = ydata.content.decode('utf-8')
if ydata.headers.get('Location'):
url = ydata.headers['Location']
data = requests.get(url, headers=self.headers, proxies=self.proxies).content.decode('utf-8')
lines = data.strip().split('\n')
last_r = url[:url.rfind('/')]
parsed_url = urlparse(url)
durl = parsed_url.scheme + "://" + parsed_url.netloc
iskey=True
for index, string in enumerate(lines):
if iskey and 'URI' in string:
pattern = r'URI="([^"]*)"'
match = re.search(pattern, string)
if match:
lines[index] = re.sub(pattern, f'URI="{self.proxy(match.group(1), "mkey")}"', string)
iskey=False
continue
if '#EXT' not in string:
if 'http' not in string:
domain = last_r if string.count('/') < 2 else durl
string = domain + ('' if string.startswith('/') else '/') + string
lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
data = '\n'.join(lines)
return [200, "application/vnd.apple.mpegur", data]
def tsProxy(self, url):
url = self.d64(url)
data = requests.get(url, headers=self.headers, proxies=self.proxies, stream=True)
return [200, data.headers['Content-Type'], data.content]
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self, encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def get_working_host(self):
"""Get working host from known dynamic URLs"""
# Known working URLs from the dynamic gateway
dynamic_urls = [
'https://artist.vgwtswi.xyz',
'https://ability.vgwtswi.xyz',
'https://am.vgwtswi.xyz'
]
# Test each URL to find a working one
for url in dynamic_urls:
try:
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)
if response.status_code == 200:
# Verify it has the expected content structure
data = self.getpq(response.text)
articles = data('#index article a')
if len(articles) > 0:
self.log(f"选用可用站点: {url}")
print(f"选用可用站点: {url}")
return url
except Exception as e:
continue
# Fallback to first URL if none work (better than crashing)
self.log(f"未检测到可用站点,回退: {dynamic_urls[0]}")
print(f"未检测到可用站点,回退: {dynamic_urls[0]}")
return dynamic_urls[0]
def getlist(self, data, tid=''):
videos = []
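        # Entries under /mrdg are multi-part collections; mark them as folders so categoryContent expands them via getfod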
l = '/mrdg' in tid
for k in data.items():
a = k.attr('href')
b = k('h2').text()
# Some pages might not include datePublished; use a fallback
c = k('span[itemprop="datePublished"]').text() or k('.post-meta, .entry-meta, time').text()
if a and b:
videos.append({
'vod_id': f"{a}{'@folder' if l else ''}",
'vod_name': b.replace('\n', ' '),
'vod_pic': self.getimg(k('script').text()),
'vod_remarks': c or '',
'vod_tag': 'folder' if l else '',
'style': {"type": "rect", "ratio": 1.33}
})
return videos
def getfod(self, id):
url = f"{self.host}{id}"
data = self.getpq(requests.get(url, headers=self.headers, proxies=self.proxies).text)
vdata=data('.post-content[itemprop="articleBody"]')
r=['.txt-apps','.line','blockquote','.tags','.content-tabs']
for i in r:vdata.remove(i)
p=vdata('p')
videos=[]
for i,x in enumerate(vdata('h2').items()):
c=i*2
videos.append({
'vod_id': p.eq(c)('a').attr('href'),
'vod_name': p.eq(c).text(),
'vod_pic': f"{self.getProxyUrl()}&url={p.eq(c+1)('img').attr('data-xkrkllgl')}&type=img",
'vod_remarks':x.text()
})
return videos
def getimg(self, text):
match = re.search(r"loadBannerDirect\('([^']+)'", text)
if match:
url = match.group(1)
return f"{self.getProxyUrl()}&url={url}&type=img"
else:
return ''
def aesimg(self, word):
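        # Thumbnails are served AES-128-CBC encrypted with a fixed key/IV; decrypt and strip the PKCS#7 padding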
key = b'f5d965df75336270'
iv = b'97b60394abc2fbe1'
cipher = AES.new(key, AES.MODE_CBC, iv)
decrypted = unpad(cipher.decrypt(word), AES.block_size)
return decrypted
def getpq(self, data):
try:
return pq(data)
except Exception as e:
print(f"{str(e)}")
return pq(data.encode('utf-8'))

View File

@@ -0,0 +1,716 @@
# -*- coding: utf-8 -*-
# 🌈 Love
import json
import random
import re
import sys
import threading
import time
from base64 import b64decode, b64encode
from urllib.parse import urlparse, quote
import requests
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
def init(self, extend=""):
        try:
            self.proxies = json.loads(extend)
        except:
            self.proxies = {}
self.headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-Language': 'zh-CN,zh;q=0.9',
'Connection': 'keep-alive',
'Cache-Control': 'no-cache',
}
# Use working dynamic URLs directly
self.host = self.get_working_host()
self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})
self.log(f"使用站点: {self.host}")
print(f"使用站点: {self.host}")
pass
def getName(self):
return "🌈 今日看料"
def isVideoFormat(self, url):
# Treat direct media formats as playable without parsing
return any(ext in (url or '') for ext in ['.m3u8', '.mp4', '.ts'])
def manualVideoCheck(self):
return False
def destroy(self):
pass
def homeContent(self, filter):
try:
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'class': [], 'list': []}
data = self.getpq(response.text)
result = {}
classes = []
# 优先从导航栏获取分类
nav_selectors = [
'#navbarCollapse .navbar-nav .nav-item .nav-link',
'.navbar-nav .nav-item .nav-link',
'#nav .menu-item a',
'.menu .menu-item a'
]
found_categories = False
for selector in nav_selectors:
for item in data(selector).items():
href = item.attr('href') or ''
name = item.text().strip()
# 过滤掉非分类链接
if (not href or not name or
href == '#' or
href.startswith('http') or
'about' in href.lower() or
'contact' in href.lower() or
'tags' in href.lower() or
'top' in href.lower() or
'start' in href.lower() or
'time' in href.lower()):
continue
# 确保是分类链接包含category或明确的分类路径
if '/category/' in href or any(cat in href for cat in ['/dy/', '/ks/', '/douyu/', '/hy/', '/hj/', '/tt/', '/wh/', '/asmr/', '/xb/', '/xsp/', '/rdgz/']):
# 处理相对路径
if href.startswith('/'):
type_id = href
else:
type_id = f'/{href}'
classes.append({
'type_name': name,
'type_id': type_id
})
found_categories = True
# 如果导航栏没找到,尝试从分类下拉菜单获取
if not found_categories:
category_selectors = [
'.category-list a',
'.slide-toggle + .category-list a',
'.menu .category-list a'
]
for selector in category_selectors:
for item in data(selector).items():
href = item.attr('href') or ''
name = item.text().strip()
if href and name and href != '#':
if href.startswith('/'):
type_id = href
else:
type_id = f'/{href}'
classes.append({
'type_name': name,
'type_id': type_id
})
found_categories = True
# 去重
unique_classes = []
seen_ids = set()
for cls in classes:
if cls['type_id'] not in seen_ids:
unique_classes.append(cls)
seen_ids.add(cls['type_id'])
# 如果没有找到分类,创建默认分类
if not unique_classes:
unique_classes = [
{'type_name': '热点关注', 'type_id': '/category/rdgz/'},
{'type_name': '抖音', 'type_id': '/category/dy/'},
{'type_name': '快手', 'type_id': '/category/ks/'},
{'type_name': '斗鱼', 'type_id': '/category/douyu/'},
{'type_name': '虎牙', 'type_id': '/category/hy/'},
{'type_name': '花椒', 'type_id': '/category/hj/'},
{'type_name': '推特', 'type_id': '/category/tt/'},
{'type_name': '网红', 'type_id': '/category/wh/'},
{'type_name': 'ASMR', 'type_id': '/category/asmr/'},
{'type_name': 'X播', 'type_id': '/category/xb/'},
{'type_name': '小视频', 'type_id': '/category/xsp/'}
]
result['class'] = unique_classes
result['list'] = self.getlist(data('#index article a, #archive article a'))
return result
except Exception as e:
print(f"homeContent error: {e}")
return {'class': [], 'list': []}
def homeVideoContent(self):
try:
response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': []}
data = self.getpq(response.text)
return {'list': self.getlist(data('#index article a, #archive article a'))}
except Exception as e:
print(f"homeVideoContent error: {e}")
return {'list': []}
def categoryContent(self, tid, pg, filter, extend):
try:
# 修复URL构建 - 去除多余的斜杠
base_url = tid.lstrip('/').rstrip('/')
if pg and pg != '1':
url = f"{self.host}{base_url}/{pg}/"
else:
url = f"{self.host}{base_url}/"
print(f"分类页面URL: {url}")
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
print(f"分类页面请求失败: {response.status_code}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a, .post-card'), tid)
# 如果没有找到视频,尝试其他选择器
if not videos:
videos = self.getlist(data('article a, .post a, .entry-title a'), tid)
print(f"找到 {len(videos)} 个视频")
# 改进的页数检测逻辑
pagecount = self.detect_page_count(data, pg)
result = {}
result['list'] = videos
result['page'] = pg
result['pagecount'] = pagecount
result['limit'] = 90
result['total'] = 999999
return result
except Exception as e:
print(f"categoryContent error: {e}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
def tagContent(self, tid, pg, filter, extend):
"""标签页面内容"""
try:
# 修复URL构建 - 去除多余的斜杠
base_url = tid.lstrip('/').rstrip('/')
if pg and pg != '1':
url = f"{self.host}{base_url}/{pg}/"
else:
url = f"{self.host}{base_url}/"
print(f"标签页面URL: {url}")
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
print(f"标签页面请求失败: {response.status_code}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a, .post-card'), tid)
# 如果没有找到视频,尝试其他选择器
if not videos:
videos = self.getlist(data('article a, .post a, .entry-title a'), tid)
print(f"找到 {len(videos)} 个标签相关视频")
# 页数检测
pagecount = self.detect_page_count(data, pg)
result = {}
result['list'] = videos
result['page'] = pg
result['pagecount'] = pagecount
result['limit'] = 90
result['total'] = 999999
return result
except Exception as e:
print(f"tagContent error: {e}")
return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
def detect_page_count(self, data, current_page):
"""改进的页数检测方法"""
pagecount = 99999 # 默认大数字,允许无限翻页
# 方法1: 检查分页器中的所有页码链接
page_numbers = []
# 查找所有可能的页码链接
page_selectors = [
'.page-navigator a',
'.pagination a',
'.pages a',
'.page-numbers a'
]
for selector in page_selectors:
for page_link in data(selector).items():
href = page_link.attr('href') or ''
text = page_link.text().strip()
# 从href中提取页码
if href:
# 匹配 /category/dy/2/ 这种格式
match = re.search(r'/(\d+)/?$', href.rstrip('/'))
if match:
page_num = int(match.group(1))
if page_num not in page_numbers:
page_numbers.append(page_num)
# 从文本中提取数字页码
if text and text.isdigit():
page_num = int(text)
if page_num not in page_numbers:
page_numbers.append(page_num)
# 如果有找到页码,取最大值
if page_numbers:
max_page = max(page_numbers)
print(f"从分页器检测到最大页码: {max_page}")
return max_page
# 方法2: 检查是否存在"下一页"按钮
next_selectors = [
'.page-navigator .next',
'.pagination .next',
'.next-page',
'a:contains("下一页")'
]
for selector in next_selectors:
if data(selector):
print("检测到下一页按钮,允许继续翻页")
return 99999
# 方法3: 如果当前页视频数量很少,可能没有下一页
if len(data('#archive article, #index article, .post-card')) < 5:
print("当前页内容较少,可能没有下一页")
return int(current_page)
print("使用默认页数: 99999")
return 99999
def detailContent(self, ids):
try:
url = f"{self.host}{ids[0]}" if not ids[0].startswith('http') else ids[0]
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [{'vod_play_from': '今日看料', 'vod_play_url': f'页面加载失败${url}'}]}
data = self.getpq(response.text)
vod = {'vod_play_from': '今日看料'}
# 获取标题
title_selectors = ['.post-title', 'h1.entry-title', 'h1', '.post-card-title']
for selector in title_selectors:
title_elem = data(selector)
if title_elem:
vod['vod_name'] = title_elem.text().strip()
break
if 'vod_name' not in vod:
vod['vod_name'] = '今日看料视频'
# 获取内容/描述
try:
clist = []
if data('.tags .keywords a'):
for k in data('.tags .keywords a').items():
title = k.text()
href = k.attr('href')
if title and href:
# 使href相对路径
if href.startswith(self.host):
href = href.replace(self.host, '')
clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')
vod['vod_content'] = ' '.join(clist) if clist else data('.post-content').text() or vod['vod_name']
except:
vod['vod_content'] = vod['vod_name']
# 获取视频URLs
try:
plist = []
used_names = set()
# 查找DPlayer视频
if data('.dplayer'):
for c, k in enumerate(data('.dplayer').items(), start=1):
config_attr = k.attr('data-config')
if config_attr:
try:
config = json.loads(config_attr)
video_url = config.get('video', {}).get('url', '')
if video_url:
name = f"视频{c}"
count = 2
while name in used_names:
name = f"视频{c}_{count}"
count += 1
used_names.add(name)
self.log(f"解析到视频: {name} -> {video_url}")
print(f"解析到视频: {name} -> {video_url}")
plist.append(f"{name}${video_url}")
except:
continue
# 查找视频标签
if not plist:
video_selectors = ['video source', 'video', 'iframe[src*="video"]', 'a[href*=".m3u8"]', 'a[href*=".mp4"]']
for selector in video_selectors:
for c, elem in enumerate(data(selector).items(), start=1):
src = elem.attr('src') or elem.attr('href') or ''
if src and any(ext in src for ext in ['.m3u8', '.mp4', 'video']):
name = f"视频{c}"
count = 2
while name in used_names:
name = f"视频{c}_{count}"
count += 1
used_names.add(name)
plist.append(f"{name}${src}")
if plist:
self.log(f"拼装播放列表,共{len(plist)}")
print(f"拼装播放列表,共{len(plist)}")
vod['vod_play_url'] = '#'.join(plist)
else:
vod['vod_play_url'] = f"正片${url}"
except Exception as e:
print(f"视频解析错误: {e}")
vod['vod_play_url'] = f"正片${url}"
return {'list': [vod]}
except Exception as e:
print(f"detailContent error: {e}")
return {'list': [{'vod_play_from': '今日看料', 'vod_play_url': f'详情页加载失败${ids[0] if ids else ""}'}]}
def searchContent(self, key, quick, pg="1"):
try:
# 优先使用标签搜索
encoded_key = quote(key)
url = f"{self.host}/tag/{encoded_key}/{pg}" if pg != "1" else f"{self.host}/tag/{encoded_key}/"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
# 尝试搜索页面
url = f"{self.host}/search/{encoded_key}/{pg}" if pg != "1" else f"{self.host}/search/{encoded_key}/"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [], 'page': pg}
data = self.getpq(response.text)
videos = self.getlist(data('#archive article a, #index article a, .post-card'))
# 使用改进的页数检测方法
pagecount = self.detect_page_count(data, pg)
return {'list': videos, 'page': pg, 'pagecount': pagecount}
except Exception as e:
print(f"searchContent error: {e}")
return {'list': [], 'page': pg}
def getTagsContent(self, pg="1"):
"""获取标签页面内容"""
try:
url = f"{self.host}/tags.html"
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
if response.status_code != 200:
return {'list': [], 'page': pg}
data = self.getpq(response.text)
tags = []
# 从标签页面提取所有标签 - 使用更宽松的选择器
for tag_elem in data('a[href*="/tag/"]').items():
tag_name = tag_elem.text().strip()
tag_href = tag_elem.attr('href') or ''
if tag_name and tag_href and '/tag/' in tag_href and tag_name != '全部标签': # 排除标题链接
# 处理为相对路径
tag_id = tag_href.replace(self.host, '')
if not tag_id.startswith('/'):
tag_id = '/' + tag_id
tags.append({
'vod_id': tag_id,
'vod_name': f"🏷️ {tag_name}",
'vod_pic': '',
'vod_remarks': '标签',
'vod_tag': 'tag',
'style': {"type": "rect", "ratio": 1.33}
})
print(f"找到 {len(tags)} 个标签")
# 分页处理 - 标签页面通常不需要分页
result = {}
result['list'] = tags
result['page'] = pg
result['pagecount'] = 1 # 标签页面通常只有一页
result['limit'] = 999
result['total'] = len(tags)
return result
except Exception as e:
print(f"getTagsContent error: {e}")
return {'list': [], 'page': pg}
def playerContent(self, flag, id, vipFlags):
url = id
p = 1
if self.isVideoFormat(url):
if '.m3u8' in url:
url = self.proxy(url)
p = 0
self.log(f"播放请求: parse={p}, url={url}")
print(f"播放请求: parse={p}, url={url}")
return {'parse': p, 'url': url, 'header': self.headers}
def localProxy(self, param):
try:
if param.get('type') == 'img':
img_url = self.d64(param['url'])
if not img_url.startswith(('http://', 'https://')):
if img_url.startswith('/'):
img_url = f"{self.host}{img_url}"
else:
img_url = f"{self.host}/{img_url}"
res = requests.get(img_url, headers=self.headers, proxies=self.proxies, timeout=10)
return [200, res.headers.get('Content-Type', 'image/jpeg'), res.content]
elif param.get('type') == 'm3u8':
return self.m3Proxy(param['url'])
else:
return self.tsProxy(param['url'])
except Exception as e:
print(f"localProxy error: {e}")
return [500, "text/plain", f"Proxy error: {str(e)}".encode()]
def proxy(self, data, type='m3u8'):
if data and len(self.proxies):
return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
else:
return data
def m3Proxy(self, url):
try:
url = self.d64(url)
ydata = requests.get(url, headers=self.headers, proxies=self.proxies, allow_redirects=False)
data = ydata.content.decode('utf-8')
if ydata.headers.get('Location'):
url = ydata.headers['Location']
data = requests.get(url, headers=self.headers, proxies=self.proxies).content.decode('utf-8')
lines = data.strip().split('\n')
last_r = url[:url.rfind('/')]
parsed_url = urlparse(url)
durl = parsed_url.scheme + "://" + parsed_url.netloc
iskey = True
for index, string in enumerate(lines):
if iskey and 'URI' in string:
pattern = r'URI="([^"]*)"'
match = re.search(pattern, string)
if match:
lines[index] = re.sub(pattern, f'URI="{self.proxy(match.group(1), "mkey")}"', string)
iskey = False
continue
if '#EXT' not in string:
if 'http' not in string:
domain = last_r if string.count('/') < 2 else durl
string = domain + ('' if string.startswith('/') else '/') + string
lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
data = '\n'.join(lines)
return [200, "application/vnd.apple.mpegur", data]
except Exception as e:
print(f"m3Proxy error: {e}")
return [500, "text/plain", f"m3u8 proxy error: {str(e)}".encode()]
def tsProxy(self, url):
try:
url = self.d64(url)
data = requests.get(url, headers=self.headers, proxies=self.proxies, stream=True)
return [200, data.headers.get('Content-Type', 'video/mp2t'), data.content]
except Exception as e:
print(f"tsProxy error: {e}")
return [500, "text/plain", f"ts proxy error: {str(e)}".encode()]
def e64(self, text):
try:
text_bytes = text.encode('utf-8')
encoded_bytes = b64encode(text_bytes)
return encoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64编码错误: {str(e)}")
return ""
def d64(self, encoded_text):
try:
encoded_bytes = encoded_text.encode('utf-8')
decoded_bytes = b64decode(encoded_bytes)
return decoded_bytes.decode('utf-8')
except Exception as e:
print(f"Base64解码错误: {str(e)}")
return ""
def get_working_host(self):
"""Get working host from known dynamic URLs"""
dynamic_urls = [
'https://kanliao2.one/',
'https://kanliao7.org/',
'https://kanliao7.net/',
'https://kanliao14.com/'
]
for url in dynamic_urls:
try:
response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)
if response.status_code == 200:
data = self.getpq(response.text)
articles = data('#index article a, #archive article a')
if len(articles) > 0:
self.log(f"选用可用站点: {url}")
print(f"选用可用站点: {url}")
return url
except Exception as e:
continue
self.log(f"未检测到可用站点,回退: {dynamic_urls[0]}")
print(f"未检测到可用站点,回退: {dynamic_urls[0]}")
return dynamic_urls[0]
def getlist(self, data, tid=''):
videos = []
for k in data.items():
a = k.attr('href')
b = k('h2').text() or k('.post-card-title').text() or k('.entry-title').text() or k.text()
c = k('span[itemprop="datePublished"]').text() or k('.post-meta, .entry-meta, time, .post-card-info').text()
# 过滤广告:检查是否包含"热搜HOT"标志
if self.is_advertisement(k):
print(f"过滤广告: {b}")
continue
if a and b and b.strip():
# 处理相对路径
if not a.startswith('http'):
if a.startswith('/'):
vod_id = a
else:
vod_id = f'/{a}'
else:
vod_id = a
videos.append({
'vod_id': vod_id,
'vod_name': b.replace('\n', ' ').strip(),
'vod_pic': self.get_article_img(k),
'vod_remarks': c.strip() if c else '',
'vod_tag': '',
'style': {"type": "rect", "ratio": 1.33}
})
return videos
def is_advertisement(self, article_elem):
"""判断是否为广告包含热搜HOT标志"""
# 检查.wraps元素是否包含"热搜HOT"文本
hot_elements = article_elem.find('.wraps')
for elem in hot_elements.items():
if '热搜HOT' in elem.text():
return True
# 检查标题是否包含广告关键词
title = article_elem('h2').text() or article_elem('.post-card-title').text() or ''
ad_keywords = ['热搜HOT', '手机链接', 'DNS设置', '修改DNS', 'WIFI设置']
if any(keyword in title for keyword in ad_keywords):
return True
# 检查背景颜色是否为广告特有的渐变背景
style = article_elem.attr('style') or ''
if 'background:' in style and any(gradient in style for gradient in ['-webkit-linear-gradient', 'linear-gradient']):
# 进一步检查是否包含特定的广告颜色组合
ad_gradients = ['#ec008c,#fc6767', '#ffe259,#ffa751']
if any(gradient in style for gradient in ad_gradients):
return True
return False
def get_article_img(self, article_elem):
"""从文章元素中提取图片,多种方式尝试"""
# 方式1: 从script标签中提取loadBannerDirect
script_text = article_elem('script').text()
if script_text:
match = re.search(r"loadBannerDirect\('([^']+)'", script_text)
if match:
url = match.group(1)
if not url.startswith(('http://', 'https://')):
if url.startswith('/'):
url = f"{self.host}{url}"
else:
url = f"{self.host}/{url}"
return f"{self.getProxyUrl()}&url={self.e64(url)}&type=img"
# 方式2: 从背景图片中提取
bg_elem = article_elem.find('.blog-background')
if bg_elem:
style = bg_elem.attr('style') or ''
bg_match = re.search(r'background-image:\s*url\(["\']?([^"\'\)]+)["\']?\)', style)
if bg_match:
img_url = bg_match.group(1)
if img_url and not img_url.startswith('data:'):
if not img_url.startswith(('http://', 'https://')):
if img_url.startswith('/'):
img_url = f"{self.host}{img_url}"
else:
img_url = f"{self.host}/{img_url}"
return f"{self.getProxyUrl()}&url={self.e64(img_url)}&type=img"
# 方式3: 从图片标签中提取
img_elem = article_elem.find('img')
if img_elem:
data_src = img_elem.attr('data-src')
if data_src:
if not data_src.startswith(('http://', 'https://')):
if data_src.startswith('/'):
data_src = f"{self.host}{data_src}"
else:
data_src = f"{self.host}/{data_src}"
return f"{self.getProxyUrl()}&url={self.e64(data_src)}&type=img"
src = img_elem.attr('src')
if src:
if not src.startswith(('http://', 'https://')):
if src.startswith('/'):
src = f"{self.host}{src}"
else:
src = f"{self.host}/{src}"
return f"{self.getProxyUrl()}&url={self.e64(src)}&type=img"
return ''
def getpq(self, data):
try:
return pq(data)
except Exception as e:
print(f"{str(e)}")
return pq(data.encode('utf-8'))

View File

@@ -0,0 +1,139 @@
# -*- coding: utf-8 -*-
import json,re,sys,base64,requests
from Crypto.Cipher import AES
from pyquery import PyQuery as pq
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
SELECTORS=['.video-item','.video-list .item','.list-item','.post-item']
def getName(self):return"黑料不打烊"
def init(self,extend=""):pass
def homeContent(self,filter):
cateManual={"最新黑料":"hlcg","今日热瓜":"jrrs","每日TOP10":"mrrb","周报精选":"zbjx","月榜热瓜":"ybrg","反差女友":"fczq","校园黑料":"xycg","网红黑料":"whhl","明星丑闻":"mxcw","原创社区":"ycsq","推特社区":"ttsq","社会新闻":"shxw","官场爆料":"gchl","影视短剧":"ysdj","全球奇闻":"qqqw","黑料课堂":"hlkt","每日大赛":"mrds","激情小说":"jqxs","桃图杂志":"ttzz","深夜综艺":"syzy","独家爆料":"djbl"}
return{'class':[{'type_name':k,'type_id':v}for k,v in cateManual.items()]}
def homeVideoContent(self):return{}
def categoryContent(self,tid,pg,filter,extend):
url=f'https://heiliao.com/{tid}/'if int(pg)==1 else f'https://heiliao.com/{tid}/page/{pg}/'
videos=self.get_list(url)
return{'list':videos,'page':pg,'pagecount':9999,'limit':90,'total':999999}
def fetch_and_decrypt_image(self,url):
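        # Download an encrypted cover image and decrypt it with the site's fixed AES-128-CBC key/IV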
try:
if url.startswith('//'):url='https:'+url
elif url.startswith('/'):url='https://heiliao.com'+url
r=requests.get(url,headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36','Referer':'https://heiliao.com/'},timeout=15,verify=False)
if r.status_code!=200:return b''
return AES.new(b'f5d965df75336270',AES.MODE_CBC,b'97b60394abc2fbe1').decrypt(r.content)
except: return b''
def _extract_img_from_onload(self,node):
try:
m=re.search(r"load(?:Share)?Img\s*\([^,]+,\s*['\"]([^'\"]+)['\"]",(node.attr('onload')or''))
return m.group(1)if m else''
except:return''
def _should_decrypt(self,url:str)->bool:
u=(url or'').lower();return any(x in u for x in['pic.gylhaa.cn','new.slfpld.cn','/upload_01/','/upload/'])
def _abs(self,u:str)->str:
if not u:return''
if u.startswith('//'):return'https:'+u
if u.startswith('/'):return'https://heiliao.com'+u
return u
def e64(self,s:str)->str:
try:return base64.b64encode((s or'').encode()).decode()
except:return''
def d64(self,s:str)->str:
try:return base64.b64decode((s or'').encode()).decode()
except:return''
def _img(self,img_node):
u=''if img_node is None else(img_node.attr('src')or img_node.attr('data-src')or'')
enc=''if img_node is None else self._extract_img_from_onload(img_node)
t=enc or u
return f"{self.getProxyUrl()}&url={self.e64(t)}&type=hlimg"if t and(enc or self._should_decrypt(t))else self._abs(t)
def _parse_items(self,root):
vids=[]
for sel in self.SELECTORS:
for it in root(sel).items():
title=it.find('.title, h3, h4, .video-title').text()
if not title:continue
link=it.find('a').attr('href')
if not link:continue
vids.append({'vod_id':self._abs(link),'vod_name':title,'vod_pic':self._img(it.find('img')),'vod_remarks':it.find('.date, .time, .remarks, .duration').text()or''})
if vids:break
return vids
def detailContent(self,array):
tid=array[0];url=tid if tid.startswith('http')else f'https://heiliao.com{tid}'
rsp=self.fetch(url)
if not rsp:return{'list':[]}
rsp.encoding='utf-8';html_text=rsp.text
try:root_text=pq(html_text)
except:root_text=None
try:root_content=pq(rsp.content)
except:root_content=None
title=(root_text('title').text()if root_text else'')or''
if' - 黑料网'in title:title=title.replace(' - 黑料网','')
pic=''
if root_text:
og=root_text('meta[property="og:image"]').attr('content')
if og and(og.endswith('.png')or og.endswith('.jpg')or og.endswith('.jpeg')):pic=og
else:pic=self._img(root_text('.video-item-img img'))
detail=''
if root_text:
detail=root_text('meta[name="description"]').attr('content')or''
if not detail:detail=root_text('.content').text()[:200]
play_from,play_url=[],[]
if root_content:
for i,p in enumerate(root_content('.dplayer').items()):
c=p.attr('config')
if not c:continue
try:s=(c.replace('&quot;','"').replace('&#34;','"').replace('&amp;','&').replace('&#38;','&').replace('&lt;','<').replace('&#60;','<').replace('&gt;','>').replace('&#62;','>'));u=(json.loads(s).get('video',{})or{}).get('url','')
except:m=re.search(r'"url"\s*:\s*"([^"]+)"',c);u=m.group(1)if m else''
if u:
u=u.replace('\\/','/');u=self._abs(u)
play_from.append(f'视频{i+1}');play_url.append(u)
if not play_url:
for pat in[r'https://hls\.[^"\']+\.m3u8[^"\']*',r'https://[^"\']+\.m3u8\?auth_key=[^"\']+',r'//hls\.[^"\']+\.m3u8[^"\']*']:
for u in re.findall(pat,html_text):
u=self._abs(u);play_from.append(f'视频{len(play_from)+1}');play_url.append(u)
if len(play_url)>=3:break
if play_url:break
if not play_url:
js_patterns=[r'video[\s\S]{0,500}?url[\s"\'`:=]+([^"\'`\s]+)',r'videoUrl[\s"\'`:=]+([^"\'`\s]+)',r'src[\s"\'`:=]+([^"\'`\s]+\.m3u8[^"\'`\s]*)']
for pattern in js_patterns:
js_urls=re.findall(pattern,html_text)
for js_url in js_urls:
if'.m3u8'in js_url:
if js_url.startswith('//'):js_url='https:'+js_url
elif js_url.startswith('/'):js_url='https://heiliao.com'+js_url
elif not js_url.startswith('http'):js_url='https://'+js_url
play_from.append(f'视频{len(play_from)+1}');play_url.append(js_url)
if len(play_url)>=3:break
if play_url:break
if not play_url:
play_from.append('示例视频');play_url.append("https://hls.obmoti.cn/videos5/b9699667fbbffcd464f8874395b91c81/b9699667fbbffcd464f8874395b91c81.m3u8?auth_key=1760372539-68ed273b94e7a-0-3a53bc0df110c5f149b7d374122ef1ed&v=2")
return{'list':[{'vod_id':tid,'vod_name':title,'vod_pic':pic,'vod_content':detail,'vod_play_from':'$$$'.join(play_from),'vod_play_url':'$$$'.join(play_url)}]}
def searchContent(self,key,quick,pg="1"):
rsp=self.fetch(f'https://heiliao.com/index/search?word={key}')
if not rsp:return{'list':[]}
return{'list':self._parse_items(pq(rsp.text))}
def playerContent(self,flag,id,vipFlags):
return{"parse":0,"playUrl":"","url":id,"header":{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36","Referer":"https://heiliao.com/"}}
def get_list(self,url):
rsp=self.fetch(url)
return[]if not rsp else self._parse_items(pq(rsp.text))
def fetch(self,url,params=None,cookies=None,headers=None,timeout=5,verify=True,stream=False,allow_redirects=True):
h=headers or{"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36","Referer":"https://heiliao.com/"}
return super().fetch(url,params=params,cookies=cookies,headers=h,timeout=timeout,verify=verify,stream=stream,allow_redirects=allow_redirects)
def localProxy(self,param):
try:
if param.get('type')=='hlimg':
url=self.d64(param.get('url'))
if url.startswith('//'):url='https:'+url
elif url.startswith('/'):url='https://heiliao.com'+url
r=requests.get(url,headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36","Referer":"https://heiliao.com/"},timeout=15,verify=False)
if r.status_code!=200:return[404,'text/plain','']
b=AES.new(b'f5d965df75336270',AES.MODE_CBC,b'97b60394abc2fbe1').decrypt(r.content)
ct='image/jpeg'
if b.startswith(b'\x89PNG'):ct='image/png'
elif b.startswith(b'GIF8'):ct='image/gif'
return[200,ct,b]
except:pass
return[404,'text/plain','']

View File

@@ -0,0 +1,444 @@
# -*- coding: utf-8 -*-
import json, re, sys, base64, requests, threading, time, random, colorsys
from Crypto.Cipher import AES
from pyquery import PyQuery as pq
from urllib.parse import quote, unquote
sys.path.append('..')
from base.spider import Spider
class Spider(Spider):
SELECTORS = ['.video-item', '.video-list .item', '.list-item', '.post-item']
def init(self, extend='{}'):
"""初始化配置(支持代理)"""
        try:
            config = json.loads(extend) if extend else {}
        except Exception:
            config = {}
        self.proxies = config.get('proxy', {})  # 示例:{"http": "http://127.0.0.1:7890", "https": "http://127.0.0.1:7890"}
        self.plp = config.get('plp', '')
pass
def getName(self):
return "黑料不打烊"
def homeContent(self, filter):
cateManual = {
"最新黑料": "hlcg", "今日热瓜": "jrrs", "每日TOP10": "mrrb", "反差女友": "fczq",
"校园黑料": "xycg", "网红黑料": "whhl", "明星丑闻": "mxcw", "原创社区": "ycsq",
"推特社区": "ttsq", "社会新闻": "shxw", "官场爆料": "gchl", "影视短剧": "ysdj",
"全球奇闻": "qqqw", "黑料课堂": "hlkt", "每日大赛": "mrds", "激情小说": "jqxs",
"桃图杂志": "ttzz", "深夜综艺": "syzy", "独家爆料": "djbl"
}
return {'class': [{'type_name': k, 'type_id': v} for k, v in cateManual.items()]}
def homeVideoContent(self):
return {}
def categoryContent(self, tid, pg, filter, extend):
url = f'https://heiliao.com/{tid}/' if int(pg) == 1 else f'https://heiliao.com/{tid}/page/{pg}/'
videos = self.get_list(url)
return {'list': videos, 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}
def fetch_and_decrypt_image(self, url):
try:
if url.startswith('//'):
url = 'https:' + url
elif url.startswith('/'):
url = 'https://heiliao.com' + url
r = requests.get(
url,
headers={
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36',
'Referer': 'https://heiliao.com/'
},
timeout=15,
verify=False,
proxies=self.proxies # ✅ 使用代理
)
if r.status_code != 200:
return b''
return AES.new(b'f5d965df75336270', AES.MODE_CBC, b'97b60394abc2fbe1').decrypt(r.content)
except Exception as e:
print(f'[ERROR] fetch_and_decrypt_image: {e}')
return b''
def _extract_img_from_onload(self, node):
try:
m = re.search(r"load(?:Share)?Img\s*\([^,]+,\s*['\"]([^'\"]+)['\"]", (node.attr('onload') or ''))
return m.group(1) if m else ''
except:
return ''
def _should_decrypt(self, url: str) -> bool:
u = (url or '').lower()
return any(x in u for x in ['pic.gylhaa.cn', 'new.slfpld.cn', '/upload_01/', '/upload/'])
def _abs(self, u: str) -> str:
if not u:
return ''
if u.startswith('//'):
return 'https:' + u
if u.startswith('/'):
return 'https://heiliao.com' + u
return u
def e64(self, s: str) -> str:
try:
return base64.b64encode((s or '').encode()).decode()
except:
return ''
def d64(self, s: str) -> str:
try:
return base64.b64decode((s or '').encode()).decode()
except:
return ''
def _img(self, img_node):
u = '' if img_node is None else (img_node.attr('src') or img_node.attr('data-src') or '')
enc = '' if img_node is None else self._extract_img_from_onload(img_node)
t = enc or u
return f"{self.getProxyUrl()}&url={self.e64(t)}&type=hlimg" if t and (enc or self._should_decrypt(t)) else self._abs(t)
def _parse_items(self, root):
vids = []
for sel in self.SELECTORS:
for it in root(sel).items():
title = it.find('.title, h3, h4, .video-title').text()
if not title:
continue
link = it.find('a').attr('href')
if not link:
continue
vids.append({
'vod_id': self._abs(link),
'vod_name': title,
'vod_pic': self._img(it.find('img')),
'vod_remarks': it.find('.date, .time, .remarks, .duration').text() or ''
})
if vids:
break
return vids
def detailContent(self, array):
tid = array[0]
url = tid if tid.startswith('http') else f'https://heiliao.com{tid}'
rsp = self.fetch(url)
if not rsp:
return {'list': []}
rsp.encoding = 'utf-8'
html_text = rsp.text
try:
root_text = pq(html_text)
except:
root_text = None
try:
root_content = pq(rsp.content)
except:
root_content = None
title = (root_text('title').text() if root_text else '') or ''
if ' - 黑料网' in title:
title = title.replace(' - 黑料网', '')
pic = ''
if root_text:
og = root_text('meta[property="og:image"]').attr('content')
if og and (og.endswith('.png') or og.endswith('.jpg') or og.endswith('.jpeg')):
pic = og
else:
pic = self._img(root_text('.video-item-img img'))
detail = ''
if root_text:
detail = root_text('meta[name="description"]').attr('content') or ''
if not detail:
detail = root_text('.content').text()[:200]
play_from, play_url = [], []
if root_content:
for i, p in enumerate(root_content('.dplayer').items()):
c = p.attr('config')
if not c:
continue
try:
s = (c.replace('&quot;', '"')
.replace('&#34;', '"')
.replace('&amp;', '&')
.replace('&#38;', '&')
.replace('&lt;', '<')
.replace('&#60;', '<')
.replace('&gt;', '>')
.replace('&#62;', '>'))
u = (json.loads(s).get('video', {}) or {}).get('url', '')
except:
m = re.search(r'"url"\s*:\s*"([^"]+)"', c)
u = m.group(1) if m else ''
if u:
u = u.replace('\\/', '/')
u = self._abs(u)
article_id = self._extract_article_id(tid)
if article_id:
play_from.append(f'视频{i + 1}')
play_url.append(f"{article_id}_dm_{u}")
else:
play_from.append(f'视频{i + 1}')
play_url.append(u)
if not play_url:
for pat in [
r'https://hls\.[^"\']+\.m3u8[^"\']*',
r'https://[^"\']+\.m3u8\?auth_key=[^"\']+',
r'//hls\.[^"\']+\.m3u8[^"\']*'
]:
for u in re.findall(pat, html_text):
u = self._abs(u)
article_id = self._extract_article_id(tid)
if article_id:
play_from.append(f'视频{len(play_from) + 1}')
play_url.append(f"{article_id}_dm_{u}")
else:
play_from.append(f'视频{len(play_from) + 1}')
play_url.append(u)
if len(play_url) >= 3:
break
if play_url:
break
if not play_url:
js_patterns = [
r'video[\s\S]{0,500}?url[\s"\'`:=]+([^"\'`\s]+)',
r'videoUrl[\s"\'`:=]+([^"\'`\s]+)',
r'src[\s"\'`:=]+([^"\'`\s]+\.m3u8[^"\'`\s]*)'
]
for pattern in js_patterns:
js_urls = re.findall(pattern, html_text)
for js_url in js_urls:
if '.m3u8' in js_url:
if js_url.startswith('//'):
js_url = 'https:' + js_url
elif js_url.startswith('/'):
js_url = 'https://heiliao.com' + js_url
elif not js_url.startswith('http'):
js_url = 'https://' + js_url
article_id = self._extract_article_id(tid)
if article_id:
play_from.append(f'视频{len(play_from) + 1}')
play_url.append(f"{article_id}_dm_{js_url}")
else:
play_from.append(f'视频{len(play_from) + 1}')
play_url.append(js_url)
if len(play_url) >= 3:
break
if play_url:
break
if not play_url:
article_id = self._extract_article_id(tid)
example_url = "https://hls.obmoti.cn/videos5/b9699667fbbffcd464f8874395b91c81/b9699667fbbffcd464f8874395b91c81.m3u8"
if article_id:
play_from.append('示例视频')
play_url.append(f"{article_id}_dm_{example_url}")
else:
play_from.append('示例视频')
play_url.append(example_url)
return {
'list': [{
'vod_id': tid,
'vod_name': title,
'vod_pic': pic,
'vod_content': detail,
'vod_play_from': '$$$'.join(play_from),
'vod_play_url': '$$$'.join(play_url)
}]
}
def searchContent(self, key, quick, pg="1"):
rsp = self.fetch(f'https://heiliao.com/index/search?word={key}')
if not rsp:
return {'list': []}
return {'list': self._parse_items(pq(rsp.text))}
def playerContent(self, flag, id, vipFlags):
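        # ids built by detailContent look like '<articleId>_dm_<mediaUrl>'; direct media URLs are routed through localProxy so the playlist duration can drive the danmaku overlay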
if '_dm_' in id:
aid, pid = id.split('_dm_', 1)
p = 0 if re.search(r'\.(m3u8|mp4|flv|ts|mkv|mov|avi|webm)', pid) else 1
if not p:
pid = f"{self.getProxyUrl()}&pdid={quote(id)}&type=m3u8"
return {
'parse': p,
'url': pid,
'header': {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36",
"Referer": "https://heiliao.com/"
}
}
else:
return {
"parse": 0,
"playUrl": "",
"url": id,
"header": {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36",
"Referer": "https://heiliao.com/"
}
}
def get_list(self, url):
rsp = self.fetch(url)
return [] if not rsp else self._parse_items(pq(rsp.text))
def fetch(self, url, params=None, cookies=None, headers=None, timeout=5, verify=True,
stream=False, allow_redirects=True):
h = headers or {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.7049.96 Safari/537.36",
"Referer": "https://heiliao.com/"
}
try:
return requests.get(
url,
params=params,
cookies=cookies,
headers=h,
timeout=timeout,
verify=verify,
allow_redirects=allow_redirects,
proxies=self.proxies # ✅ 全局代理生效
)
except Exception as e:
print(f"[ERROR] fetch: {e}")
return None
# --------------------------- localProxy 与弹幕 --------------------------- #
def localProxy(self, param):
try:
xtype = param.get('type', '')
if xtype == 'hlimg':
url = self.d64(param.get('url'))
if url.startswith('//'):
url = 'https:' + url
elif url.startswith('/'):
url = 'https://heiliao.com' + url
r = requests.get(
url,
headers={"User-Agent": "Mozilla/5.0", "Referer": "https://heiliao.com/"},
timeout=15,
verify=False,
proxies=self.proxies
)
if r.status_code != 200:
return [404, 'text/plain', '']
b = AES.new(b'f5d965df75336270', AES.MODE_CBC, b'97b60394abc2fbe1').decrypt(r.content)
ct = 'image/jpeg'
if b.startswith(b'\x89PNG'):
ct = 'image/png'
elif b.startswith(b'GIF8'):
ct = 'image/gif'
return [200, ct, b]
elif xtype == 'm3u8':
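                # Sum the #EXTINF segment durations to estimate the video length, then refresh the danmaku track in a background thread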
path, url = unquote(param['pdid']).split('_dm_', 1)
data = requests.get(
url,
headers={"User-Agent": "Mozilla/5.0", "Referer": "https://heiliao.com/"},
timeout=10,
proxies=self.proxies
).text
lines = data.strip().split('\n')
times = 0.0
for i in lines:
if i.startswith('#EXTINF:'):
times += float(i.split(':')[-1].replace(',', ''))
thread = threading.Thread(target=self.some_background_task, args=(path, int(times)))
thread.start()
print('[INFO] 获取视频时长成功', times)
return [200, 'text/plain', data]
elif xtype == 'hlxdm':
article_id = param.get('path', '')
times = int(param.get('times', 0))
comments = self._fetch_heiliao_comments(article_id)
return self._generate_danmaku_xml(comments, times)
except Exception as e:
print(f'[ERROR] localProxy: {e}')
return [404, 'text/plain', '']
def _extract_article_id(self, url):
try:
if '/archives/' in url:
match = re.search(r'/archives/(\d+)/?', url)
return match.group(1) if match else None
return None
except:
return None
def _fetch_heiliao_comments(self, article_id, max_pages=3):
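        # Page through the article's comment JSON endpoint, flattening replies into a single list (capped at 50 entries)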
comments = []
try:
for page in range(1, max_pages + 1):
url = f"https://heiliao.com/comments/1/{article_id}/{page}.json"
resp = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10, proxies=self.proxies)
if resp.status_code == 200:
data = resp.json()
if 'data' in data and 'list' in data['data'] and data['data']['list']:
for comment in data['data']['list']:
text = comment.get('content', '').strip()
if text and len(text) <= 100:
comments.append(text)
if 'comments' in comment and 'list' in comment['comments'] and comment['comments']['list']:
for reply in comment['comments']['list']:
reply_text = reply.get('content', '').strip()
if reply_text and len(reply_text) <= 100:
comments.append(reply_text)
if not data['data'].get('next', False):
break
else:
break
else:
break
except Exception as e:
print(f'[ERROR] _fetch_heiliao_comments: {e}')
return comments[:50]
def _generate_danmaku_xml(self, comments, video_duration):
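        # Build Bilibili-style danmaku XML: each <d p="time,mode,fontsize,color,..."> entry spreads the comments across the video duration with a little random jitter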
try:
total_comments = len(comments)
tsrt = f'共有{total_comments}条弹幕来袭!!!'
danmu_xml = f'<?xml version="1.0" encoding="UTF-8"?>\n<i>\n'
danmu_xml += '\t<chatserver>chat.heiliao.com</chatserver>\n\t<chatid>88888888</chatid>\n'
danmu_xml += '\t<mission>0</mission>\n\t<maxlimit>99999</maxlimit>\n\t<state>0</state>\n'
danmu_xml += '\t<real_name>0</real_name>\n\t<source>heiliao</source>\n'
danmu_xml += f'\t<d p="0,5,25,16711680,0">{tsrt}</d>\n'
for i, comment in enumerate(comments):
base_time = (i / total_comments) * video_duration if total_comments > 0 else 0
dm_time = round(max(0, min(base_time + random.uniform(-3, 3), video_duration)), 1)
dm_color = self._get_danmaku_color()
dm_text = re.sub(r'[<>&\u0000\b]', '', comment)
danmu_xml += f'\t<d p="{dm_time},1,25,{dm_color},0">{dm_text}</d>\n'
danmu_xml += '</i>'
return [200, "text/xml", danmu_xml]
except Exception as e:
print(f'[ERROR] _generate_danmaku_xml: {e}')
return [500, 'text/html', '']
def _get_danmaku_color(self):
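        # About 10% of danmaku get a random saturated HSV color; the rest stay white (16777215)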
if random.random() < 0.1:
h = random.random()
s = random.uniform(0.7, 1.0)
v = random.uniform(0.8, 1.0)
r, g, b = colorsys.hsv_to_rgb(h, s, v)
r = int(r * 255)
g = int(g * 255)
b = int(b * 255)
return str((r << 16) + (g << 8) + b)
else:
return '16777215'
def some_background_task(self, article_id, video_duration):
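        # Ask the host app (assumed to be listening on 127.0.0.1:9978) to reload the freshly generated danmaku XML once the duration is known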
try:
time.sleep(1)
danmaku_url = f"{self.getProxyUrl()}&path={quote(article_id)}&times={video_duration}&type=hlxdm"
self.fetch(f"http://127.0.0.1:9978/action?do=refresh&type=danmaku&path={quote(danmaku_url)}")
print(f'[INFO] 弹幕刷新成功: {article_id}')
except Exception as e:
print(f'[ERROR] some_background_task: {e}')

View File

@@ -1243,6 +1243,24 @@
"quickSearch": 1,
"filterable": 1
},
{
"key": "黑料不打烊",
"name": "黑料不打烊",
"type": 3,
"api": "./PyramidStore/plugin/adult/黑料弹幕版.py",
"searchable": 1,
"quickSearch": 1,
"filterable": 1
},
{
"key": "今日看料",
"name": "今日看料",
"type": 3,
"api": "./PyramidStore/plugin/adult/今日看料.py",
"searchable": 1,
"quickSearch": 1,
"filterable": 1
},
{
"key": "花都",
"name": "花都",