feat(adult): add support for 911爆料网, UVod, and the 花都 resource site

Adds configuration and parser scripts for three adult-content sites:
- 911爆料网 (911.py): category, search, detail, and play-link parsing
- UVod (UVod.py): encrypted API requests, video categories, and play-source parsing
- 花都: adds the corresponding resource entry to adult.json

Also updates api.json with two new APP data-source entries, "余白" and "星空".
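For reference, a minimal sketch of the `ext` payload the 911.py spider consumes. The key names (`site`, `proxy`, `plp`) are the ones read by `init()` in the file below; how the loader wraps this payload inside adult.json is an assumption, so the values here are placeholders only.

```python
import json

# Hypothetical ext payload for the 911.py entry in adult.json; only the keys
# below are read by Spider.init(), everything else is up to the loader.
ext = {
    "site": "https://911blw.com",  # primary domain; built-in backups are raced against it
    "proxy": {},                   # optional requests-style proxy mapping
    "plp": ""                      # optional prefix prepended to play URLs
}

# The host app is expected to pass the payload to init() as a JSON string:
extend = json.dumps(ext, ensure_ascii=False)
```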
911.py · 435 lines · 17 KiB · Python

```python
# -*- coding: utf-8 -*-
import json
import random
import re
import sys
import threading
import time

import requests
from base64 import b64decode, b64encode
from urllib.parse import urlparse, urljoin
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
from bs4 import BeautifulSoup

sys.path.append('..')
from base.spider import Spider

class Spider(Spider):

    def init(self, extend="{}"):
        config = json.loads(extend)
        self.domin = config.get('site', "https://911blw.com")
        self.proxies = config.get('proxy', {}) or {}
        self.plp = config.get('plp', '')
        self.backup_urls = ["https://hlj.fun", "https://911bl16.com"]

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
            'Accept-Language': 'zh-CN,zh;q=0.9'
        }

        # Pick the fastest reachable host
        self.host = self.host_late([self.domin] + self.backup_urls)
        self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})

        # Cache the host advertised on the about page
        self.getcnh()

    def getName(self):
        return "911爆料网"

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    def homeContent(self, filter):
        result = {}

        # Category list (based on 911爆料网's actual sections)
        categories = [
            {"type_id": "/category/jrgb/", "type_name": "最新爆料"},
            {"type_id": "/category/rmgb/", "type_name": "精选大瓜"},
            {"type_id": "/category/blqw/", "type_name": "猎奇吃瓜"},
            {"type_id": "/category/rlph/", "type_name": "TOP5大瓜"},
            {"type_id": "/category/ssdbl/", "type_name": "社会热点"},
            {"type_id": "/category/hjsq/", "type_name": "海角社区"},
            {"type_id": "/category/mrds/", "type_name": "每日大赛"},
            {"type_id": "/category/xyss/", "type_name": "校园吃瓜"},
            {"type_id": "/category/mxhl/", "type_name": "明星吃瓜"},
            {"type_id": "/category/whbl/", "type_name": "网红爆料"},
            {"type_id": "/category/bgzq/", "type_name": "反差爆料"},
            {"type_id": "/category/fljq/", "type_name": "网黄福利"},
            {"type_id": "/category/crfys/", "type_name": "午夜剧场"},
            {"type_id": "/category/thjx/", "type_name": "探花经典"},
            {"type_id": "/category/dmhv/", "type_name": "禁漫天堂"},
            {"type_id": "/category/slec/", "type_name": "吃瓜精选"},
            {"type_id": "/category/zksr/", "type_name": "重口调教"},
            {"type_id": "/category/crlz/", "type_name": "精选连载"}
        ]

        result['class'] = categories

        # Home page recommendations
        html = self.fetch_page(f"{self.host}/")
        if html:
            soup = BeautifulSoup(html, 'html.parser')
            articles = soup.select('article, .post-item, .article-item')
            result['list'] = self.getlist(articles)
        else:
            result['list'] = []

        return result

    def homeVideoContent(self):
        # Home page recommended videos (latest-leaks section)
        html = self.fetch_page(f"{self.host}/category/jrgb/1/")
        soup = BeautifulSoup(html or '', 'html.parser')
        articles = soup.select('article, .post-item, .article-item, ul.row li')
        return {'list': self.getlist(articles)}

    def categoryContent(self, tid, pg, filter, extend):
        if '@folder' in tid:
            # Folder-type item: expand its inner list
            id = tid.replace('@folder', '')
            videos = self.getfod(id)
        else:
            # Regular category page
            url = f"{self.host}{tid}{pg}/" if pg != "1" else f"{self.host}{tid}"
            html = self.fetch_page(url)
            if html:
                soup = BeautifulSoup(html, 'html.parser')
                articles = soup.select('article, .post-item, .article-item, ul.row li')
                videos = self.getlist(articles, tid)
            else:
                videos = []

        result = {}
        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = 1 if '@folder' in tid else 99999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        url = ids[0] if ids[0].startswith("http") else f"{self.host}{ids[0]}"
        html = self.fetch_page(url)

        if not html:
            return {'list': []}

        soup = BeautifulSoup(html, 'html.parser')
        vod = {'vod_play_from': '911爆料网'}

        try:
            # Collect tag links as clickable markers
            clist = []
            tags = soup.select('.tags .keywords a, .tagcloud a, a[rel="tag"]')
            for tag in tags:
                title = tag.get_text(strip=True)
                href = tag.get('href', '')
                if href and title:
                    clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')

            vod['vod_content'] = '点击展开↓↓↓\n' + ' '.join(clist) if clist else soup.select_one('.post-content, .entry-content').get_text(strip=True)[:200] + '...'
        except:
            title_elem = soup.select_one('h1, .post-title, .entry-title')
            vod['vod_content'] = title_elem.get_text(strip=True) if title_elem else "无简介"

        try:
            # Build the play list (dplayer-style pages, similar to 51吸瓜)
            plist = []

            # Method 1: dplayer data-config blocks
            dplayers = soup.select('.dplayer, [data-config]')
            for c, player in enumerate(dplayers, start=1):
                config_str = player.get('data-config', '{}')
                try:
                    config = json.loads(config_str)
                    if 'video' in config and 'url' in config['video']:
                        plist.append(f"视频{c}${config['video']['url']}")
                except:
                    pass

            # Method 2: <video>/<source> tags
            if not plist:
                video_tags = soup.select('video source, video[src]')
                for c, video in enumerate(video_tags, start=1):
                    src = video.get('src') or ''
                    if src:
                        plist.append(f"视频{c}${src}")

            # Method 3: embedded iframes
            if not plist:
                iframes = soup.select('iframe[src]')
                for c, iframe in enumerate(iframes, start=1):
                    src = iframe.get('src', '')
                    if src and ('player' in src or 'video' in src):
                        plist.append(f"视频{c}${src}")

            # Method 4: video links (m3u8, mp4, ...) inside inline scripts
            if not plist:
                scripts = soup.find_all('script')
                for script in scripts:
                    if script.string:
                        video_matches = re.findall(r'(https?://[^\s"\']*\.(?:m3u8|mp4|flv|ts|mkv)[^\s"\']*)', script.string)
                        for c, match in enumerate(video_matches, start=1):
                            plist.append(f"视频{c}${match}")

            vod['vod_play_url'] = '#'.join(plist) if plist else f"请检查页面,可能没有视频${url}"

        except Exception as e:
            print(f"详情页解析错误: {e}")
            vod['vod_play_url'] = f"解析错误${url}"

        return {'list': [vod]}

    def searchContent(self, key, quick, pg="1"):
        url = f"{self.host}/search/{key}/{pg}/"
        html = self.fetch_page(url)
        if html:
            soup = BeautifulSoup(html, 'html.parser')
            articles = soup.select('article, .post-item, .article-item, ul.row li')
            videos = self.getlist(articles)
        else:
            videos = []

        return {'list': videos, 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}

    def playerContent(self, flag, id, vipFlags):
        # Direct video formats play as-is (parse=0); everything else goes to the web parser (parse=1)
        p = 0 if re.search(r'\.(m3u8|mp4|flv|ts|mkv|mov|avi|webm)', id) else 1
        return {'parse': p, 'url': f"{self.plp}{id}", 'header': self.headers}

    def localProxy(self, param):
        try:
            url = self.d64(param['url'])
            match = re.search(r"loadBannerDirect\('([^']*)'", url)
            if match:
                url = match.group(1)

            res = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)

            # Decide whether AES decryption is needed based on the file extension
            if url.endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
                # Plain images are returned untouched
                return [200, res.headers.get('Content-Type'), res.content]
            else:
                # Encrypted payloads are decrypted with AES first
                return [200, res.headers.get('Content-Type'), self.aesimg(res.content)]

        except Exception as e:
            print(f"图片代理错误: {str(e)}")
            return [500, 'text/html', '']

    def e64(self, text):
        try:
            text_bytes = text.encode('utf-8')
            encoded_bytes = b64encode(text_bytes)
            return encoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Base64编码错误: {str(e)}")
            return ""

    def d64(self, encoded_text):
        try:
            encoded_bytes = encoded_text.encode('utf-8')
            decoded_bytes = b64decode(encoded_bytes)
            return decoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Base64解码错误: {str(e)}")
            return ""

    def aesimg(self, word):
        # AES-CBC with the site's fixed key/IV; unpad strips the PKCS#7 padding
        key = b'f5d965df75336270'
        iv = b'97b60394abc2fbe1'
        cipher = AES.new(key, AES.MODE_CBC, iv)
        decrypted = unpad(cipher.decrypt(word), AES.block_size)
        return decrypted

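    # For reference, a hand-run of the decryption path above (a sketch only,
    # assuming the site serves whole-file AES blobs as handled by localProxy;
    # 'enc_url' and 'spider' are hypothetical names):
    #   raw = requests.get(enc_url, headers=spider.headers).content
    #   open('cover.webp', 'wb').write(spider.aesimg(raw))
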
    def fetch_page(self, url, use_backup=False):
        original_url = url
        if use_backup:
            for backup in self.backup_urls:
                test_url = url.replace(self.domin, backup)
                try:
                    time.sleep(1)
                    res = requests.get(test_url, headers=self.headers, proxies=self.proxies, timeout=10)
                    res.raise_for_status()
                    res.encoding = "utf-8"
                    text = res.text
                    if len(text) > 1000:
                        print(f"[DEBUG] 使用备用 {backup}: {test_url}")
                        return text
                except:
                    continue

        try:
            time.sleep(1)
            res = requests.get(original_url, headers=self.headers, proxies=self.proxies, timeout=10)
            res.raise_for_status()
            res.encoding = "utf-8"
            text = res.text
            if len(text) < 1000 and not use_backup:
                # Suspiciously short response: retry once through the backup domains
                print(f"[DEBUG] 内容过短,尝试备用域名")
                return self.fetch_page(original_url, use_backup=True)
            return text
        except Exception as e:
            print(f"[ERROR] 请求失败 {original_url}: {e}")
            return None

    def getcnh(self):
        # Cache the host advertised on the about page
        try:
            html = self.fetch_page(f"{self.host}/about.html")
            if html:
                soup = BeautifulSoup(html, 'html.parser')
                link = soup.select_one('a[href]')
                if link:
                    url = link.get('href')
                    parsed_url = urlparse(url)
                    host = parsed_url.scheme + "://" + parsed_url.netloc
                    self.setCache('host_911blw', host)
        except Exception as e:
            print(f"获取主机信息错误: {str(e)}")

    def host_late(self, url_list):
        # Race a HEAD request against every candidate and keep the fastest responder
        if not url_list:
            return self.domin

        results = {}
        threads = []

        def test_host(url):
            try:
                start_time = time.time()
                requests.head(url, headers=self.headers, proxies=self.proxies, timeout=1.0, allow_redirects=False)
                delay = (time.time() - start_time) * 1000
                results[url] = delay
            except Exception:
                results[url] = float('inf')

        for url in url_list:
            t = threading.Thread(target=test_host, args=(url,))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        return min(results.items(), key=lambda x: x[1])[0]

    def getfod(self, id):
        url = f"{self.host}{id}"
        html = self.fetch_page(url)
        if not html:
            return []

        soup = BeautifulSoup(html, 'html.parser')
        videos = []

        # Locate the folder body
        content = soup.select_one('.post-content, .entry-content')
        if content:
            # Drop elements we do not need
            for elem in content.select('.txt-apps, .line, blockquote, .tags, .content-tabs'):
                elem.decompose()

            # Pair each heading with the link in the matching paragraph
            headings = content.select('h2, h3, h4')
            paragraphs = content.select('p')

            for i, heading in enumerate(headings):
                title = heading.get_text(strip=True)
                if i < len(paragraphs):
                    link = paragraphs[i].select_one('a')
                    if link:
                        videos.append({
                            'vod_id': link.get('href', ''),
                            'vod_name': link.get_text(strip=True),
                            'vod_pic': f"{self.getProxyUrl()}&url={self.e64(link.get('data-img', ''))}",
                            'vod_remarks': title
                        })

        return videos

    def getlist(self, articles, tid=''):
        videos = []
        is_folder = '/mrdg' in tid

        for article in articles:
            try:
                # Title
                title_elem = article.select_one('h2, h3, .headline, .title, a[title]')
                name = title_elem.get_text(strip=True) if title_elem else ""

                # Link
                link_elem = article.select_one('a')
                href = link_elem.get('href', '') if link_elem else ""

                # Date / remarks
                date_elem = article.select_one('time, .date, .published')
                remarks = date_elem.get_text(strip=True) if date_elem else ""

                # Cover image (same trick as 吸瓜: base64 URL hidden in an inline script)
                pic = None
                script_elem = article.select_one('script')
                if script_elem and script_elem.string:
                    base64_match = re.search(r'base64,[\'"]?([A-Za-z0-9+/=]+)[\'"]?', script_elem.string)
                    if base64_match:
                        encoded_url = base64_match.group(1)
                        pic = f"{self.getProxyUrl()}&url={self.e64(encoded_url)}"

                if not pic:
                    img_elem = article.select_one('img[data-xkrkllgl]')
                    if img_elem and img_elem.get('data-xkrkllgl'):
                        encoded_url = img_elem.get('data-xkrkllgl')
                        pic = f"{self.getProxyUrl()}&url={self.e64(encoded_url)}"

                if not pic:
                    img_elem = article.select_one('img')
                    if img_elem:
                        for attr in ["data-lazy-src", "data-original", "data-src", "src"]:
                            pic = img_elem.get(attr)
                            if pic:
                                pic = urljoin(self.host, pic)
                                break

                if name and href:
                    videos.append({
                        'vod_id': f"{href}{'@folder' if is_folder else ''}",
                        'vod_name': name.replace('\n', ' '),
                        'vod_pic': pic,
                        'vod_remarks': remarks,
                        'vod_tag': 'folder' if is_folder else '',
                        'style': {"type": "rect", "ratio": 1.33}
                    })

            except Exception as e:
                print(f"列表项解析错误: {e}")
                continue

        return videos

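# Overall call flow (for readers of this file): the host app is expected to call
# homeContent()/categoryContent()/searchContent() to build lists, detailContent()
# to resolve an item into 'vod_play_url' entries of the form "label$url" joined
# by '#', and playerContent() to turn a chosen URL into
# {'parse': 0|1, 'url': ..., 'header': ...}. Cover images go through localProxy(),
# which fetches the blob and, for non-image extensions, decrypts it with aesimg().
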
if __name__ == "__main__":
    spider = Spider()
    spider.init('{"site": "https://911blw.com"}')

    # Smoke-test: home page
    result = spider.homeContent({})
    print(f"首页分类: {len(result['class'])} 个")
    print(f"首页内容: {len(result['list'])} 个")

    # Smoke-test: category page
    result = spider.categoryContent("/category/jrgb/", "1", False, {})
    print(f"分类内容: {len(result['list'])} 个")

    # Smoke-test: search
    result = spider.searchContent("测试", False, "1")
    print(f"搜索结果: {len(result['list'])} 个")
```
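If the detail and play steps need the same kind of smoke test, a sketch along these lines should work; it reuses the `spider` instance from the block above and assumes the first category item resolves to at least one play entry.

```python
# Hedged extension of the smoke test: walk the first category item through
# detailContent() and playerContent().
items = spider.categoryContent("/category/jrgb/", "1", False, {}).get('list', [])
if items:
    detail = spider.detailContent([items[0]['vod_id']])
    # vod_play_url entries look like "视频1$<url>#视频2$<url>"; take the first URL
    play_url = detail['list'][0]['vod_play_url'].split('#')[0].split('$')[-1]
    print(spider.playerContent("911爆料网", play_url, []))
```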