```
feat(PyramidStore): initialize the project and add base configuration files

- Add .gitignore to ignore sub-repositories' .git directories
- Add LICENSE (GNU General Public License v3.0)
- Add README.md with debugging examples, a disclaimer, and configuration notes
- Add base/localProxy.py, the base proxy configuration file
- Add version-controlled image files (binary diff)
```
py/adult/911.py (new file, 435 lines)

@@ -0,0 +1,435 @@
```python
# -*- coding: utf-8 -*-
import json
import random
import re
import sys
import threading
import time
import requests
from base64 import b64decode, b64encode
from urllib.parse import urlparse, urljoin
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
from bs4 import BeautifulSoup

sys.path.append('..')
from base.spider import Spider


class Spider(Spider):

    def init(self, extend="{}"):
        config = json.loads(extend)
        self.domain = config.get('site', "https://911blw.com")
        self.proxies = config.get('proxy', {}) or {}
        self.plp = config.get('plp', '')
        self.backup_urls = ["https://hlj.fun", "https://911bl16.com"]

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
            'Accept-Language': 'zh-CN,zh;q=0.9'
        }

        # Pick the fastest responding host
        self.host = self.host_late([self.domain] + self.backup_urls)
        self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})

        # Cache the site's announced host
        self.getcnh()
```
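For context, a minimal sketch of how the `extend` JSON reaches `init()`. The proxy address and `plp` value are illustrative placeholders, not values from this repo:

```python
# Hypothetical config: 'site', 'proxy' and 'plp' are the keys init() reads.
demo_extend = json.dumps({
    "site": "https://911blw.com",
    "proxy": {"http": "http://127.0.0.1:7890",
              "https": "http://127.0.0.1:7890"},   # placeholder proxy
    "plp": ""                                      # optional play-URL prefix
})

spider = Spider()
spider.init(demo_extend)  # races the main + backup domains and sets self.host
```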
```python
    def getName(self):
        return "911爆料网"

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    def homeContent(self, filter):
        result = {}

        # Category list (matching the site's actual sections)
        categories = [
            {"type_id": "/category/jrgb/", "type_name": "最新爆料"},
            {"type_id": "/category/rmgb/", "type_name": "精选大瓜"},
            {"type_id": "/category/blqw/", "type_name": "猎奇吃瓜"},
            {"type_id": "/category/rlph/", "type_name": "TOP5大瓜"},
            {"type_id": "/category/ssdbl/", "type_name": "社会热点"},
            {"type_id": "/category/hjsq/", "type_name": "海角社区"},
            {"type_id": "/category/mrds/", "type_name": "每日大赛"},
            {"type_id": "/category/xyss/", "type_name": "校园吃瓜"},
            {"type_id": "/category/mxhl/", "type_name": "明星吃瓜"},
            {"type_id": "/category/whbl/", "type_name": "网红爆料"},
            {"type_id": "/category/bgzq/", "type_name": "反差爆料"},
            {"type_id": "/category/fljq/", "type_name": "网黄福利"},
            {"type_id": "/category/crfys/", "type_name": "午夜剧场"},
            {"type_id": "/category/thjx/", "type_name": "探花经典"},
            {"type_id": "/category/dmhv/", "type_name": "禁漫天堂"},
            {"type_id": "/category/slec/", "type_name": "吃瓜精选"},
            {"type_id": "/category/zksr/", "type_name": "重口调教"},
            {"type_id": "/category/crlz/", "type_name": "精选连载"}
        ]

        result['class'] = categories

        # Home page recommendations
        html = self.fetch_page(f"{self.host}/")
        if html:
            soup = BeautifulSoup(html, 'html.parser')
            articles = soup.select('article, .post-item, .article-item')
            result['list'] = self.getlist(articles)
        else:
            result['list'] = []

        return result

    def homeVideoContent(self):
        # Home page featured videos, parsed the same way as homeContent
        html = self.fetch_page(f"{self.host}/category/jrgb/1/")
        if not html:
            return {'list': []}
        soup = BeautifulSoup(html, 'html.parser')
        articles = soup.select('article, .post-item, .article-item')
        return {'list': self.getlist(articles)}

    def categoryContent(self, tid, pg, filter, extend):
        if '@folder' in tid:
            # Folder-style entry
            id = tid.replace('@folder', '')
            videos = self.getfod(id)
        else:
            # Regular category listing
            url = f"{self.host}{tid}{pg}/" if pg != "1" else f"{self.host}{tid}"
            html = self.fetch_page(url)
            if html:
                soup = BeautifulSoup(html, 'html.parser')
                articles = soup.select('article, .post-item, .article-item, ul.row li')
                videos = self.getlist(articles, tid)
            else:
                videos = []

        result = {}
        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = 1 if '@folder' in tid else 99999
        result['limit'] = 90
        result['total'] = 999999
        return result

    def detailContent(self, ids):
        url = ids[0] if ids[0].startswith("http") else f"{self.host}{ids[0]}"
        html = self.fetch_page(url)

        if not html:
            return {'list': []}

        soup = BeautifulSoup(html, 'html.parser')
        vod = {'vod_play_from': '911爆料网'}

        try:
            # Extract tag links
            clist = []
            tags = soup.select('.tags .keywords a, .tagcloud a, a[rel="tag"]')
            for tag in tags:
                title = tag.get_text(strip=True)
                href = tag.get('href', '')
                if href and title:
                    clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')

            vod['vod_content'] = '点击展开↓↓↓\n' + ' '.join(clist) if clist else soup.select_one('.post-content, .entry-content').get_text(strip=True)[:200] + '...'
        except:
            title_elem = soup.select_one('h1, .post-title, .entry-title')
            vod['vod_content'] = title_elem.get_text(strip=True) if title_elem else "无简介"

        try:
            # Build the play list (dplayer-style, same approach as 51吸瓜)
            plist = []

            # Method 1: dplayer data-config
            dplayers = soup.select('.dplayer, [data-config]')
            for c, player in enumerate(dplayers, start=1):
                config_str = player.get('data-config', '{}')
                try:
                    config = json.loads(config_str)
                    if 'video' in config and 'url' in config['video']:
                        plist.append(f"视频{c}${config['video']['url']}")
                except:
                    pass

            # Method 2: <video> tags
            if not plist:
                video_tags = soup.select('video source, video[src]')
                for c, video in enumerate(video_tags, start=1):
                    src = video.get('src') or ''
                    if src:
                        plist.append(f"视频{c}${src}")

            # Method 3: iframes
            if not plist:
                iframes = soup.select('iframe[src]')
                for c, iframe in enumerate(iframes, start=1):
                    src = iframe.get('src', '')
                    if src and ('player' in src or 'video' in src):
                        plist.append(f"视频{c}${src}")

            # Method 4: scan inline scripts
            if not plist:
                scripts = soup.find_all('script')
                for script in scripts:
                    if script.string:
                        # Look for m3u8/mp4/... video URLs
                        video_matches = re.findall(r'(https?://[^\s"\']*\.(?:m3u8|mp4|flv|ts|mkv)[^\s"\']*)', script.string)
                        for c, match in enumerate(video_matches, start=1):
                            plist.append(f"视频{c}${match}")

            vod['vod_play_url'] = '#'.join(plist) if plist else f"请检查页面,可能没有视频${url}"

        except Exception as e:
            print(f"detail page parse error: {e}")
            vod['vod_play_url'] = f"解析错误${url}"

        return {'list': [vod]}
```
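Method 4 above is the broadest net. A quick offline check of what that regex matches, run against a fabricated inline-script body (the URLs are made up for illustration):

```python
# Illustrative only: a fabricated script body like the ones detailContent()
# scans in method 4.
sample_script = """
var player = new DPlayer({
    video: { url: "https://cdn.example.com/v/abc123/index.m3u8?sign=xyz" }
});
var fallback = 'https://cdn.example.com/v/abc123.mp4';
"""
pattern = r'(https?://[^\s"\']*\.(?:m3u8|mp4|flv|ts|mkv)[^\s"\']*)'
print(re.findall(pattern, sample_script))
# ['https://cdn.example.com/v/abc123/index.m3u8?sign=xyz',
#  'https://cdn.example.com/v/abc123.mp4']
```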
```python
    def searchContent(self, key, quick, pg="1"):
        url = f"{self.host}/search/{key}/{pg}/"
        html = self.fetch_page(url)
        if html:
            soup = BeautifulSoup(html, 'html.parser')
            articles = soup.select('article, .post-item, .article-item, ul.row li')
            videos = self.getlist(articles)
        else:
            videos = []

        return {'list': videos, 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}

    def playerContent(self, flag, id, vipFlags):
        # parse=0 for directly playable formats, parse=1 to hand off to a sniffer
        p = 0 if re.search(r'\.(m3u8|mp4|flv|ts|mkv|mov|avi|webm)', id) else 1
        return {'parse': p, 'url': f"{self.plp}{id}", 'header': self.headers}
```
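To make the parse-flag split concrete, here is what it decides for a few made-up URLs:

```python
# Made-up URLs showing which side of the parse flag each lands on.
for u in ("https://cdn.example.com/a/index.m3u8",
          "https://cdn.example.com/b/video.mp4",
          "https://example.com/play/12345.html"):
    p = 0 if re.search(r'\.(m3u8|mp4|flv|ts|mkv|mov|avi|webm)', u) else 1
    print(p, u)   # 0 = direct play, 1 = needs the app's URL sniffer
# 0 https://cdn.example.com/a/index.m3u8
# 0 https://cdn.example.com/b/video.mp4
# 1 https://example.com/play/12345.html
```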
```python
    def localProxy(self, param):
        try:
            url = self.d64(param['url'])
            match = re.search(r"loadBannerDirect\('([^']*)'", url)
            if match:
                url = match.group(1)

            res = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)

            # Decide by file extension whether AES decryption is needed
            if url.endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
                # Plain images pass through unchanged
                return [200, res.headers.get('Content-Type'), res.content]
            else:
                # Encrypted payloads are AES-decrypted
                return [200, res.headers.get('Content-Type'), self.aesimg(res.content)]

        except Exception as e:
            print(f"image proxy error: {str(e)}")
            return [500, 'text/html', '']

    def e64(self, text):
        try:
            text_bytes = text.encode('utf-8')
            encoded_bytes = b64encode(text_bytes)
            return encoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Base64 encode error: {str(e)}")
            return ""

    def d64(self, encoded_text):
        try:
            encoded_bytes = encoded_text.encode('utf-8')
            decoded_bytes = b64decode(encoded_bytes)
            return decoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Base64 decode error: {str(e)}")
            return ""

    def aesimg(self, word):
        # Site images are AES-128-CBC encrypted with a fixed key/IV
        key = b'f5d965df75336270'
        iv = b'97b60394abc2fbe1'
        cipher = AES.new(key, AES.MODE_CBC, iv)
        decrypted = unpad(cipher.decrypt(word), AES.block_size)
        return decrypted
```
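A round-trip sanity check for `aesimg()`: encrypt sample bytes with the same fixed key/IV (pycryptodome's `pad` supplies the PKCS#7 padding that `unpad` removes) and confirm the original comes back. The payload bytes are made up:

```python
from Crypto.Util.Padding import pad

# Round-trip check: encrypt fake image bytes with the site's fixed key/IV,
# then confirm aesimg() recovers them exactly.
key = b'f5d965df75336270'
iv = b'97b60394abc2fbe1'
plain = b'\x89PNG\r\n\x1a\n...fake image bytes...'
encrypted = AES.new(key, AES.MODE_CBC, iv).encrypt(pad(plain, AES.block_size))

assert Spider().aesimg(encrypted) == plain
```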
```python
    def fetch_page(self, url, use_backup=False):
        original_url = url
        if use_backup:
            for backup in self.backup_urls:
                test_url = url.replace(self.domain, backup)
                try:
                    time.sleep(1)
                    res = requests.get(test_url, headers=self.headers, proxies=self.proxies, timeout=10)
                    res.raise_for_status()
                    res.encoding = "utf-8"
                    text = res.text
                    if len(text) > 1000:
                        print(f"[DEBUG] using backup {backup}: {test_url}")
                        return text
                except:
                    continue

        try:
            time.sleep(1)
            res = requests.get(original_url, headers=self.headers, proxies=self.proxies, timeout=10)
            res.raise_for_status()
            res.encoding = "utf-8"
            text = res.text
            if len(text) < 1000 and not use_backup:
                print("[DEBUG] response too short, trying backup domains")
                return self.fetch_page(original_url, use_backup=True)
            return text
        except Exception as e:
            print(f"[ERROR] request failed {original_url}: {e}")
            return None

    def getcnh(self):
        try:
            html = self.fetch_page(f"{self.host}/about.html")
            if html:
                soup = BeautifulSoup(html, 'html.parser')
                link = soup.select_one('a[href]')
                if link:
                    url = link.get('href')
                    parsed_url = urlparse(url)
                    host = parsed_url.scheme + "://" + parsed_url.netloc
                    self.setCache('host_911blw', host)
        except Exception as e:
            print(f"host info fetch error: {str(e)}")

    def host_late(self, url_list):
        if not url_list:
            return self.domain

        results = {}
        threads = []

        def test_host(url):
            try:
                start_time = time.time()
                requests.head(url, headers=self.headers, proxies=self.proxies, timeout=1.0, allow_redirects=False)
                delay = (time.time() - start_time) * 1000
                results[url] = delay
            except Exception:
                results[url] = float('inf')

        for url in url_list:
            t = threading.Thread(target=test_host, args=(url,))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        return min(results.items(), key=lambda x: x[1])[0]
```
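`host_late()` probes each candidate on its own thread and keeps the lowest wall-clock latency; unreachable hosts score infinity, so `min()` skips them as long as one host answers. The same race could be written with a thread pool; a sketch, not what this file uses:

```python
from concurrent.futures import ThreadPoolExecutor

# Equivalent latency race with a thread pool. Assumes a non-empty candidate
# list; unreachable hosts score infinity so min() skips them.
def fastest_host(candidates, timeout=1.0):
    def probe(url):
        try:
            t0 = time.time()
            requests.head(url, timeout=timeout, allow_redirects=False)
            return url, (time.time() - t0) * 1000  # latency in ms
        except Exception:
            return url, float('inf')

    with ThreadPoolExecutor(max_workers=len(candidates)) as pool:
        return min(pool.map(probe, candidates), key=lambda kv: kv[1])[0]
```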
```python
    def getfod(self, id):
        url = f"{self.host}{id}"
        html = self.fetch_page(url)
        if not html:
            return []

        soup = BeautifulSoup(html, 'html.parser')
        videos = []

        # Locate the folder body
        content = soup.select_one('.post-content, .entry-content')
        if content:
            # Strip unneeded elements
            for elem in content.select('.txt-apps, .line, blockquote, .tags, .content-tabs'):
                elem.decompose()

            # Pair headings with their links
            headings = content.select('h2, h3, h4')
            paragraphs = content.select('p')

            for i, heading in enumerate(headings):
                title = heading.get_text(strip=True)
                if i < len(paragraphs):
                    link = paragraphs[i].select_one('a')
                    if link:
                        videos.append({
                            'vod_id': link.get('href', ''),
                            'vod_name': link.get_text(strip=True),
                            'vod_pic': f"{self.getProxyUrl()}&url={self.e64(link.get('data-img', ''))}",
                            'vod_remarks': title
                        })

        return videos

    def getlist(self, articles, tid=''):
        videos = []
        is_folder = '/mrdg' in tid

        for article in articles:
            try:
                # Title
                title_elem = article.select_one('h2, h3, .headline, .title, a[title]')
                name = title_elem.get_text(strip=True) if title_elem else ""

                # Link
                link_elem = article.select_one('a')
                href = link_elem.get('href', '') if link_elem else ""

                # Date / remarks
                date_elem = article.select_one('time, .date, .published')
                remarks = date_elem.get_text(strip=True) if date_elem else ""

                # Cover image (same trick as 吸瓜: base64 hidden in a script tag)
                pic = None
                script_elem = article.select_one('script')
                if script_elem and script_elem.string:
                    base64_match = re.search(r'base64,[\'"]?([A-Za-z0-9+/=]+)[\'"]?', script_elem.string)
                    if base64_match:
                        encoded_url = base64_match.group(1)
                        pic = f"{self.getProxyUrl()}&url={self.e64(encoded_url)}"

                if not pic:
                    img_elem = article.select_one('img[data-xkrkllgl]')
                    if img_elem and img_elem.get('data-xkrkllgl'):
                        encoded_url = img_elem.get('data-xkrkllgl')
                        pic = f"{self.getProxyUrl()}&url={self.e64(encoded_url)}"

                if not pic:
                    img_elem = article.select_one('img')
                    if img_elem:
                        for attr in ["data-lazy-src", "data-original", "data-src", "src"]:
                            pic = img_elem.get(attr)
                            if pic:
                                pic = urljoin(self.host, pic)
                                break

                if name and href:
                    videos.append({
                        'vod_id': f"{href}{'@folder' if is_folder else ''}",
                        'vod_name': name.replace('\n', ' '),
                        'vod_pic': pic,
                        'vod_remarks': remarks,
                        'vod_tag': 'folder' if is_folder else '',
                        'style': {"type": "rect", "ratio": 1.33}
                    })

            except Exception as e:
                print(f"list item parse error: {e}")
                continue

        return videos
```
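The cover lookup in `getlist()` tries a base64 payload inside a `<script>` tag first, then the obfuscated `data-xkrkllgl` attribute, then common lazy-load attributes. A fabricated article snippet exercising the first path (markup and payload are illustrative, not taken from the site):

```python
# Fabricated markup: a base64 cover URL embedded in a <script> tag,
# the first of the three lookup paths in getlist().
sample_article = BeautifulSoup("""
<article>
  <h2>示例标题</h2>
  <a href="/archives/12345/">示例标题</a>
  <script>loadBannerDirect('data:image/jpeg;base64,aHR0cHM6Ly9leGFtcGxlLmNvbS9jb3Zlci5qcGc=');</script>
</article>
""", 'html.parser')

script_elem = sample_article.select_one('script')
m = re.search(r'base64,[\'"]?([A-Za-z0-9+/=]+)[\'"]?', script_elem.string)
print(m.group(1))  # captured payload, re-encoded via e64() into the proxy URL
```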
```python
if __name__ == "__main__":
    spider = Spider()
    spider.init('{"site": "https://911blw.com"}')

    # Home page smoke test
    result = spider.homeContent({})
    print(f"home categories: {len(result['class'])}")
    print(f"home items: {len(result['list'])}")

    # Category smoke test
    result = spider.categoryContent("/category/jrgb/", "1", False, {})
    print(f"category items: {len(result['list'])}")

    # Search smoke test
    result = spider.searchContent("测试", False, "1")
    print(f"search results: {len(result['list'])}")
```