# -*- coding: utf-8 -*-
import json
import random
import re
import sys
import threading
import time

import requests
from base64 import b64decode, b64encode
from urllib.parse import urlparse, urljoin
from Crypto.Cipher import AES
from Crypto.Util.Padding import unpad
from bs4 import BeautifulSoup

sys.path.append('..')
from base.spider import Spider
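

# Spider for 911爆料网, a gossip/leaks aggregation site. It scrapes category
# listings, detail pages and search results with BeautifulSoup, proxies cover
# images through localProxy (AES-decrypting them when needed), and falls back
# to mirror domains when the primary host is slow or unreachable.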


class Spider(Spider):

    def init(self, extend="{}"):
        config = json.loads(extend)
        self.domin = config.get('site', "https://911blw.com")
        self.proxies = config.get('proxy', {}) or {}
        self.plp = config.get('plp', '')
        self.backup_urls = ["https://hlj.fun", "https://911bl16.com"]

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="134", "Google Chrome";v="134"',
            'Accept-Language': 'zh-CN,zh;q=0.9'
        }

        # Pick the fastest reachable host among the main and backup domains
        self.host = self.host_late([self.domin] + self.backup_urls)
        self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})

        # Cache the current domain announced on the site's about page
        self.getcnh()

    def getName(self):
        return "911爆料网"

    def isVideoFormat(self, url):
        pass

    def manualVideoCheck(self):
        pass

    def destroy(self):
        pass

    def homeContent(self, filter):
        result = {}

        # Category list (the site's actual sections)
        categories = [
            {"type_id": "/category/jrgb/", "type_name": "最新爆料"},
            {"type_id": "/category/rmgb/", "type_name": "精选大瓜"},
            {"type_id": "/category/blqw/", "type_name": "猎奇吃瓜"},
            {"type_id": "/category/rlph/", "type_name": "TOP5大瓜"},
            {"type_id": "/category/ssdbl/", "type_name": "社会热点"},
            {"type_id": "/category/hjsq/", "type_name": "海角社区"},
            {"type_id": "/category/mrds/", "type_name": "每日大赛"},
            {"type_id": "/category/xyss/", "type_name": "校园吃瓜"},
            {"type_id": "/category/mxhl/", "type_name": "明星吃瓜"},
            {"type_id": "/category/whbl/", "type_name": "网红爆料"},
            {"type_id": "/category/bgzq/", "type_name": "反差爆料"},
            {"type_id": "/category/fljq/", "type_name": "网黄福利"},
            {"type_id": "/category/crfys/", "type_name": "午夜剧场"},
            {"type_id": "/category/thjx/", "type_name": "探花经典"},
            {"type_id": "/category/dmhv/", "type_name": "禁漫天堂"},
            {"type_id": "/category/slec/", "type_name": "吃瓜精选"},
            {"type_id": "/category/zksr/", "type_name": "重口调教"},
            {"type_id": "/category/crlz/", "type_name": "精选连载"}
        ]
        result['class'] = categories

        # Homepage recommendations
        html = self.fetch_page(f"{self.host}/")
        if html:
            soup = BeautifulSoup(html, 'html.parser')
            articles = soup.select('article, .post-item, .article-item')
            result['list'] = self.getlist(articles)
        else:
            result['list'] = []

        return result

    def homeVideoContent(self):
        # Featured videos: reuse the latest-posts category as the home feed
        # (parsed the same way as categoryContent; the original called an
        # undefined extract_content helper here)
        html = self.fetch_page(f"{self.host}/category/jrgb/1/")
        if not html:
            return {'list': []}
        soup = BeautifulSoup(html, 'html.parser')
        articles = soup.select('article, .post-item, .article-item, ul.row li')
        return {'list': self.getlist(articles)}

    def categoryContent(self, tid, pg, filter, extend):
        if '@folder' in tid:
            # Folder-style entry: expand its inner list of links
            fid = tid.replace('@folder', '')
            videos = self.getfod(fid)
        else:
            # Regular category page; page 1 carries no page segment in the URL
            url = f"{self.host}{tid}{pg}/" if pg != "1" else f"{self.host}{tid}"
            html = self.fetch_page(url)
            if html:
                soup = BeautifulSoup(html, 'html.parser')
                articles = soup.select('article, .post-item, .article-item, ul.row li')
                videos = self.getlist(articles, tid)
            else:
                videos = []

        result = {}
        result['list'] = videos
        result['page'] = pg
        result['pagecount'] = 1 if '@folder' in tid else 99999
        result['limit'] = 90
        result['total'] = 999999
        return result
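
    # detailContent probes a post page for playable sources in four ways
    # (dplayer data-config JSON, <video>/<source> tags, player iframes, and
    # URLs embedded in inline scripts) and emits the CatVod play-list
    # convention: "name$url" entries joined with '#'.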

    def detailContent(self, ids):
        url = ids[0] if ids[0].startswith("http") else f"{self.host}{ids[0]}"
        html = self.fetch_page(url)

        if not html:
            return {'list': []}

        soup = BeautifulSoup(html, 'html.parser')
        vod = {'vod_play_from': '911爆料网'}

        try:
            # Collect tag links and render them as clickable [a=cr:...] markup
            clist = []
            tags = soup.select('.tags .keywords a, .tagcloud a, a[rel="tag"]')
            for tag in tags:
                title = tag.get_text(strip=True)
                href = tag.get('href', '')
                if href and title:
                    clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')

            if clist:
                vod['vod_content'] = '点击展开↓↓↓\n' + ' '.join(clist)
            else:
                # Fall back to a 200-character excerpt of the post body
                content_elem = soup.select_one('.post-content, .entry-content')
                vod['vod_content'] = content_elem.get_text(strip=True)[:200] + '...' if content_elem else "无简介"
        except Exception:
            title_elem = soup.select_one('h1, .post-title, .entry-title')
            vod['vod_content'] = title_elem.get_text(strip=True) if title_elem else "无简介"

        try:
            # Extract the play list (same dplayer approach as 51吸瓜)
            plist = []

            # Method 1: dplayer containers carry a JSON config in data-config
            dplayers = soup.select('.dplayer, [data-config]')
            for c, player in enumerate(dplayers, start=1):
                config_str = player.get('data-config', '{}')
                try:
                    config = json.loads(config_str)
                    if 'video' in config and 'url' in config['video']:
                        plist.append(f"视频{c}${config['video']['url']}")
                except Exception:
                    pass

            # Method 2: plain <video>/<source> tags
            if not plist:
                video_tags = soup.select('video source, video[src]')
                for c, video in enumerate(video_tags, start=1):
                    src = video.get('src') or ''
                    if src:
                        plist.append(f"视频{c}${src}")

            # Method 3: embedded player iframes
            if not plist:
                iframes = soup.select('iframe[src]')
                for c, iframe in enumerate(iframes, start=1):
                    src = iframe.get('src', '')
                    if src and ('player' in src or 'video' in src):
                        plist.append(f"视频{c}${src}")

            # Method 4: video URLs (m3u8/mp4/...) hard-coded in inline scripts
            if not plist:
                scripts = soup.find_all('script')
                for script in scripts:
                    if script.string:
                        video_matches = re.findall(r'(https?://[^\s"\']*\.(?:m3u8|mp4|flv|ts|mkv)[^\s"\']*)', script.string)
                        for c, match in enumerate(video_matches, start=1):
                            plist.append(f"视频{c}${match}")

            vod['vod_play_url'] = '#'.join(plist) if plist else f"请检查页面,可能没有视频${url}"

        except Exception as e:
            print(f"Detail page parse error: {e}")
            vod['vod_play_url'] = f"解析错误${url}"

        return {'list': [vod]}

    def searchContent(self, key, quick, pg="1"):
        url = f"{self.host}/search/{key}/{pg}/"
        html = self.fetch_page(url)
        if html:
            soup = BeautifulSoup(html, 'html.parser')
            articles = soup.select('article, .post-item, .article-item, ul.row li')
            videos = self.getlist(articles)
        else:
            videos = []

        return {'list': videos, 'page': pg, 'pagecount': 9999, 'limit': 90, 'total': 999999}
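
    # parse=0 tells the host app the URL is directly playable media; parse=1
    # asks it to run a web-page parser first. Illustrative examples
    # (hypothetical URLs, not taken from the site):
    #   'https://cdn.example.com/v/index.m3u8' -> parse=0, played directly
    #   '/archives/12345/'                     -> parse=1, handed to the parser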

    def playerContent(self, flag, id, vipFlags):
        # Direct media extensions play as-is; everything else needs parsing
        p = 0 if re.search(r'\.(m3u8|mp4|flv|ts|mkv|mov|avi|webm)', id) else 1
        return {'parse': p, 'url': f"{self.plp}{id}", 'header': self.headers}

    def localProxy(self, param):
        try:
            url = self.d64(param['url'])
            match = re.search(r"loadBannerDirect\('([^']*)'", url)
            if match:
                url = match.group(1)

            res = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)

            # Decide by file extension whether the payload needs AES decryption
            if url.endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
                # Plain images pass through unchanged
                return [200, res.headers.get('Content-Type'), res.content]
            else:
                # Everything else is treated as an AES-encrypted image
                return [200, res.headers.get('Content-Type'), self.aesimg(res.content)]

        except Exception as e:
            print(f"Image proxy error: {str(e)}")
            return [500, 'text/html', '']
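
    # e64/d64 are plain Base64 helpers. getlist/getfod use e64 to tuck image
    # URLs into the getProxyUrl() query string, and localProxy uses d64 to
    # recover them. Round trip (illustrative value):
    #   self.d64(self.e64('https://example.com/a.jpg')) == 'https://example.com/a.jpg'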

    def e64(self, text):
        try:
            text_bytes = text.encode('utf-8')
            encoded_bytes = b64encode(text_bytes)
            return encoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Base64 encode error: {str(e)}")
            return ""

    def d64(self, encoded_text):
        try:
            encoded_bytes = encoded_text.encode('utf-8')
            decoded_bytes = b64decode(encoded_bytes)
            return decoded_bytes.decode('utf-8')
        except Exception as e:
            print(f"Base64 decode error: {str(e)}")
            return ""
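
    # Cover images that are not plain image files arrive AES-128-CBC encrypted
    # with the site's fixed key/IV below; aesimg decrypts one response body and
    # strips the PKCS#7 padding.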

    def aesimg(self, word):
        key = b'f5d965df75336270'
        iv = b'97b60394abc2fbe1'
        cipher = AES.new(key, AES.MODE_CBC, iv)
        decrypted = unpad(cipher.decrypt(word), AES.block_size)
        return decrypted
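
    # fetch_page fetches a URL with a 1-second politeness delay. Responses
    # shorter than 1000 bytes are treated as interstitial/anti-bot pages and
    # trigger one retry pass across the backup mirror domains.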

    def fetch_page(self, url, use_backup=False):
        original_url = url
        if use_backup:
            for backup in self.backup_urls:
                test_url = url.replace(self.domin, backup)
                try:
                    time.sleep(1)
                    res = requests.get(test_url, headers=self.headers, proxies=self.proxies, timeout=10)
                    res.raise_for_status()
                    res.encoding = "utf-8"
                    text = res.text
                    if len(text) > 1000:
                        print(f"[DEBUG] Using backup {backup}: {test_url}")
                        return text
                except Exception:
                    continue

        try:
            time.sleep(1)
            res = requests.get(original_url, headers=self.headers, proxies=self.proxies, timeout=10)
            res.raise_for_status()
            res.encoding = "utf-8"
            text = res.text
            if len(text) < 1000 and not use_backup:
                # Only fall back to mirrors once, to avoid recursing forever
                print("[DEBUG] Response too short, trying backup domains")
                return self.fetch_page(original_url, use_backup=True)
            return text
        except Exception as e:
            print(f"[ERROR] Request failed {original_url}: {e}")
            return None

    def getcnh(self):
        # Read the site's about page and cache the announced current domain
        try:
            html = self.fetch_page(f"{self.host}/about.html")
            if html:
                soup = BeautifulSoup(html, 'html.parser')
                link = soup.select_one('a[href]')
                if link:
                    url = link.get('href')
                    parsed_url = urlparse(url)
                    host = parsed_url.scheme + "://" + parsed_url.netloc
                    self.setCache('host_911blw', host)
        except Exception as e:
            print(f"Host discovery error: {str(e)}")
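
    # host_late races a HEAD request against every candidate host in parallel
    # threads and returns the lowest-latency one. Each worker writes to its own
    # dict key, which is safe under CPython's GIL; failed hosts score infinity
    # so they lose the race.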

    def host_late(self, url_list):
        if not url_list:
            return self.domin

        results = {}
        threads = []

        def test_host(url):
            try:
                start_time = time.time()
                requests.head(url, headers=self.headers, proxies=self.proxies, timeout=1.0, allow_redirects=False)
                delay = (time.time() - start_time) * 1000
                results[url] = delay
            except Exception:
                results[url] = float('inf')

        for url in url_list:
            t = threading.Thread(target=test_host, args=(url,))
            threads.append(t)
            t.start()

        for t in threads:
            t.join()

        return min(results.items(), key=lambda x: x[1])[0]

    def getfod(self, fid):
        url = f"{self.host}{fid}"
        html = self.fetch_page(url)
        if not html:
            return []

        soup = BeautifulSoup(html, 'html.parser')
        videos = []

        # Folder pages keep their items inside the post body
        content = soup.select_one('.post-content, .entry-content')
        if content:
            # Drop navigation and decoration elements
            for elem in content.select('.txt-apps, .line, blockquote, .tags, .content-tabs'):
                elem.decompose()

            # Pair each heading with the link in the corresponding paragraph
            headings = content.select('h2, h3, h4')
            paragraphs = content.select('p')

            for i, heading in enumerate(headings):
                title = heading.get_text(strip=True)
                if i < len(paragraphs):
                    link = paragraphs[i].select_one('a')
                    if link:
                        videos.append({
                            'vod_id': link.get('href', ''),
                            'vod_name': link.get_text(strip=True),
                            'vod_pic': f"{self.getProxyUrl()}&url={self.e64(link.get('data-img', ''))}",
                            'vod_remarks': title
                        })

        return videos

    def getlist(self, articles, tid=''):
        videos = []
        is_folder = '/mrdg' in tid

        for article in articles:
            try:
                # Title
                title_elem = article.select_one('h2, h3, .headline, .title, a[title]')
                name = title_elem.get_text(strip=True) if title_elem else ""

                # Link
                link_elem = article.select_one('a')
                href = link_elem.get('href', '') if link_elem else ""

                # Date / remarks
                date_elem = article.select_one('time, .date, .published')
                remarks = date_elem.get_text(strip=True) if date_elem else ""

                # Cover image (same trick as the 吸瓜 sites): first try a
                # base64 blob inside an inline script
                pic = None
                script_elem = article.select_one('script')
                if script_elem and script_elem.string:
                    base64_match = re.search(r'base64,[\'"]?([A-Za-z0-9+/=]+)[\'"]?', script_elem.string)
                    if base64_match:
                        encoded_url = base64_match.group(1)
                        pic = f"{self.getProxyUrl()}&url={self.e64(encoded_url)}"

                # Then the obfuscated data attribute
                if not pic:
                    img_elem = article.select_one('img[data-xkrkllgl]')
                    if img_elem and img_elem.get('data-xkrkllgl'):
                        encoded_url = img_elem.get('data-xkrkllgl')
                        pic = f"{self.getProxyUrl()}&url={self.e64(encoded_url)}"

                # Finally any regular or lazy-loaded <img>
                if not pic:
                    img_elem = article.select_one('img')
                    if img_elem:
                        for attr in ["data-lazy-src", "data-original", "data-src", "src"]:
                            pic = img_elem.get(attr)
                            if pic:
                                pic = urljoin(self.host, pic)
                                break

                if name and href:
                    videos.append({
                        'vod_id': f"{href}{'@folder' if is_folder else ''}",
                        'vod_name': name.replace('\n', ' '),
                        'vod_pic': pic,
                        'vod_remarks': remarks,
                        'vod_tag': 'folder' if is_folder else '',
                        'style': {"type": "rect", "ratio": 1.33}
                    })

            except Exception as e:
                print(f"List item parse error: {e}")
                continue

        return videos


if __name__ == "__main__":
    spider = Spider()
    spider.init('{"site": "https://911blw.com"}')

    # Home page
    result = spider.homeContent({})
    print(f"Home categories: {len(result['class'])}")
    print(f"Home items: {len(result['list'])}")

    # Category listing
    result = spider.categoryContent("/category/jrgb/", "1", False, {})
    print(f"Category items: {len(result['list'])}")

    # Search
    result = spider.searchContent("测试", False, "1")
    print(f"Search results: {len(result['list'])}")
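
    # Optional deeper check (a sketch; only meaningful when the search above
    # returned results): walk the first hit through detail and player parsing.
    if result['list']:
        detail = spider.detailContent([result['list'][0]['vod_id']])
        if detail['list']:
            # Play list entries follow the "name$url" convention joined by '#'
            play = detail['list'][0]['vod_play_url'].split('#')[0].split('$')[-1]
            print(f"First play source: {play}")
            print(spider.playerContent('911爆料网', play, []))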