更新河马短剧、更新spider.jar

2025-07-07 00:24:08 +08:00 · 2025-07-07 00:24:08 +08:00 · a4f88037cc
commit a4f88037cc
parent 1f9c966adf
3 changed files with 264 additions and 463 deletions
--- a/api.json
+++ b/api.json
@ -929,6 +929,17 @@
            "quickSearch": 0,
            "filterable": 0
        },
        {
            "key": "河马短剧",
            "name": "河马｜短剧",
            "type": 3,
            "api": "./py/河马短剧.py",
            "searchable": 1,
            "changeable": 1,
            "quickSearch": 1,
            "filterable": 1,
            "playerType": 2
        },
        {
            "key": "偷乐短剧",
            "name": "偷乐｜短剧",
@ -957,15 +968,6 @@
            "quickSearch": 1,
            "filterable": 1
        },
        {
            "key": "海马影视APP",
            "name": "海马｜APP",
            "type": 3,
            "api": "./PyramidStore/plugin/app/海马影视APP.py",
            "searchable": 1,
            "quickSearch": 1,
            "filterable": 1
        },
        {
            "key": "国外剧APP",
            "name": "国外剧｜APP",
--- a/py/河马短剧.py
+++ b/py/河马短剧.py
@ -4,6 +4,7 @@ import re
 import json
 import traceback
 import sys
 from urllib.parse import quote
 sys.path.append('../../')
 try:
@ -17,7 +18,6 @@ except ImportError:
 class Spider(Spider):
    def __init__(self):
        self.siteUrl = "https://www.kuaikaw.cn"
        self.nextData = None  # 缓存NEXT_DATA数据
        self.cateManual = {
            "甜宠": "462",
            "古装仙侠": "1102",
@ -30,552 +30,351 @@ class Spider(Spider):
            "总裁": "1147",
            "职场商战": "943"
        }
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
            "Referer": self.siteUrl,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
        }
    def getName(self):
        # 返回爬虫名称
        return "河马短剧"
    def init(self, extend=""):
        return
-    def fetch(self, url, headers=None):
+    def fetch(self, url, headers=None, retry=2):
        """统一的网络请求接口"""
        if headers is None:
-            headers = {
+            headers = self.headers
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
                "Referer": self.siteUrl,
                "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
                "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
            }
        for i in range(retry + 1):
            try:
                response = requests.get(url, headers=headers, timeout=10, allow_redirects=True)
                response.raise_for_status()
                return response
            except Exception as e:
                if i == retry:
                    print(f"请求异常: {url}, 错误: {str(e)}")
                    return None
                continue
    def isVideoFormat(self, url):
        # 检查是否为视频格式
        video_formats = ['.mp4', '.mkv', '.avi', '.wmv', '.m3u8', '.flv', '.rmvb']
-        for format in video_formats:
+        return any(format in url.lower() for format in video_formats)
            if format in url.lower():
                return True
        return False
    def manualVideoCheck(self):
        # 不需要手动检查
        return False
    def homeContent(self, filter):
        """获取首页分类及筛选"""
        result = {}
-        # 分类列表，使用已初始化的cateManual
+        classes = [{'type_name': k, 'type_id': v} for k, v in self.cateManual.items()]
        classes = []
        for k in self.cateManual:
            classes.append({
                'type_name': k,
                'type_id': self.cateManual[k]
            })
        result['class'] = classes
-        # 获取首页推荐视频
+        
        try:
            result['list'] = self.homeVideoContent()['list']
        except:
            result['list'] = []
        return result
    def homeVideoContent(self):
        """获取首页推荐视频内容"""
        videos = []
        try:
            response = self.fetch(self.siteUrl)
            if not response:
                return {'list': []}
            html_content = response.text
            # 提取NEXT_DATA JSON数据
            next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
            next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
-            if next_data_match:
+            if not next_data_match:
                return {'list': []}
            next_data_json = json.loads(next_data_match.group(1))
            page_props = next_data_json.get("props", {}).get("pageProps", {})
-                # 获取轮播图数据 - 这些通常是推荐内容
+            
-                if "bannerList" in page_props and isinstance(page_props["bannerList"], list):
+            # 处理轮播图数据
-                    banner_list = page_props["bannerList"]
+            if "bannerList" in page_props:
-                    for banner in banner_list:
+                for banner in page_props["bannerList"]:
-                        book_id = banner.get("bookId", "")
+                    if banner.get("bookId"):
                        book_name = banner.get("bookName", "")
                        cover_url = banner.get("coverWap", banner.get("wapUrl", ""))
                        # 获取状态和章节数
                        status = banner.get("statusDesc", "")
                        total_chapters = banner.get("totalChapterNum", "")
                        if book_id and book_name:
                        videos.append({
-                                "vod_id": f"/drama/{book_id}",
+                            "vod_id": f"/drama/{banner['bookId']}",
-                                "vod_name": book_name,
+                            "vod_name": banner.get("bookName", ""),
-                                "vod_pic": cover_url,
+                            "vod_pic": banner.get("coverWap", ""),
-                                "vod_remarks": f"{status} {total_chapters}集" if total_chapters else status
+                            "vod_remarks": f"{banner.get('statusDesc', '')} {banner.get('totalChapterNum', '')}集".strip()
                        })
-                # SEO分类下的推荐
+            # 处理SEO分类推荐
-                if "seoColumnVos" in page_props and isinstance(page_props["seoColumnVos"], list):
+            if "seoColumnVos" in page_props:
                for column in page_props["seoColumnVos"]:
-                        book_infos = column.get("bookInfos", [])
+                    for book in column.get("bookInfos", []):
-                        for book in book_infos:
+                        if book.get("bookId"):
                            book_id = book.get("bookId", "")
                            book_name = book.get("bookName", "")
                            cover_url = book.get("coverWap", "")
                            status = book.get("statusDesc", "")
                            total_chapters = book.get("totalChapterNum", "")
                            if book_id and book_name:
                            videos.append({
-                                    "vod_id": f"/drama/{book_id}",
+                                "vod_id": f"/drama/{book['bookId']}",
-                                    "vod_name": book_name,
+                                "vod_name": book.get("bookName", ""),
-                                    "vod_pic": cover_url,
+                                "vod_pic": book.get("coverWap", ""),
-                                    "vod_remarks": f"{status} {total_chapters}集" if total_chapters else status
+                                "vod_remarks": f"{book.get('statusDesc', '')} {book.get('totalChapterNum', '')}集".strip()
                            })
-            # # 去重
+            # 去重处理
-            # seen = set()
+            seen = set()
-            # unique_videos = []
+            unique_videos = []
-            # for video in videos:
+            for video in videos:
-            #     if video["vod_id"] not in seen:
+                key = (video["vod_id"], video["vod_name"])
-            #         seen.add(video["vod_id"])
+                if key not in seen:
-            #         unique_videos.append(video)
+                    seen.add(key)
-            # videos = unique_videos
+                    unique_videos.append(video)
        except Exception as e:
            print(f"获取首页推荐内容出错: {e}")
            unique_videos = []
-        result = {
+        return {'list': unique_videos}
            "list": videos
        }
        return result
    def categoryContent(self, tid, pg, filter, extend):
-        """获取分类内容"""
+        result = {'list': [], 'page': pg, 'pagecount': 1, 'limit': 20, 'total': 0}
        result = {}
        videos = []
        url = f"{self.siteUrl}/browse/{tid}/{pg}"
        response = self.fetch(url)
-        html_content = response.text
+        if not response:
        # 提取NEXT_DATA JSON数据
        next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
        next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
        if next_data_match:
            next_data_json = json.loads(next_data_match.group(1))
            page_props = next_data_json.get("props", {}).get("pageProps", {})
            # 获取总页数和当前页
            current_page = page_props.get("page", 1)
            total_pages = page_props.get("pages", 1)
            # 获取书籍列表
            book_list = page_props.get("bookList", [])
            # 转换为通用格式
            for book in book_list:
                book_id = book.get("bookId", "")
                book_name = book.get("bookName", "")
                cover_url = book.get("coverWap", "")
                status_desc = book.get("statusDesc", "")
                total_chapters = book.get("totalChapterNum", "")
                if book_id and book_name:
                    videos.append({
                        "vod_id": f"/drama/{book_id}",
                        "vod_name": book_name,
                        "vod_pic": cover_url,
                        "vod_remarks": f"{status_desc} {total_chapters}集" if total_chapters else status_desc
                    })
            # 构建返回结果
            result = {
                "list": videos,
                "page": int(current_page),
                "pagecount": total_pages,
                "limit": len(videos),
                "total": total_pages * len(videos) if videos else 0
            }
            return result
    def switch(self, key, pg):
        # 搜索功能
        search_results = []
        # 获取第一页结果，并检查总页数
        url = f"{self.siteUrl}/search?searchValue={key}&page={pg}"
        response = self.fetch(url)
        html_content = response.text
-        # 提取NEXT_DATA JSON数据
+        next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html_content, re.DOTALL)
-        next_data_pattern = r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>'
+        if not next_data_match:
-        next_data_match = re.search(next_data_pattern, html_content, re.DOTALL)
+            return result
-        if next_data_match:
+            
        try:
            next_data_json = json.loads(next_data_match.group(1))
            page_props = next_data_json.get("props", {}).get("pageProps", {})
-            # 获取总页数
+            
            current_page = page_props.get("page", 1)
            total_pages = page_props.get("pages", 1)
            # 处理所有页的数据
            all_book_list = []
            # 添加第一页的书籍列表
            book_list = page_props.get("bookList", [])
-            all_book_list.extend(book_list)
+            
-            # 如果有多页，获取其他页的数据
+            videos = []
-            if total_pages > 1 :  # quick模式只获取第一页
+            for book in book_list:
-                for page in range(2, total_pages + 1):
+                if book.get("bookId"):
-                    next_page_url = f"{self.siteUrl}/search?searchValue={key}&page={page}"
+                    videos.append({
-                    next_page_response = self.fetch(next_page_url)
+                        "vod_id": f"/drama/{book['bookId']}",
-                    next_page_html = next_page_response.text
+                        "vod_name": book.get("bookName", ""),
-                    next_page_match = re.search(next_data_pattern, next_page_html, re.DOTALL)
+                        "vod_pic": book.get("coverWap", ""),
-                    if next_page_match:
+                        "vod_remarks": f"{book.get('statusDesc', '')} {book.get('totalChapterNum', '')}集".strip()
-                        next_page_json = json.loads(next_page_match.group(1))
+                    })
-                        next_page_props = next_page_json.get("props", {}).get("pageProps", {})
+            
-                        next_page_books = next_page_props.get("bookList", [])
+            result.update({
-                        all_book_list.extend(next_page_books)
+                'list': videos,
-            # 转换为统一的搜索结果格式
+                'page': int(current_page),
-            for book in all_book_list:
+                'pagecount': total_pages,
-                book_id = book.get("bookId", "")
+                'limit': len(videos),
-                book_name = book.get("bookName", "")
+                'total': len(videos) * total_pages if videos else 0
-                cover_url = book.get("coverWap", "")
+            })
-                total_chapters = book.get("totalChapterNum", "0")
+            
-                status_desc = book.get("statusDesc", "")
+        except Exception as e:
-                # 构建视频项
+            print(f"分类内容获取出错: {e}")
-                vod = {
+            
                    "vod_id": f"/drama/{book_id}",
                    "vod_name": book_name,
                    "vod_pic": cover_url,
                    "vod_remarks": f"{status_desc} {total_chapters}集"
                }
                search_results.append(vod)
        result = {
            "list": search_results,
            "page": pg
        }
        return result
    def searchContent(self, key, quick, pg=1):
-        result = self.switch(key, pg=pg)
+        return self.searchContentPage(key, quick, pg)
        result['page'] = pg
        return result
    def searchContentPage(self, key, quick, pg=1):
-        return self.searchContent(key, quick, pg)
+        result = {'list': [], 'page': pg, 'pagecount': 1, 'limit': 20, 'total': 0}
        search_url = f"{self.siteUrl}/search?searchValue={quote(key)}&page={pg}"
        response = self.fetch(search_url)
        if not response:
            return result
        html_content = response.text
        next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html_content, re.DOTALL)
        if not next_data_match:
            return result
        try:
            next_data_json = json.loads(next_data_match.group(1))
            page_props = next_data_json.get("props", {}).get("pageProps", {})
            total_pages = page_props.get("pages", 1)
            book_list = page_props.get("bookList", [])
            videos = []
            for book in book_list:
                if book.get("bookId"):
                    videos.append({
                        "vod_id": f"/drama/{book['bookId']}",
                        "vod_name": book.get("bookName", ""),
                        "vod_pic": book.get("coverWap", ""),
                        "vod_remarks": f"{book.get('statusDesc', '')} {book.get('totalChapterNum', '')}集".strip()
                    })
            result.update({
                'list': videos,
                'pagecount': total_pages,
                'total': len(videos) * total_pages if videos else 0
            })
        except Exception as e:
            print(f"搜索内容出错: {e}")
        return result
    def detailContent(self, ids):
-        # 获取剧集信息
+        result = {'list': []}
        if not ids:
            return result
        vod_id = ids[0]
        episode_id = None
        chapter_id = None
        if not vod_id.startswith('/drama/'):
-            if vod_id.startswith('/episode/'):
+            vod_id = f'/drama/{vod_id}'
                episode_info = vod_id.replace('/episode/', '').split('/')
                if len(episode_info) >= 2:
                    episode_id = episode_info[0]
                    chapter_id = episode_info[1]
                    vod_id = f'/drama/{episode_id}'
            else:
                vod_id = '/drama/' + vod_id
-        drama_url = self.siteUrl + vod_id
+        drama_url = f"{self.siteUrl}{vod_id}"
-        print(f"请求URL: {drama_url}")
+        response = self.fetch(drama_url)
        if not response:
            return result
-        headers = {
+        html = response.text
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
            "Referer": self.siteUrl,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
        }
        rsp = self.fetch(drama_url, headers=headers)
        if not rsp or rsp.status_code != 200:
            print(f"请求失败，状态码: {getattr(rsp, 'status_code', 'N/A')}")
            return {}
        html = rsp.text
        next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)
        if not next_data_match:
-            print("未找到NEXT_DATA内容")
+            return result
            return {}
        try:
            next_data = json.loads(next_data_match.group(1))
            page_props = next_data.get("props", {}).get("pageProps", {})
            print(f"找到页面属性，包含 {len(page_props.keys())} 个键")
            book_info = page_props.get("bookInfoVo", {})
            chapter_list = page_props.get("chapterList", [])
-            title = book_info.get("title", "")
+            if not book_info.get("bookId"):
-            sub_title = f"{book_info.get('totalChapterNum', '')}集"
+                return result
-            categories = []
+            # 基本信息
-            for category in book_info.get("categoryList", []):
+            categories = [c.get("name", "") for c in book_info.get("categoryList", [])]
-                categories.append(category.get("name", ""))
+            performers = [p.get("name", "") for p in book_info.get("performerList", [])]
            vod_content = book_info.get("introduction", "")
            vod = {
                "vod_id": vod_id,
-                "vod_name": title,
+                "vod_name": book_info.get("title", ""),
                "vod_pic": book_info.get("coverWap", ""),
                "type_name": ",".join(categories),
                "vod_year": "",
                "vod_area": book_info.get("countryName", ""),
-                "vod_remarks": sub_title,
+                "vod_remarks": f"{book_info.get('statusDesc', '')} {book_info.get('totalChapterNum', '')}集".strip(),
-                "vod_actor": ", ".join([p.get("name", "") for p in book_info.get("performerList", [])]),
+                "vod_actor": ", ".join(performers),
                "vod_director": "",
-                "vod_content": vod_content
+                "vod_content": book_info.get("introduction", "")
            }
-            # 处理播放列表
+            # 处理剧集
-            play_url_list = []
+            play_urls = self.processEpisodes(vod_id, chapter_list)
            if play_urls:
                vod['vod_play_from'] = '河马剧场'
                vod['vod_play_url'] = '$$$'.join(play_urls)
            result['list'] = [vod]
        except Exception as e:
            print(f"详情页解析出错: {e}")
            traceback.print_exc()
        return result
    def processEpisodes(self, vod_id, chapter_list):
        play_urls = []
        episodes = []
            if chapter_list:
                print(f"找到 {len(chapter_list)} 个章节")
                # 先检查是否有可以直接使用的MP4链接作为模板
                mp4_template = None
                first_mp4_chapter_id = None
                # 先搜索第一个章节的MP4链接
                # 为提高成功率，尝试直接请求第一个章节的播放页
                if chapter_list and len(chapter_list) > 0:
                    first_chapter = chapter_list[0]
                    first_chapter_id = first_chapter.get("chapterId", "")
                    drama_id_clean = vod_id.replace('/drama/', '')
                    if first_chapter_id and drama_id_clean:
                        first_episode_url = f"{self.siteUrl}/episode/{drama_id_clean}/{first_chapter_id}"
                        print(f"请求第一集播放页: {first_episode_url}")
                        first_rsp = self.fetch(first_episode_url, headers=headers)
                        if first_rsp and first_rsp.status_code == 200:
                            first_html = first_rsp.text
                            # 直接从HTML提取MP4链接
                            mp4_pattern = r'(https?://[^"\']+\.mp4)'
                            mp4_matches = re.findall(mp4_pattern, first_html)
                            if mp4_matches:
                                mp4_template = mp4_matches[0]
                                first_mp4_chapter_id = first_chapter_id
                                print(f"找到MP4链接模板: {mp4_template}")
                                print(f"模板对应的章节ID: {first_mp4_chapter_id}")
                # 如果未找到模板，再检查章节对象中是否有MP4链接
                if not mp4_template:
                    for chapter in chapter_list[:5]:  # 只检查前5个章节以提高效率
                        if "chapterVideoVo" in chapter and chapter["chapterVideoVo"]:
                            chapter_video = chapter["chapterVideoVo"]
                            mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                            if mp4_url and ".mp4" in mp4_url:
                                mp4_template = mp4_url
                                first_mp4_chapter_id = chapter.get("chapterId", "")
                                print(f"从chapterVideoVo找到MP4链接模板: {mp4_template}")
                                print(f"模板对应的章节ID: {first_mp4_chapter_id}")
                                break
                # 遍历所有章节处理播放信息
        for chapter in chapter_list:
            chapter_id = chapter.get("chapterId", "")
            chapter_name = chapter.get("chapterName", "")
-                    # 1. 如果章节自身有MP4链接，直接使用
+            if not chapter_id or not chapter_name:
                    if "chapterVideoVo" in chapter and chapter["chapterVideoVo"]:
                        chapter_video = chapter["chapterVideoVo"]
                        mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                        if mp4_url and ".mp4" in mp4_url:
                            episodes.append(f"{chapter_name}${mp4_url}")
                continue
-                    # 2. 如果有MP4模板，尝试替换章节ID构建MP4链接
+            # 尝试获取直接视频链接
-                    if mp4_template and first_mp4_chapter_id and chapter_id:
+            video_url = self.getDirectVideoUrl(chapter)
-                        # 替换模板中的章节ID部分
+            if video_url:
-                        if first_mp4_chapter_id in mp4_template:
+                episodes.append(f"{chapter_name}${video_url}")
                            new_mp4_url = mp4_template.replace(first_mp4_chapter_id, chapter_id)
                            episodes.append(f"{chapter_name}${new_mp4_url}")
                continue
-                    # 3. 如果上述方法都不可行，回退到使用chapter_id构建中间URL
+            # 回退方案
-                    if chapter_id and chapter_name:
+            episodes.append(f"{chapter_name}${vod_id}${chapter_id}${chapter_name}")
                        url = f"{vod_id}${chapter_id}${chapter_name}"
                        episodes.append(f"{chapter_name}${url}")
            if not episodes and vod_id:
                # 尝试构造默认的集数
                total_chapters = int(book_info.get("totalChapterNum", "0"))
                if total_chapters > 0:
                    print(f"尝试构造 {total_chapters} 个默认集数")
                    # 如果知道章节ID的模式，可以构造
                    if chapter_id and episode_id:
                        for i in range(1, total_chapters + 1):
                            chapter_name = f"第{i}集"
                            url = f"{vod_id}${chapter_id}${chapter_name}"
                            episodes.append(f"{chapter_name}${url}")
                    else:
                        # 使用普通的构造方式
                        for i in range(1, total_chapters + 1):
                            chapter_name = f"第{i}集"
                            url = f"{vod_id}${chapter_name}"
                            episodes.append(f"{chapter_name}${url}")
        if episodes:
-                play_url_list.append("#".join(episodes))
+            play_urls.append("#".join(episodes))
                vod['vod_play_from'] = '河马剧场'
                vod['vod_play_url'] = '$$$'.join(play_url_list)
-            result = {
+        return play_urls
-                'list': [vod]
+    
-            }
+    def getDirectVideoUrl(self, chapter):
-            return result
+        if "chapterVideoVo" not in chapter or not chapter["chapterVideoVo"]:
-        except Exception as e:
+            return None
-            print(f"解析详情页失败: {str(e)}")
+            
-            print(traceback.format_exc())
+        video_info = chapter["chapterVideoVo"]
-            return {}
+        for key in ["mp4", "mp4720p", "vodMp4Url"]:
            if key in video_info and video_info[key] and ".mp4" in video_info[key].lower():
                return video_info[key]
        return None
    def playerContent(self, flag, id, vipFlags):
-        result = {}
+        result = {
-        print(f"调用playerContent: flag={flag}, id={id}")
+            "parse": 0,
-        
+            "url": id,
-        headers = {
+            "header": json.dumps(self.headers)
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
            "Referer": self.siteUrl,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
        }
-        # 解析id参数
+        # 如果已经是视频链接直接返回
        if 'http' in id and ('.mp4' in id or '.m3u8' in id):
            return result
        # 解析参数
        parts = id.split('$')
-        drama_id = None
+        if len(parts) < 2:
-        chapter_id = None
+            return result
-        if len(parts) >= 2:
+        drama_id = parts[0].replace('/drama/', '')
            drama_id = parts[0]
        chapter_id = parts[1]
-            chapter_name = parts[2] if len(parts) > 2 else "第一集"
+        
-            print(f"解析参数: drama_id={drama_id}, chapter_id={chapter_id}")
+        # 尝试获取视频链接
-        else:
+        video_url = self.getEpisodeVideoUrl(drama_id, chapter_id)
-            # 处理旧数据格式
+        if video_url:
-            print(f"使用原始URL格式: {id}")
+            result["url"] = video_url
-            result["parse"] = 0
+            
            result["url"] = id
            result["header"] = json.dumps(headers)
        return result
-        # 直接检查chapter_id是否包含http（可能已经是视频链接）
+    def getEpisodeVideoUrl(self, drama_id, chapter_id):
-        if 'http' in chapter_id and '.mp4' in chapter_id:
+        episode_url = f"{self.siteUrl}/episode/{drama_id}/{chapter_id}"
-            print(f"已经是MP4链接: {chapter_id}")
+        response = self.fetch(episode_url)
-            result["parse"] = 0
+        if not response:
-            result["url"] = chapter_id
+            return None
            result["header"] = json.dumps(headers)
            return result
-        # 构建episode页面URL
+        html = response.text
        drama_id_clean = drama_id.replace('/drama/', '')
        episode_url = f"{self.siteUrl}/episode/{drama_id_clean}/{chapter_id}"
        print(f"请求episode页面: {episode_url}")
        try:
            rsp = self.fetch(episode_url, headers=headers)
            if not rsp or rsp.status_code != 200:
                print(f"请求失败，状态码: {getattr(rsp, 'status_code', 'N/A')}")
                result["parse"] = 0
                result["url"] = id
                result["header"] = json.dumps(headers)
                return result
            html = rsp.text
            print(f"获取页面大小: {len(html)} 字节")
            # 尝试从NEXT_DATA提取视频链接
            mp4_url = None
        # 方法1: 从NEXT_DATA提取
-            next_data_match = re.search(r'<script id="__NEXT_DATA__" type="application/json">(.*?)</script>', html, re.DOTALL)
+        next_data_match = re.search(r'<script id="__NEXT_DATA__".*?>(.*?)</script>', html, re.DOTALL)
        if next_data_match:
            try:
                    print("找到NEXT_DATA")
                next_data = json.loads(next_data_match.group(1))
                page_props = next_data.get("props", {}).get("pageProps", {})
                chapter_info = page_props.get("chapterInfo", {})
-                    # 从chapterList中查找当前章节
+                if chapter_info and "chapterVideoVo" in chapter_info:
-                    chapter_list = page_props.get("chapterList", [])
+                    video_info = chapter_info["chapterVideoVo"]
-                    print(f"找到章节列表，长度: {len(chapter_list)}")
+                    for key in ["mp4", "mp4720p", "vodMp4Url"]:
                        if key in video_info and video_info[key] and ".mp4" in video_info[key].lower():
                            return video_info[key]
            except:
                pass
-                    for chapter in chapter_list:
+        # 方法2: 直接从HTML提取
-                        if chapter.get("chapterId") == chapter_id:
+        mp4_matches = re.findall(r'(https?://[^"\']+\.mp4)', html)
                            print(f"找到匹配的章节: {chapter.get('chapterName')}")
                            chapter_video = chapter.get("chapterVideoVo", {})
                            mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                            if mp4_url:
                                print(f"从chapterList找到MP4链接: {mp4_url}")
                                break
                    # 如果未找到，尝试从当前章节获取
                    if not mp4_url:
                        current_chapter = page_props.get("chapterInfo", {})
                        if current_chapter:
                            print("找到当前章节信息")
                            chapter_video = current_chapter.get("chapterVideoVo", {})
                            mp4_url = chapter_video.get("mp4", "") or chapter_video.get("mp4720p", "") or chapter_video.get("vodMp4Url", "")
                            if mp4_url:
                                print(f"从chapterInfo找到MP4链接: {mp4_url}")
                except Exception as e:
                    print(f"解析NEXT_DATA失败: {str(e)}")
                    print(traceback.format_exc())
            # 方法2: 直接从HTML中提取MP4链接
            if not mp4_url:
                mp4_pattern = r'(https?://[^"\']+\.mp4)'
                mp4_matches = re.findall(mp4_pattern, html)
        if mp4_matches:
                    # 查找含有chapter_id的链接
                    matched_mp4 = False
            for url in mp4_matches:
-                        if chapter_id in url:
+                if chapter_id in url or drama_id in url:
-                            mp4_url = url
+                    return url
-                            matched_mp4 = True
+            return mp4_matches[0]
                            print(f"从HTML直接提取章节MP4链接: {mp4_url}")
                            break
-                    # 如果没找到包含chapter_id的链接，使用第一个
+        return None
                    if not matched_mp4 and mp4_matches:
                        mp4_url = mp4_matches[0]
                        print(f"从HTML直接提取MP4链接: {mp4_url}")
            if mp4_url and ".mp4" in mp4_url:
                print(f"最终找到的MP4链接: {mp4_url}")
                result["parse"] = 0
                result["url"] = mp4_url
                result["header"] = json.dumps(headers)
                return result
            else:
                print(f"未找到有效的MP4链接，尝试再次解析页面内容")
                # 再尝试一次从HTML中广泛搜索所有可能的MP4链接
                all_mp4_pattern = r'(https?://[^"\']+\.mp4)'
                all_mp4_matches = re.findall(all_mp4_pattern, html)
                if all_mp4_matches:
                    mp4_url = all_mp4_matches[0]
                    print(f"从HTML广泛搜索找到MP4链接: {mp4_url}")
                    result["parse"] = 0
                    result["url"] = mp4_url
                    result["header"] = json.dumps(headers)
                    return result
                print(f"未找到视频链接，返回原episode URL: {episode_url}")
                result["parse"] = 0
                result["url"] = episode_url
                result["header"] = json.dumps(headers)
                return result
        except Exception as e:
            print(f"请求或解析失败: {str(e)}")
            print(traceback.format_exc())
            result["parse"] = 0
            result["url"] = id
            result["header"] = json.dumps(headers)
            return result
    def localProxy(self, param):
        # 本地代理处理，此处简单返回传入的参数
        return [200, "video/MP2T", {}, param]
    def destroy(self):
        # 资源回收
        pass
--- a/spider.jar
+++ b/spider.jar