Angular规范的commit message格式如下: - `<type>`: 变更类型(如 feat, fix, docs, style, refactor, test, chore 等) - `<scope>`: 变更范围(如 component, service, module 等) - `<subject>`: 简短描述(中文) - `<body>`: 详细描述(中文) 请提供代码变更详情,我将为您生成完整的commit message。
		
			
				
	
	
		
			716 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			716 lines
		
	
	
		
			30 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # -*- coding: utf-8 -*-
 | ||
| # 🌈 Love 
 | ||
| import json
 | ||
| import random
 | ||
| import re
 | ||
| import sys
 | ||
| import threading
 | ||
| import time
 | ||
| from base64 import b64decode, b64encode
 | ||
| from urllib.parse import urlparse, quote
 | ||
| 
 | ||
| import requests
 | ||
| from pyquery import PyQuery as pq
 | ||
| sys.path.append('..')
 | ||
| from base.spider import Spider
 | ||
| 
 | ||
| 
 | ||
| class Spider(Spider):
 | ||
| 
 | ||
|     def init(self, extend=""):
 | ||
|         try:self.proxies = json.loads(extend)
 | ||
|         except:self.proxies = {}
 | ||
|         self.headers = {
 | ||
|             'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
 | ||
|             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
 | ||
|             'Accept-Language': 'zh-CN,zh;q=0.9',
 | ||
|             'Connection': 'keep-alive',
 | ||
|             'Cache-Control': 'no-cache',
 | ||
|         }
 | ||
|         # Use working dynamic URLs directly
 | ||
|         self.host = self.get_working_host()
 | ||
|         self.headers.update({'Origin': self.host, 'Referer': f"{self.host}/"})
 | ||
|         self.log(f"使用站点: {self.host}")
 | ||
|         print(f"使用站点: {self.host}")
 | ||
|         pass
 | ||
| 
 | ||
|     def getName(self):
 | ||
|         return "🌈 今日看料"
 | ||
| 
 | ||
|     def isVideoFormat(self, url):
 | ||
|         # Treat direct media formats as playable without parsing
 | ||
|         return any(ext in (url or '') for ext in ['.m3u8', '.mp4', '.ts'])
 | ||
| 
 | ||
|     def manualVideoCheck(self):
 | ||
|         return False
 | ||
| 
 | ||
|     def destroy(self):
 | ||
|         pass
 | ||
| 
 | ||
|     def homeContent(self, filter):
 | ||
|         try:
 | ||
|             response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
 | ||
|             if response.status_code != 200:
 | ||
|                 return {'class': [], 'list': []}
 | ||
|                 
 | ||
|             data = self.getpq(response.text)
 | ||
|             result = {}
 | ||
|             classes = []
 | ||
|             
 | ||
|             # 优先从导航栏获取分类
 | ||
|             nav_selectors = [
 | ||
|                 '#navbarCollapse .navbar-nav .nav-item .nav-link',
 | ||
|                 '.navbar-nav .nav-item .nav-link',
 | ||
|                 '#nav .menu-item a',
 | ||
|                 '.menu .menu-item a'
 | ||
|             ]
 | ||
|             
 | ||
|             found_categories = False
 | ||
|             for selector in nav_selectors:
 | ||
|                 for item in data(selector).items():
 | ||
|                     href = item.attr('href') or ''
 | ||
|                     name = item.text().strip()
 | ||
|                     
 | ||
|                     # 过滤掉非分类链接
 | ||
|                     if (not href or not name or 
 | ||
|                         href == '#' or 
 | ||
|                         href.startswith('http') or
 | ||
|                         'about' in href.lower() or
 | ||
|                         'contact' in href.lower() or
 | ||
|                         'tags' in href.lower() or
 | ||
|                         'top' in href.lower() or
 | ||
|                         'start' in href.lower() or
 | ||
|                         'time' in href.lower()):
 | ||
|                         continue
 | ||
|                     
 | ||
|                     # 确保是分类链接(包含category或明确的分类路径)
 | ||
|                     if '/category/' in href or any(cat in href for cat in ['/dy/', '/ks/', '/douyu/', '/hy/', '/hj/', '/tt/', '/wh/', '/asmr/', '/xb/', '/xsp/', '/rdgz/']):
 | ||
|                         # 处理相对路径
 | ||
|                         if href.startswith('/'):
 | ||
|                             type_id = href
 | ||
|                         else:
 | ||
|                             type_id = f'/{href}'
 | ||
|                             
 | ||
|                         classes.append({
 | ||
|                             'type_name': name,
 | ||
|                             'type_id': type_id
 | ||
|                         })
 | ||
|                         found_categories = True
 | ||
|             
 | ||
|             # 如果导航栏没找到,尝试从分类下拉菜单获取
 | ||
|             if not found_categories:
 | ||
|                 category_selectors = [
 | ||
|                     '.category-list a',
 | ||
|                     '.slide-toggle + .category-list a',
 | ||
|                     '.menu .category-list a'
 | ||
|                 ]
 | ||
|                 for selector in category_selectors:
 | ||
|                     for item in data(selector).items():
 | ||
|                         href = item.attr('href') or ''
 | ||
|                         name = item.text().strip()
 | ||
|                         
 | ||
|                         if href and name and href != '#':
 | ||
|                             if href.startswith('/'):
 | ||
|                                 type_id = href
 | ||
|                             else:
 | ||
|                                 type_id = f'/{href}'
 | ||
|                                 
 | ||
|                             classes.append({
 | ||
|                                 'type_name': name,
 | ||
|                                 'type_id': type_id
 | ||
|                             })
 | ||
|                             found_categories = True
 | ||
|             
 | ||
|             # 去重
 | ||
|             unique_classes = []
 | ||
|             seen_ids = set()
 | ||
|             for cls in classes:
 | ||
|                 if cls['type_id'] not in seen_ids:
 | ||
|                     unique_classes.append(cls)
 | ||
|                     seen_ids.add(cls['type_id'])
 | ||
|             
 | ||
|             # 如果没有找到分类,创建默认分类
 | ||
|             if not unique_classes:
 | ||
|                 unique_classes = [
 | ||
|                     {'type_name': '热点关注', 'type_id': '/category/rdgz/'},
 | ||
|                     {'type_name': '抖音', 'type_id': '/category/dy/'},
 | ||
|                     {'type_name': '快手', 'type_id': '/category/ks/'},
 | ||
|                     {'type_name': '斗鱼', 'type_id': '/category/douyu/'},
 | ||
|                     {'type_name': '虎牙', 'type_id': '/category/hy/'},
 | ||
|                     {'type_name': '花椒', 'type_id': '/category/hj/'},
 | ||
|                     {'type_name': '推特', 'type_id': '/category/tt/'},
 | ||
|                     {'type_name': '网红', 'type_id': '/category/wh/'},
 | ||
|                     {'type_name': 'ASMR', 'type_id': '/category/asmr/'},
 | ||
|                     {'type_name': 'X播', 'type_id': '/category/xb/'},
 | ||
|                     {'type_name': '小视频', 'type_id': '/category/xsp/'}
 | ||
|                 ]
 | ||
|             
 | ||
|             result['class'] = unique_classes
 | ||
|             result['list'] = self.getlist(data('#index article a, #archive article a'))
 | ||
|             return result
 | ||
|             
 | ||
|         except Exception as e:
 | ||
|             print(f"homeContent error: {e}")
 | ||
|             return {'class': [], 'list': []}
 | ||
| 
 | ||
|     def homeVideoContent(self):
 | ||
|         try:
 | ||
|             response = requests.get(self.host, headers=self.headers, proxies=self.proxies, timeout=15)
 | ||
|             if response.status_code != 200:
 | ||
|                 return {'list': []}
 | ||
|             data = self.getpq(response.text)
 | ||
|             return {'list': self.getlist(data('#index article a, #archive article a'))}
 | ||
|         except Exception as e:
 | ||
|             print(f"homeVideoContent error: {e}")
 | ||
|             return {'list': []}
 | ||
| 
 | ||
|     def categoryContent(self, tid, pg, filter, extend):
 | ||
|         try:
 | ||
|             # 修复URL构建 - 去除多余的斜杠
 | ||
|             base_url = tid.lstrip('/').rstrip('/')
 | ||
|             if pg and pg != '1':
 | ||
|                 url = f"{self.host}{base_url}/{pg}/"
 | ||
|             else:
 | ||
|                 url = f"{self.host}{base_url}/"
 | ||
|                 
 | ||
|             print(f"分类页面URL: {url}")
 | ||
|             
 | ||
|             response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
 | ||
|             if response.status_code != 200:
 | ||
|                 print(f"分类页面请求失败: {response.status_code}")
 | ||
|                 return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
 | ||
|                 
 | ||
|             data = self.getpq(response.text)
 | ||
|             videos = self.getlist(data('#archive article a, #index article a, .post-card'), tid)
 | ||
|             
 | ||
|             # 如果没有找到视频,尝试其他选择器
 | ||
|             if not videos:
 | ||
|                 videos = self.getlist(data('article a, .post a, .entry-title a'), tid)
 | ||
|             
 | ||
|             print(f"找到 {len(videos)} 个视频")
 | ||
|             
 | ||
|             # 改进的页数检测逻辑
 | ||
|             pagecount = self.detect_page_count(data, pg)
 | ||
|             
 | ||
|             result = {}
 | ||
|             result['list'] = videos
 | ||
|             result['page'] = pg
 | ||
|             result['pagecount'] = pagecount
 | ||
|             result['limit'] = 90
 | ||
|             result['total'] = 999999
 | ||
|             return result
 | ||
|             
 | ||
|         except Exception as e:
 | ||
|             print(f"categoryContent error: {e}")
 | ||
|             return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
 | ||
| 
 | ||
|     def tagContent(self, tid, pg, filter, extend):
 | ||
|         """标签页面内容"""
 | ||
|         try:
 | ||
|             # 修复URL构建 - 去除多余的斜杠
 | ||
|             base_url = tid.lstrip('/').rstrip('/')
 | ||
|             if pg and pg != '1':
 | ||
|                 url = f"{self.host}{base_url}/{pg}/"
 | ||
|             else:
 | ||
|                 url = f"{self.host}{base_url}/"
 | ||
|                 
 | ||
|             print(f"标签页面URL: {url}")
 | ||
|             
 | ||
|             response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
 | ||
|             if response.status_code != 200:
 | ||
|                 print(f"标签页面请求失败: {response.status_code}")
 | ||
|                 return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
 | ||
|                 
 | ||
|             data = self.getpq(response.text)
 | ||
|             videos = self.getlist(data('#archive article a, #index article a, .post-card'), tid)
 | ||
|             
 | ||
|             # 如果没有找到视频,尝试其他选择器
 | ||
|             if not videos:
 | ||
|                 videos = self.getlist(data('article a, .post a, .entry-title a'), tid)
 | ||
|             
 | ||
|             print(f"找到 {len(videos)} 个标签相关视频")
 | ||
|             
 | ||
|             # 页数检测
 | ||
|             pagecount = self.detect_page_count(data, pg)
 | ||
|             
 | ||
|             result = {}
 | ||
|             result['list'] = videos
 | ||
|             result['page'] = pg
 | ||
|             result['pagecount'] = pagecount
 | ||
|             result['limit'] = 90
 | ||
|             result['total'] = 999999
 | ||
|             return result
 | ||
|             
 | ||
|         except Exception as e:
 | ||
|             print(f"tagContent error: {e}")
 | ||
|             return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0}
 | ||
| 
 | ||
|     def detect_page_count(self, data, current_page):
 | ||
|         """改进的页数检测方法"""
 | ||
|         pagecount = 99999  # 默认大数字,允许无限翻页
 | ||
|         
 | ||
|         # 方法1: 检查分页器中的所有页码链接
 | ||
|         page_numbers = []
 | ||
|         
 | ||
|         # 查找所有可能的页码链接
 | ||
|         page_selectors = [
 | ||
|             '.page-navigator a',
 | ||
|             '.pagination a', 
 | ||
|             '.pages a',
 | ||
|             '.page-numbers a'
 | ||
|         ]
 | ||
|         
 | ||
|         for selector in page_selectors:
 | ||
|             for page_link in data(selector).items():
 | ||
|                 href = page_link.attr('href') or ''
 | ||
|                 text = page_link.text().strip()
 | ||
|                 
 | ||
|                 # 从href中提取页码
 | ||
|                 if href:
 | ||
|                     # 匹配 /category/dy/2/ 这种格式
 | ||
|                     match = re.search(r'/(\d+)/?$', href.rstrip('/'))
 | ||
|                     if match:
 | ||
|                         page_num = int(match.group(1))
 | ||
|                         if page_num not in page_numbers:
 | ||
|                             page_numbers.append(page_num)
 | ||
|                 
 | ||
|                 # 从文本中提取数字页码
 | ||
|                 if text and text.isdigit():
 | ||
|                     page_num = int(text)
 | ||
|                     if page_num not in page_numbers:
 | ||
|                         page_numbers.append(page_num)
 | ||
|         
 | ||
|         # 如果有找到页码,取最大值
 | ||
|         if page_numbers:
 | ||
|             max_page = max(page_numbers)
 | ||
|             print(f"从分页器检测到最大页码: {max_page}")
 | ||
|             return max_page
 | ||
|         
 | ||
|         # 方法2: 检查是否存在"下一页"按钮
 | ||
|         next_selectors = [
 | ||
|             '.page-navigator .next',
 | ||
|             '.pagination .next',
 | ||
|             '.next-page',
 | ||
|             'a:contains("下一页")'
 | ||
|         ]
 | ||
|         
 | ||
|         for selector in next_selectors:
 | ||
|             if data(selector):
 | ||
|                 print("检测到下一页按钮,允许继续翻页")
 | ||
|                 return 99999
 | ||
|         
 | ||
|         # 方法3: 如果当前页视频数量很少,可能没有下一页
 | ||
|         if len(data('#archive article, #index article, .post-card')) < 5:
 | ||
|             print("当前页内容较少,可能没有下一页")
 | ||
|             return int(current_page)
 | ||
|         
 | ||
|         print("使用默认页数: 99999")
 | ||
|         return 99999
 | ||
| 
 | ||
|     def detailContent(self, ids):
 | ||
|         try:
 | ||
|             url = f"{self.host}{ids[0]}" if not ids[0].startswith('http') else ids[0]
 | ||
|             response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
 | ||
|             
 | ||
|             if response.status_code != 200:
 | ||
|                 return {'list': [{'vod_play_from': '今日看料', 'vod_play_url': f'页面加载失败${url}'}]}
 | ||
|                 
 | ||
|             data = self.getpq(response.text)
 | ||
|             vod = {'vod_play_from': '今日看料'}
 | ||
|             
 | ||
|             # 获取标题
 | ||
|             title_selectors = ['.post-title', 'h1.entry-title', 'h1', '.post-card-title']
 | ||
|             for selector in title_selectors:
 | ||
|                 title_elem = data(selector)
 | ||
|                 if title_elem:
 | ||
|                     vod['vod_name'] = title_elem.text().strip()
 | ||
|                     break
 | ||
|             
 | ||
|             if 'vod_name' not in vod:
 | ||
|                 vod['vod_name'] = '今日看料视频'
 | ||
|             
 | ||
|             # 获取内容/描述
 | ||
|             try:
 | ||
|                 clist = []
 | ||
|                 if data('.tags .keywords a'):
 | ||
|                     for k in data('.tags .keywords a').items():
 | ||
|                         title = k.text()
 | ||
|                         href = k.attr('href')
 | ||
|                         if title and href:
 | ||
|                             # 使href相对路径
 | ||
|                             if href.startswith(self.host):
 | ||
|                                 href = href.replace(self.host, '')
 | ||
|                             clist.append('[a=cr:' + json.dumps({'id': href, 'name': title}) + '/]' + title + '[/a]')
 | ||
|                 vod['vod_content'] = ' '.join(clist) if clist else data('.post-content').text() or vod['vod_name']
 | ||
|             except:
 | ||
|                 vod['vod_content'] = vod['vod_name']
 | ||
|             
 | ||
|             # 获取视频URLs
 | ||
|             try:
 | ||
|                 plist = []
 | ||
|                 used_names = set()
 | ||
|                 
 | ||
|                 # 查找DPlayer视频
 | ||
|                 if data('.dplayer'):
 | ||
|                     for c, k in enumerate(data('.dplayer').items(), start=1):
 | ||
|                         config_attr = k.attr('data-config')
 | ||
|                         if config_attr:
 | ||
|                             try:
 | ||
|                                 config = json.loads(config_attr)
 | ||
|                                 video_url = config.get('video', {}).get('url', '')
 | ||
|                                 if video_url:
 | ||
|                                     name = f"视频{c}"
 | ||
|                                     count = 2
 | ||
|                                     while name in used_names:
 | ||
|                                         name = f"视频{c}_{count}"
 | ||
|                                         count += 1
 | ||
|                                     used_names.add(name)
 | ||
|                                     self.log(f"解析到视频: {name} -> {video_url}")
 | ||
|                                     print(f"解析到视频: {name} -> {video_url}")
 | ||
|                                     plist.append(f"{name}${video_url}")
 | ||
|                             except:
 | ||
|                                 continue
 | ||
|                 
 | ||
|                 # 查找视频标签
 | ||
|                 if not plist:
 | ||
|                     video_selectors = ['video source', 'video', 'iframe[src*="video"]', 'a[href*=".m3u8"]', 'a[href*=".mp4"]']
 | ||
|                     for selector in video_selectors:
 | ||
|                         for c, elem in enumerate(data(selector).items(), start=1):
 | ||
|                             src = elem.attr('src') or elem.attr('href') or ''
 | ||
|                             if src and any(ext in src for ext in ['.m3u8', '.mp4', 'video']):
 | ||
|                                 name = f"视频{c}"
 | ||
|                                 count = 2
 | ||
|                                 while name in used_names:
 | ||
|                                     name = f"视频{c}_{count}"
 | ||
|                                     count += 1
 | ||
|                                 used_names.add(name)
 | ||
|                                 plist.append(f"{name}${src}")
 | ||
|                 
 | ||
|                 if plist:
 | ||
|                     self.log(f"拼装播放列表,共{len(plist)}个")
 | ||
|                     print(f"拼装播放列表,共{len(plist)}个")
 | ||
|                     vod['vod_play_url'] = '#'.join(plist)
 | ||
|                 else:
 | ||
|                     vod['vod_play_url'] = f"正片${url}"
 | ||
|                     
 | ||
|             except Exception as e:
 | ||
|                 print(f"视频解析错误: {e}")
 | ||
|                 vod['vod_play_url'] = f"正片${url}"
 | ||
|                 
 | ||
|             return {'list': [vod]}
 | ||
|             
 | ||
|         except Exception as e:
 | ||
|             print(f"detailContent error: {e}")
 | ||
|             return {'list': [{'vod_play_from': '今日看料', 'vod_play_url': f'详情页加载失败${ids[0] if ids else ""}'}]}
 | ||
| 
 | ||
|     def searchContent(self, key, quick, pg="1"):
 | ||
|         try:
 | ||
|             # 优先使用标签搜索
 | ||
|             encoded_key = quote(key)
 | ||
|             url = f"{self.host}/tag/{encoded_key}/{pg}" if pg != "1" else f"{self.host}/tag/{encoded_key}/"
 | ||
|             response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
 | ||
|             
 | ||
|             if response.status_code != 200:
 | ||
|                 # 尝试搜索页面
 | ||
|                 url = f"{self.host}/search/{encoded_key}/{pg}" if pg != "1" else f"{self.host}/search/{encoded_key}/"
 | ||
|                 response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
 | ||
|             
 | ||
|             if response.status_code != 200:
 | ||
|                 return {'list': [], 'page': pg}
 | ||
|                 
 | ||
|             data = self.getpq(response.text)
 | ||
|             videos = self.getlist(data('#archive article a, #index article a, .post-card'))
 | ||
|             
 | ||
|             # 使用改进的页数检测方法
 | ||
|             pagecount = self.detect_page_count(data, pg)
 | ||
|             
 | ||
|             return {'list': videos, 'page': pg, 'pagecount': pagecount}
 | ||
|             
 | ||
|         except Exception as e:
 | ||
|             print(f"searchContent error: {e}")
 | ||
|             return {'list': [], 'page': pg}
 | ||
| 
 | ||
|     def getTagsContent(self, pg="1"):
 | ||
|         """获取标签页面内容"""
 | ||
|         try:
 | ||
|             url = f"{self.host}/tags.html"
 | ||
|             response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=15)
 | ||
|             
 | ||
|             if response.status_code != 200:
 | ||
|                 return {'list': [], 'page': pg}
 | ||
|                 
 | ||
|             data = self.getpq(response.text)
 | ||
|             tags = []
 | ||
|             
 | ||
|             # 从标签页面提取所有标签 - 使用更宽松的选择器
 | ||
|             for tag_elem in data('a[href*="/tag/"]').items():
 | ||
|                 tag_name = tag_elem.text().strip()
 | ||
|                 tag_href = tag_elem.attr('href') or ''
 | ||
|                 
 | ||
|                 if tag_name and tag_href and '/tag/' in tag_href and tag_name != '全部标签':  # 排除标题链接
 | ||
|                     # 处理为相对路径
 | ||
|                     tag_id = tag_href.replace(self.host, '')
 | ||
|                     if not tag_id.startswith('/'):
 | ||
|                         tag_id = '/' + tag_id
 | ||
|                     
 | ||
|                     tags.append({
 | ||
|                         'vod_id': tag_id,
 | ||
|                         'vod_name': f"🏷️ {tag_name}",
 | ||
|                         'vod_pic': '',
 | ||
|                         'vod_remarks': '标签',
 | ||
|                         'vod_tag': 'tag',
 | ||
|                         'style': {"type": "rect", "ratio": 1.33}
 | ||
|                     })
 | ||
|             
 | ||
|             print(f"找到 {len(tags)} 个标签")
 | ||
|             
 | ||
|             # 分页处理 - 标签页面通常不需要分页
 | ||
|             result = {}
 | ||
|             result['list'] = tags
 | ||
|             result['page'] = pg
 | ||
|             result['pagecount'] = 1  # 标签页面通常只有一页
 | ||
|             result['limit'] = 999
 | ||
|             result['total'] = len(tags)
 | ||
|             return result
 | ||
|             
 | ||
|         except Exception as e:
 | ||
|             print(f"getTagsContent error: {e}")
 | ||
|             return {'list': [], 'page': pg}
 | ||
| 
 | ||
|     def playerContent(self, flag, id, vipFlags):
 | ||
|         url = id
 | ||
|         p = 1
 | ||
|         if self.isVideoFormat(url):
 | ||
|             if '.m3u8' in url:
 | ||
|                 url = self.proxy(url)
 | ||
|             p = 0
 | ||
|         self.log(f"播放请求: parse={p}, url={url}")
 | ||
|         print(f"播放请求: parse={p}, url={url}")
 | ||
|         return {'parse': p, 'url': url, 'header': self.headers}
 | ||
| 
 | ||
|     def localProxy(self, param):
 | ||
|         try:
 | ||
|             if param.get('type') == 'img':
 | ||
|                 img_url = self.d64(param['url'])
 | ||
|                 if not img_url.startswith(('http://', 'https://')):
 | ||
|                     if img_url.startswith('/'):
 | ||
|                         img_url = f"{self.host}{img_url}"
 | ||
|                     else:
 | ||
|                         img_url = f"{self.host}/{img_url}"
 | ||
|                 
 | ||
|                 res = requests.get(img_url, headers=self.headers, proxies=self.proxies, timeout=10)
 | ||
|                 return [200, res.headers.get('Content-Type', 'image/jpeg'), res.content]
 | ||
|             elif param.get('type') == 'm3u8':
 | ||
|                 return self.m3Proxy(param['url'])
 | ||
|             else:
 | ||
|                 return self.tsProxy(param['url'])
 | ||
|         except Exception as e:
 | ||
|             print(f"localProxy error: {e}")
 | ||
|             return [500, "text/plain", f"Proxy error: {str(e)}".encode()]
 | ||
| 
 | ||
|     def proxy(self, data, type='m3u8'):
 | ||
|         if data and len(self.proxies):
 | ||
|             return f"{self.getProxyUrl()}&url={self.e64(data)}&type={type}"
 | ||
|         else:
 | ||
|             return data
 | ||
| 
 | ||
|     def m3Proxy(self, url):
 | ||
|         try:
 | ||
|             url = self.d64(url)
 | ||
|             ydata = requests.get(url, headers=self.headers, proxies=self.proxies, allow_redirects=False)
 | ||
|             data = ydata.content.decode('utf-8')
 | ||
|             if ydata.headers.get('Location'):
 | ||
|                 url = ydata.headers['Location']
 | ||
|                 data = requests.get(url, headers=self.headers, proxies=self.proxies).content.decode('utf-8')
 | ||
|             lines = data.strip().split('\n')
 | ||
|             last_r = url[:url.rfind('/')]
 | ||
|             parsed_url = urlparse(url)
 | ||
|             durl = parsed_url.scheme + "://" + parsed_url.netloc
 | ||
|             iskey = True
 | ||
|             for index, string in enumerate(lines):
 | ||
|                 if iskey and 'URI' in string:
 | ||
|                     pattern = r'URI="([^"]*)"'
 | ||
|                     match = re.search(pattern, string)
 | ||
|                     if match:
 | ||
|                         lines[index] = re.sub(pattern, f'URI="{self.proxy(match.group(1), "mkey")}"', string)
 | ||
|                         iskey = False
 | ||
|                         continue
 | ||
|                 if '#EXT' not in string:
 | ||
|                     if 'http' not in string:
 | ||
|                         domain = last_r if string.count('/') < 2 else durl
 | ||
|                         string = domain + ('' if string.startswith('/') else '/') + string
 | ||
|                     lines[index] = self.proxy(string, string.split('.')[-1].split('?')[0])
 | ||
|             data = '\n'.join(lines)
 | ||
|             return [200, "application/vnd.apple.mpegur", data]
 | ||
|         except Exception as e:
 | ||
|             print(f"m3Proxy error: {e}")
 | ||
|             return [500, "text/plain", f"m3u8 proxy error: {str(e)}".encode()]
 | ||
| 
 | ||
|     def tsProxy(self, url):
 | ||
|         try:
 | ||
|             url = self.d64(url)
 | ||
|             data = requests.get(url, headers=self.headers, proxies=self.proxies, stream=True)
 | ||
|             return [200, data.headers.get('Content-Type', 'video/mp2t'), data.content]
 | ||
|         except Exception as e:
 | ||
|             print(f"tsProxy error: {e}")
 | ||
|             return [500, "text/plain", f"ts proxy error: {str(e)}".encode()]
 | ||
| 
 | ||
|     def e64(self, text):
 | ||
|         try:
 | ||
|             text_bytes = text.encode('utf-8')
 | ||
|             encoded_bytes = b64encode(text_bytes)
 | ||
|             return encoded_bytes.decode('utf-8')
 | ||
|         except Exception as e:
 | ||
|             print(f"Base64编码错误: {str(e)}")
 | ||
|             return ""
 | ||
| 
 | ||
|     def d64(self, encoded_text):
 | ||
|         try:
 | ||
|             encoded_bytes = encoded_text.encode('utf-8')
 | ||
|             decoded_bytes = b64decode(encoded_bytes)
 | ||
|             return decoded_bytes.decode('utf-8')
 | ||
|         except Exception as e:
 | ||
|             print(f"Base64解码错误: {str(e)}")
 | ||
|             return ""
 | ||
| 
 | ||
|     def get_working_host(self):
 | ||
|         """Get working host from known dynamic URLs"""
 | ||
|         dynamic_urls = [
 | ||
|             'https://kanliao2.one/',
 | ||
|             'https://kanliao7.org/',
 | ||
|             'https://kanliao7.net/',
 | ||
|             'https://kanliao14.com/'
 | ||
|         ]
 | ||
|         
 | ||
|         for url in dynamic_urls:
 | ||
|             try:
 | ||
|                 response = requests.get(url, headers=self.headers, proxies=self.proxies, timeout=10)
 | ||
|                 if response.status_code == 200:
 | ||
|                     data = self.getpq(response.text)
 | ||
|                     articles = data('#index article a, #archive article a')
 | ||
|                     if len(articles) > 0:
 | ||
|                         self.log(f"选用可用站点: {url}")
 | ||
|                         print(f"选用可用站点: {url}")
 | ||
|                         return url
 | ||
|             except Exception as e:
 | ||
|                 continue
 | ||
|         
 | ||
|         self.log(f"未检测到可用站点,回退: {dynamic_urls[0]}")
 | ||
|         print(f"未检测到可用站点,回退: {dynamic_urls[0]}")
 | ||
|         return dynamic_urls[0]
 | ||
| 
 | ||
|     def getlist(self, data, tid=''):
 | ||
|         videos = []
 | ||
|         for k in data.items():
 | ||
|             a = k.attr('href')
 | ||
|             b = k('h2').text() or k('.post-card-title').text() or k('.entry-title').text() or k.text()
 | ||
|             c = k('span[itemprop="datePublished"]').text() or k('.post-meta, .entry-meta, time, .post-card-info').text()
 | ||
|             
 | ||
|             # 过滤广告:检查是否包含"热搜HOT"标志
 | ||
|             if self.is_advertisement(k):
 | ||
|                 print(f"过滤广告: {b}")
 | ||
|                 continue
 | ||
|                 
 | ||
|             if a and b and b.strip():
 | ||
|                 # 处理相对路径
 | ||
|                 if not a.startswith('http'):
 | ||
|                     if a.startswith('/'):
 | ||
|                         vod_id = a
 | ||
|                     else:
 | ||
|                         vod_id = f'/{a}'
 | ||
|                 else:
 | ||
|                     vod_id = a
 | ||
|                     
 | ||
|                 videos.append({
 | ||
|                     'vod_id': vod_id,
 | ||
|                     'vod_name': b.replace('\n', ' ').strip(),
 | ||
|                     'vod_pic': self.get_article_img(k),
 | ||
|                     'vod_remarks': c.strip() if c else '',
 | ||
|                     'vod_tag': '',
 | ||
|                     'style': {"type": "rect", "ratio": 1.33}
 | ||
|                 })
 | ||
|         return videos
 | ||
| 
 | ||
|     def is_advertisement(self, article_elem):
 | ||
|         """判断是否为广告(包含热搜HOT标志)"""
 | ||
|         # 检查.wraps元素是否包含"热搜HOT"文本
 | ||
|         hot_elements = article_elem.find('.wraps')
 | ||
|         for elem in hot_elements.items():
 | ||
|             if '热搜HOT' in elem.text():
 | ||
|                 return True
 | ||
|         
 | ||
|         # 检查标题是否包含广告关键词
 | ||
|         title = article_elem('h2').text() or article_elem('.post-card-title').text() or ''
 | ||
|         ad_keywords = ['热搜HOT', '手机链接', 'DNS设置', '修改DNS', 'WIFI设置']
 | ||
|         if any(keyword in title for keyword in ad_keywords):
 | ||
|             return True
 | ||
|             
 | ||
|         # 检查背景颜色是否为广告特有的渐变背景
 | ||
|         style = article_elem.attr('style') or ''
 | ||
|         if 'background:' in style and any(gradient in style for gradient in ['-webkit-linear-gradient', 'linear-gradient']):
 | ||
|             # 进一步检查是否包含特定的广告颜色组合
 | ||
|             ad_gradients = ['#ec008c,#fc6767', '#ffe259,#ffa751']
 | ||
|             if any(gradient in style for gradient in ad_gradients):
 | ||
|                 return True
 | ||
|                 
 | ||
|         return False
 | ||
| 
 | ||
|     def get_article_img(self, article_elem):
 | ||
|         """从文章元素中提取图片,多种方式尝试"""
 | ||
|         # 方式1: 从script标签中提取loadBannerDirect
 | ||
|         script_text = article_elem('script').text()
 | ||
|         if script_text:
 | ||
|             match = re.search(r"loadBannerDirect\('([^']+)'", script_text)
 | ||
|             if match:
 | ||
|                 url = match.group(1)
 | ||
|                 if not url.startswith(('http://', 'https://')):
 | ||
|                     if url.startswith('/'):
 | ||
|                         url = f"{self.host}{url}"
 | ||
|                     else:
 | ||
|                         url = f"{self.host}/{url}"
 | ||
|                 return f"{self.getProxyUrl()}&url={self.e64(url)}&type=img"
 | ||
|         
 | ||
|         # 方式2: 从背景图片中提取
 | ||
|         bg_elem = article_elem.find('.blog-background')
 | ||
|         if bg_elem:
 | ||
|             style = bg_elem.attr('style') or ''
 | ||
|             bg_match = re.search(r'background-image:\s*url\(["\']?([^"\'\)]+)["\']?\)', style)
 | ||
|             if bg_match:
 | ||
|                 img_url = bg_match.group(1)
 | ||
|                 if img_url and not img_url.startswith('data:'):
 | ||
|                     if not img_url.startswith(('http://', 'https://')):
 | ||
|                         if img_url.startswith('/'):
 | ||
|                             img_url = f"{self.host}{img_url}"
 | ||
|                         else:
 | ||
|                             img_url = f"{self.host}/{img_url}"
 | ||
|                     return f"{self.getProxyUrl()}&url={self.e64(img_url)}&type=img"
 | ||
|         
 | ||
|         # 方式3: 从图片标签中提取
 | ||
|         img_elem = article_elem.find('img')
 | ||
|         if img_elem:
 | ||
|             data_src = img_elem.attr('data-src')
 | ||
|             if data_src:
 | ||
|                 if not data_src.startswith(('http://', 'https://')):
 | ||
|                     if data_src.startswith('/'):
 | ||
|                         data_src = f"{self.host}{data_src}"
 | ||
|                     else:
 | ||
|                         data_src = f"{self.host}/{data_src}"
 | ||
|                 return f"{self.getProxyUrl()}&url={self.e64(data_src)}&type=img"
 | ||
|             
 | ||
|             src = img_elem.attr('src')
 | ||
|             if src:
 | ||
|                 if not src.startswith(('http://', 'https://')):
 | ||
|                     if src.startswith('/'):
 | ||
|                         src = f"{self.host}{src}"
 | ||
|                     else:
 | ||
|                         src = f"{self.host}/{src}"
 | ||
|                 return f"{self.getProxyUrl()}&url={self.e64(src)}&type=img"
 | ||
|         
 | ||
|         return ''
 | ||
| 
 | ||
|     def getpq(self, data):
 | ||
|         try:
 | ||
|             return pq(data)
 | ||
|         except Exception as e:
 | ||
|             print(f"{str(e)}")
 | ||
|             return pq(data.encode('utf-8')) |