# -*- coding: utf-8 -*- # by @嗷呜 import gzip import html import json import re import sys import base64 from base64 import b64decode from urllib.parse import unquote, urlparse import requests from pyquery import PyQuery as pq sys.path.append('..') from base.spider import Spider class Spider(Spider): def init(self, extend='{}'): config = json.loads(extend) self.proxies = config.get('proxy', {}) self.plp = config.get('plp', '') pass def getName(self): pass def isVideoFormat(self, url): pass def manualVideoCheck(self): pass def destroy(self): pass host = 'https://javxx.com' contr = 'cn' conh = f'{host}/{contr}' headers = { 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8', 'referer': f'{conh}/', 'sec-ch-ua': '"Not)A;Brand";v="8", "Chromium";v="138", "Google Chrome";v="138"', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36', } gcate = 'H4sIAAAAAAAAA6tWejan4dm0DUpWCkp5qeVKOkrPm9e+nL4CxM/ILwHygfIv9k8E8YtSk1PzwELTFzxf0AgSKs0DChXnF6WmwIWfbW55OWcTqqRuTmpiNljN8427n3asBsmmp+YVpRaDtO2Z8nTiDJBQYnIJUKgYLPq0Y9uTvXOeTm0DSeQCdReBRJ9vBmqfDhIqTi3KhGhf0P587T6QUElierFSLQCk4MAf0gAAAA==' flts = 'H4sIAAAAAAAAA23QwYrCMBAG4FeRnH0CX0WKBDJiMRpoY0WkIOtFXLQU1IoEFFHWw4qHPazgii/TRPctNKK1Ro/zz8cM/PkmKkMD5TLIZQ5HWVTFFUiNHqY1PeebyNOxAxSwCwWCOWitMxmEcttW0VKJKfKzN4kJAfLk1O9OdmemKzF+B8f2+j9aPVacEdwoeDbU3TuJd93LgdPXx1F8PmAdoEwNqTaBDFemrLAqL72hSnReqcuvDkgCRUsGkfqenw59AxaxxxybP9uRuFjkW5reai7alIOTKjoJzKoxpUnDvWG8bcnlj/obyHCcKi95JxeTeN9LEcu3zoYr9GndAQAA' actft = 'H4sIAAAAAAAAA22UTUsbURSG/0qYtQMxZvIhIvidxI/oVpEy6GiCmpFkEhEpVBcqikYprV2kG6GkhYK2XRbxzziT+C88c2/OnLnnunznec47zJ3LWTsydpxDYzRhVJzqdsUzhoyavecoD1r2bjN8snZktEIwPJI0h0fSoRqL/vW33p9/xsehyLLgcZ4sETUrDcNp6pJRt2A4TV0yapYFwxZ1yahbMGxRl4yalYHhDHXJqFswnKEuGTUrC8NZ6pJRt2A4S10yalYOhnPUJaNuwXCOumTUrDwM56lLRrTWQ29wNzaa+7GLIRO/FRPYM9F7+hV8f6D3TCKZ5GQKyRQn00imOZlBMsPJLJJZTuaQzHFSQFLgpIikyEkJSYmTeSTznCwgWeBkEckiJ0tIljgpIylzsoxkmZMVJCucrCJZRRL/9/a2E/v3MvF/H14cLBlLpJL+32OqTyXNVHTJRFCxZaaiYREUDMuFVo0IKrZM2jEiKBjWCS0XEVRsmbRVRFAwLBBaJyIoGHZCPpoeT2TkZ8fPruHW4xt1EPnpCTyo8buf/ZsreseG26x5CPvd09f72+DL4+tZmxTP3bQPP7SqzkEDxZf/F8Hdj373pNe5JPHAcXZ2mRk8tP3bn9zcc2te5R016JzrasMTnrMZiZ1Pfvsu+H3ff75m4pbdcutVT3W/dsAND279DSxD8pmOBgAA' def homeContent(self, filter): data = self.getpq(requests.get(f"{self.conh}", headers=self.headers, proxies=self.proxies).text) result = {} cate = self.ungzip(self.gcate) classes = [] filters = {} for k, j in cate.items(): classes.append({ 'type_name': k, 'type_id': j }) if j == 'actresses': fts = self.ungzip(self.actft) else: fts = self.ungzip(self.flts) filters[j] = fts result['class'] = classes result['filters'] = filters result['list'] = self.getvl(data('.vid-items .item')) return result def homeVideoContent(self): pass def categoryContent(self, tid, pg, filter, extend): videos = [] if tid in ['genres', 'makers', 'series', 'tags']: gggg = tid if tid == 'series' else tid[:-1] pagecount = 1 data = self.getpq(requests.get(f"{self.conh}/{tid}", headers=self.headers, proxies=self.proxies).text) for i in data(f'.term-items.{gggg} .item').items(): videos.append({ 'vod_id': i('a').attr('href'), 'vod_name': i('h2').text(), 'vod_remarks': i('.meta').text(), 'vod_tag': 'folder', 'style': {"type": "rect", "ratio": 2} }) elif tid == 'actresses': params = { 'height': extend.get('height'), "cup": extend.get('cup'), "sort": extend.get('sort'), 'age': extend.get('age'), "page": pg } c_params = {k: v for k, v in params.items() if v} data = self.getpq( requests.get(f"{self.conh}/{tid}", headers=self.headers, params=c_params, proxies=self.proxies).text) pagecount = self.getpgc(data('ul.pagination li').eq(-1)) for i in data('.chanel-items .item').items(): i = i('.main') videos.append({ 'vod_id': i('.info a').attr('href'), 'vod_name': i('.info h2').text(), 'vod_pic': i('.avatar img').attr('src'), 'vod_year': i('.meta div div').eq(-1).text(), 'vod_remarks': i('.meta div div').eq(0).text(), 'vod_tag': 'folder', 'style': {"type": "oval", "ratio": 0.75} }) else: tid = tid.split('_click')[0].replace(f"/{self.contr}/", "") params = { "filter": extend.get('filter'), "sort": extend.get('sort'), "page": pg } c_params = {k: v for k, v in params.items() if v} data = self.getpq( requests.get(f"{self.conh}/{tid}", params=c_params, headers=self.headers, proxies=self.proxies).text) videos = self.getvl(data('.vid-items .item')) pagecount = self.getpgc(data('ul.pagination li').eq(-1)) result = {} result['list'] = videos result['page'] = pg result['pagecount'] = pagecount result['limit'] = 90 result['total'] = 999999 return result def detailContent(self, ids): data = self.getpq(requests.get(f"{self.host}{ids[0]}", headers=self.headers, proxies=self.proxies).text) dv = data('#video-details') pnpn = { '老僧酿酒、名妓读经': f"{data('#video-info h1').text()}${data('#video-files div').attr('data-url')}", '书生玩剑': '#'.join( [f"{i('.info .title span').eq(-1).text()}$_gggb_{i('.info .title').attr('href')}" for i in data('.main .vid-items .item').items()]), '将军作文': '#'.join([f"{i('.info .title span').eq(-1).text()}$_gggb_{i('.info .title').attr('href')}" for i in data('.vid-items.side .item').items()]) } n, p = [], [] for k, v in pnpn.items(): if v: n.append(k) p.append(v) vod = { 'vod_content': dv('.content').text(), 'vod_play_from': '$$$'.join(n), 'vod_play_url': '$$$'.join(p) } a, b, c, d = [], [], [], [] for i in dv('.meta div').items(): if re.search(r'发布日期', i('label').text()): vod['vod_year'] = i('span').text() elif re.search(r'演员', i('label').text()): a.extend(['[a=cr:' + json.dumps( {'id': f"{j.attr('href')}_click", 'name': j.text()}) + '/]' + j.text() + '[/a]' for j in i('a').items()]) elif re.search(r'制作商|系列', i('label').text()): b.extend(['[a=cr:' + json.dumps( {'id': f"{j.attr('href')}_click", 'name': j.text()}) + '/]' + j.text() + '[/a]' for j in i('a').items()]) elif re.search(r'标签', i('label').text()): c.extend(['[a=cr:' + json.dumps( {'id': f"{j.attr('href')}_click", 'name': j.text()}) + '/]' + j.text() + '[/a]' for j in i('a').items()]) elif re.search(r'类别', i('label').text()): d.extend(['[a=cr:' + json.dumps( {'id': f"{j.attr('href')}_click", 'name': j.text()}) + '/]' + j.text() + '[/a]' for j in i('a').items()]) vod.update({'vod_actor': ' '.join(a), 'vod_director': ' '.join(b), 'vod_remarks': ' '.join(c), 'vod_content': ' '.join(d) + '\n' + vod['vod_content']}) return {'list': [vod]} def searchContent(self, key, quick, pg="1"): params = {'keyword': key, 'page': pg} data = self.getpq( requests.get(f"{self.conh}/search", headers=self.headers, params=params, proxies=self.proxies).text) return {'list': self.getvl(data('.vid-items .item')), 'page': pg} def playerContent(self, flag, id, vipFlags): # 处理跳转标识,获取初始加密地址 if id.startswith('_gggb_'): data = self.getpq( requests.get(f"{self.host}{id.replace('_gggb_', '')}", headers=self.headers).text) id = data('#video-files div').attr('data-url') # 解密初始URL url = self.de_url(id) parsed_url = urlparse(url) durl = parsed_url.scheme + "://" + parsed_url.netloc video_id = parsed_url.path.split('/')[-1] # 生成加密的token tkid = self.encrypt_video_id(video_id) data_url = f"{durl}/stream?token={tkid}" # 请求视频流数据 response = requests.get(data_url, timeout=10) data = response.json() # 解密媒体数据 media = data["result"]["media"] decrypted_media = self.decrypt_media(media) decrypted_data = json.loads(decrypted_media) playeurl = decrypted_data["stream"] # 构建请求头并返回结果 headers = { 'user-agent': self.headers['user-agent'], 'origin': durl, 'referer': f"{durl}/" } return {'parse': 0, 'url': playeurl, 'header': headers} def encrypt_video_id(self, video_id, key=None): """使用指定密钥对视频ID进行XOR加密并Base64编码""" if key is None: key = "kBxSj373GhC18iOc" # 默认密钥 # XOR加密 key_bytes = key.encode('utf-8') encrypted_bytes = [] for i, char in enumerate(video_id): key_byte = key_bytes[i % len(key_bytes)] encrypted_byte = ord(char) ^ key_byte encrypted_bytes.append(encrypted_byte) # Base64编码 encrypted_base64 = base64.b64encode(bytes(encrypted_bytes)).decode('utf-8') return encrypted_base64 def decrypt_media(self, encrypted_media, key="kBxSj373GhC18iOc"): """使用指定密钥解密媒体数据""" # Base64解码 encrypted_bytes = base64.b64decode(encrypted_media) # XOR解密 key_bytes = key.encode('utf-8') decrypted_chars = [] for i, byte in enumerate(encrypted_bytes): key_byte = key_bytes[i % len(key_bytes)] decrypted_char = byte ^ key_byte decrypted_chars.append(chr(decrypted_char)) # 组合成字符串并URL解码 decrypted_text = ''.join(decrypted_chars) url_decoded_text = unquote(decrypted_text) return url_decoded_text def localProxy(self, param): pass def liveContent(self, url): pass def getvl(self, data): videos = [] for i in data.items(): img = i('.img') imgurl = img('.image img').attr('src') if imgurl: imgurl = imgurl.replace("/s360/", "/s1080/") videos.append({ 'vod_id': img('a').attr('href'), 'vod_name': i('.info .title').text(), 'vod_pic': imgurl, 'vod_year': i('.info .meta div').eq(-1).text(), 'vod_remarks': i('.duration').text(), 'style': {"type": "rect", "ratio": 1.33} }) return videos def de_url(self, encoded_str): decoded = b64decode(encoded_str).decode() key = "G9zhUyphqPWZGWzZ" # 更新为第一个密钥 result = [] for i, char in enumerate(decoded): key_char = key[i % len(key)] decrypted_char = chr(ord(char) ^ ord(key_char)) result.append(decrypted_char) return unquote(''.join(result)) def getpgc(self, data): try: if data: if data('a'): return int(data('a').attr('href').split('page=')[-1]) else: return int(data.text()) else: raise Exception("获取页数失败") except: return 1 def p_qjs(self, js_code): try: from com.whl.quickjs.wrapper import QuickJSContext ctx = QuickJSContext.create() jctx = ctx.evaluate(js_code) code = jctx.strip().split('const posterUrl', 1)[0].split('{', 1)[-1] result = ctx.evaluate(f"{code}\nJSON.stringify(media)") ctx.destroy() return json.loads(result) except Exception as e: self.log(f"执行失败: {e}") return [] def ungzip(self, data): result = gzip.decompress(b64decode(data)).decode() return json.loads(result) def getpq(self, data): try: return pq(data) except Exception as e: print(f"{str(e)}") return pq(data.encode('utf-8'))