From 00ff77f7d70c6a499d575794968b2601407a51d9 Mon Sep 17 00:00:00 2001 From: Krcia <1503175889@qq.com> Date: Fri, 7 Nov 2025 11:15:09 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90API=E4=B8=8B=E8=BD=BD?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api.py | 180 +++++++++++++++ docker/Dockerfile | 61 +++++ docker/html/idnex.html | 10 + docker/nginx.conf | 54 +++++ docker/server/api.py | 180 +++++++++++++++ docker/server/download.py | 355 +++++++++++++++++++++++++++++ docker/server/function.py | 57 +++++ docker/server/requirements.txt | 8 + download.py | 396 +++++++++++++++++++++++++++++++++ function.py | 57 +++++ requirements.txt | 8 + 11 files changed, 1366 insertions(+) create mode 100644 api.py create mode 100644 docker/Dockerfile create mode 100644 docker/html/idnex.html create mode 100644 docker/nginx.conf create mode 100644 docker/server/api.py create mode 100644 docker/server/download.py create mode 100644 docker/server/function.py create mode 100644 docker/server/requirements.txt create mode 100644 download.py create mode 100644 function.py create mode 100644 requirements.txt diff --git a/api.py b/api.py new file mode 100644 index 0000000..f83b133 --- /dev/null +++ b/api.py @@ -0,0 +1,180 @@ +import asyncio +from flask import Flask, jsonify, request +import jwt +import datetime +import os +from functools import wraps + +from download import M3U8Downloader +from function import crawl_missav +from urllib.parse import urlparse + +app = Flask(__name__) +app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'your-secret-key-here') + +downloader = M3U8Downloader(max_workers=10, output_dir=r"download") + +# 从环境变量获取用户名密码 +USERNAME = os.getenv('USER') +PASSWORD = os.getenv('PASSWORD') + + +def token_required(f): + @wraps(f) + def decorated(*args, **kwargs): + token = request.headers.get(f'Authorization') + + if not token: + return jsonify({ + 'msg': '请登录', + 'code': 403, + }), 403 + + # 检查token格式 + if token.startswith('Bearer '): + token = token[7:] + + try: + # 解码token + data = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256']) + current_user = data['user'] + except jwt.ExpiredSignatureError: + return jsonify({ + 'msg': '登录已过期,请重新登录', + 'code': 403, + }), 403 + except jwt.InvalidTokenError: + return jsonify({ + 'msg': '无效的token', + 'code': 403, + }), 403 + + return f(*args, **kwargs) + + return decorated + + +@app.route('/api/login', methods=['POST']) +def login(): + data = request.get_json() + + if not data: + return jsonify({ + 'msg': '请提供用户名和密码', + 'code': 400, + }), 400 + + username = data.get('username') + password = data.get('password') + + # 验证用户名密码 + if username == USERNAME and password == PASSWORD: + # 生成token,1小时过期 + token = jwt.encode({ + 'user': username, + 'exp': datetime.datetime.utcnow() + datetime.timedelta(hours=1) + }, app.config['SECRET_KEY'], algorithm='HS256') + + return jsonify({ + 'msg': '登录成功', + 'code': 200, + 'data': { + 'token': token, + 'expires_in': 3600 # 1小时,单位秒 + } + }), 200 + else: + return jsonify({ + 'msg': '用户名或密码错误', + 'code': 401, + }), 401 + + +@app.route('/api/check/') +@token_required +def check_url(url): + status = is_from_missav(url) + if (status): + result = asyncio.run(crawl_missav( + url + )) + return jsonify({ + 'msg': '成功', + 'code': 200, + 'dat': result + }), 200 + else: + return jsonify({ + 'msg': '不是来自missav的链接', + 'code': 500 + }), 200 + + +@app.route('/api/download', methods=['POST']) +# @token_required +def download(): + data = request.get_json() + + if not data: + return jsonify({'error': 'No JSON data provided'}), 400 + + name = data.get('name') + url = data.get('url') + + if not name or not url: + return jsonify({'error': 'Missing name or url parameter'}), 400 + + task_id = downloader.download( + output_filename=f"{name}.mp4", + m3u8_url=url + ) + return jsonify({ + 'msg': '成功', + 'code': 200, + 'dat': task_id + }), 200 + + +@app.route('/api/all-task', methods=['GET']) +# @token_required +def all_task(): + all_tasks = downloader.get_all_tasks() + return jsonify({ + 'msg': '成功', + 'code': 200, + 'data': all_tasks + }), 200 + + +@app.route('/api/progress/', methods=['GET']) +@token_required +def progress(task_id): + progress_info = downloader.get_progress(task_id) + + filename = progress_info['filename'] + progress = progress_info['progress'] # 0~1的浮点数,如0.56表示56% + status = progress_info['status'] + + print(f"文件: {filename}, 进度: {progress:.2%}, 状态: {status}") + return jsonify({ + 'msg': '成功', + 'code': 200, + 'data': {'name': filename, 'progress': progress} + }), 200 + + +def is_from_missav(url): + try: + parsed = urlparse(url) + hostname = parsed.netloc.lower() + return hostname == 'missav.ws' or hostname.endswith('.missav.ws') + except: + return False + + +if __name__ == '__main__': + # 检查环境变量是否设置 + if not USERNAME or not PASSWORD: + print("警告: 请设置环境变量 USER 和 PASSWORD") + + app.run(debug=True, host='0.0.0.0', port=5000) diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..5ca973f --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,61 @@ +FROM python:3.12-slim + +# 安装系统依赖 +RUN apt-get update && apt-get install -y \ + nginx \ + curl \ + wget \ + libglib2.0-0 \ + libnss3 \ + libnspr4 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libcups2 \ + libdrm2 \ + libdbus-1-3 \ + libxkbcommon0 \ + libxcomposite1 \ + libxdamage1 \ + libxfixes3 \ + libxrandr2 \ + libgbm1 \ + libasound2 \ + libpango-1.0-0 \ + libcairo2 \ + libatspi2.0-0 \ + fonts-liberation \ + libnss3-tools \ + xvfb \ + && rm -rf /var/lib/apt/lists/* + +# 设置工作目录 +WORKDIR /app + +# 复制server文件 +COPY ./server/* /app/server/ + +# 复制nginx配置文件 +COPY ./nginx.conf /etc/nginx/nginx.conf + +# 安装Python依赖 +RUN pip install --no-cache-dir -r /app/server/requirements.txt + +# 配置Playwright代理和安装Chromium +RUN playwright install chromium + +# 创建下载目录和nginx运行所需目录 +RUN mkdir -p /app/server/download /var/run/nginx + +# 设置环境变量 +ENV USER=admin +ENV PASSWORD=password +ENV SECRET_KEY='asd78yujncisa32r89' + +# 设置卷 +VOLUME ["/app/server/download"] + +# 暴露端口 +EXPOSE 80 + +# 启动命令 +CMD service nginx start && cd /app/server && xvfb-run -a python3 api.py \ No newline at end of file diff --git a/docker/html/idnex.html b/docker/html/idnex.html new file mode 100644 index 0000000..42550db --- /dev/null +++ b/docker/html/idnex.html @@ -0,0 +1,10 @@ + + + + + 下载器 + + + + + \ No newline at end of file diff --git a/docker/nginx.conf b/docker/nginx.conf new file mode 100644 index 0000000..8314572 --- /dev/null +++ b/docker/nginx.conf @@ -0,0 +1,54 @@ + +worker_processes 1; + +events { + worker_connections 1024; +} +http { + include mime.types; + default_type application/octet-stream; + sendfile on; + keepalive_timeout 65; + map $proxy_protocol_addr $real_ip { + default $remote_addr; + } + server { + listen 4560; + server_name localhost; + client_header_buffer_size 64k; + large_client_header_buffers 8 128k; + client_max_body_size 50m; + + location / { + root /app/html; + try_files $uri $uri/ /index.html; + } + + location /api { + proxy_pass http://localhost:5000/api; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # 跨域配置 + add_header Access-Control-Allow-Origin * always; + add_header Access-Control-Allow-Methods 'GET, POST, OPTIONS, PUT, DELETE' always; + add_header Access-Control-Allow-Headers 'Authorization, Content-Type, X-Requested-With, Accept, Origin' always; + add_header Access-Control-Allow-Credentials true always; + add_header Access-Control-Expose-Headers 'Content-Length, Content-Range' always; + + # 处理OPTIONS预检请求 + if ($request_method = 'OPTIONS') { + add_header Access-Control-Allow-Origin *; + add_header Access-Control-Allow-Methods 'GET, POST, OPTIONS, PUT, DELETE'; + add_header Access-Control-Allow-Headers 'Authorization, Content-Type, X-Requested-With, Accept, Origin'; + add_header Access-Control-Max-Age 86400; + add_header Content-Length 0; + add_header Content-Type text/plain; + return 200; + } +} +} + +} diff --git a/docker/server/api.py b/docker/server/api.py new file mode 100644 index 0000000..57e3969 --- /dev/null +++ b/docker/server/api.py @@ -0,0 +1,180 @@ +import asyncio +from flask import Flask, jsonify, request +import jwt +import datetime +import os +from functools import wraps + +from download import M3U8Downloader +from function import crawl_missav +from urllib.parse import urlparse + +app = Flask(__name__) +app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'your-secret-key-here') + +downloader = M3U8Downloader(max_workers=10, output_dir=r"download") + +# 从环境变量获取用户名密码 +USERNAME = os.getenv('USER') +PASSWORD = os.getenv('PASSWORD') + + +def token_required(f): + @wraps(f) + def decorated(*args, **kwargs): + token = request.headers.get('Authorization') + + if not token: + return jsonify({ + 'msg': '请登录', + 'code': 403, + }), 403 + + # 检查token格式 + if token.startswith('Bearer '): + token = token[7:] + + try: + # 解码token + data = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256']) + current_user = data['user'] + except jwt.ExpiredSignatureError: + return jsonify({ + 'msg': '登录已过期,请重新登录', + 'code': 403, + }), 403 + except jwt.InvalidTokenError: + return jsonify({ + 'msg': '无效的token', + 'code': 403, + }), 403 + + return f(*args, **kwargs) + + return decorated + + +@app.route('/api/login', methods=['POST']) +def login(): + data = request.get_json() + + if not data: + return jsonify({ + 'msg': '请提供用户名和密码', + 'code': 400, + }), 400 + + username = data.get('username') + password = data.get('password') + + # 验证用户名密码 + if username == USERNAME and password == PASSWORD: + # 生成token,1小时过期 + token = jwt.encode({ + 'user': username, + 'exp': datetime.datetime.utcnow() + datetime.timedelta(hours=1) + }, app.config['SECRET_KEY'], algorithm='HS256') + + return jsonify({ + 'msg': '登录成功', + 'code': 200, + 'data': { + 'token': token, + 'expires_in': 3600 # 1小时,单位秒 + } + }), 200 + else: + return jsonify({ + 'msg': '用户名或密码错误', + 'code': 401, + }), 401 + + +@app.route('/api/check/') +@token_required +def check_url(url): + status = is_from_missav(url) + if (status): + result = asyncio.run(crawl_missav( + url + )) + return jsonify({ + 'msg': '成功', + 'code': 200, + 'dat': result + }), 200 + else: + return jsonify({ + 'msg': '不是来自missav的链接', + 'code': 500 + }), 200 + + +@app.route('/api/download', methods=['POST']) +@token_required +def download(): + data = request.get_json() + + if not data: + return jsonify({'error': 'No JSON data provided'}), 400 + + name = data.get('name') + url = data.get('url') + + if not name or not url: + return jsonify({'error': 'Missing name or url parameter'}), 400 + + task_id = downloader.download( + output_filename=f"{name}.mp4", + m3u8_url=url + ) + return jsonify({ + 'msg': '成功', + 'code': 200, + 'dat': task_id + }), 200 + + +@app.route('/api/all-task', methods=['GET']) +@token_required +def all_task(): + all_tasks = downloader.get_all_tasks() + return jsonify({ + 'msg': '成功', + 'code': 200, + 'data': all_tasks + }), 200 + + +@app.route('/api/progress/', methods=['GET']) +@token_required +def progress(task_id): + progress_info = downloader.get_progress(task_id) + + filename = progress_info['filename'] + progress = progress_info['progress'] # 0~1的浮点数,如0.56表示56% + status = progress_info['status'] + + print(f"文件: {filename}, 进度: {progress:.2%}, 状态: {status}") + return jsonify({ + 'msg': '成功', + 'code': 200, + 'data': {'name': filename, 'progress': progress} + }), 200 + + +def is_from_missav(url): + try: + parsed = urlparse(url) + hostname = parsed.netloc.lower() + return hostname == 'missav.ws' or hostname.endswith('.missav.ws') + except: + return False + + +if __name__ == '__main__': + # 检查环境变量是否设置 + if not USERNAME or not PASSWORD: + print("警告: 请设置环境变量 USER 和 PASSWORD") + + app.run(debug=True, host='0.0.0.0', port=5000) diff --git a/docker/server/download.py b/docker/server/download.py new file mode 100644 index 0000000..5e3f36d --- /dev/null +++ b/docker/server/download.py @@ -0,0 +1,355 @@ +import os +import time +import threading +import requests +from urllib.parse import urljoin +import m3u8 +from Crypto.Cipher import AES +import concurrent.futures +from pathlib import Path + + +class M3U8Downloader: + def __init__(self, max_workers=5, output_dir="downloads"): + self.max_workers = max_workers + self.output_dir = Path(output_dir) + self.output_dir.mkdir(exist_ok=True) + + # 存储下载任务状态 + self.tasks = {} + self.lock = threading.Lock() + self.task_counter = 0 + + def get_task_info(self, task_id): + """获取任务信息""" + with self.lock: + return self.tasks.get(task_id, {"status": "not_found"}) + + def list_tasks(self): + """列出所有任务""" + with self.lock: + return {task_id: info for task_id, info in self.tasks.items()} + + def get_all_tasks(self): + """ + 获取全部任务的信息,包括文件名和任务ID + + Returns: + list: 包含所有任务信息的列表,每个元素为字典 + [{'task_id': 'task_1', 'filename': 'video1.mp4', 'status': 'downloading', 'progress': 0.56}, ...] + """ + with self.lock: + all_tasks = [] + for task_id, task_info in self.tasks.items(): + # 计算进度 + progress = 0.0 + if task_info['status'] == 'preparing': + progress = 0.0 + elif task_info['status'] == 'downloading': + if task_info['total_segments'] > 0: + progress = task_info['downloaded_segments'] / task_info['total_segments'] + else: + progress = 0.0 + elif task_info['status'] == 'merging': + progress = 1.0 + elif task_info['status'] == 'completed': + progress = 1.0 + elif task_info['status'] == 'failed': + progress = 0.0 + + all_tasks.append({ + 'task_id': task_id, + 'filename': task_info['output_filename'], + 'status': task_info['status'], + 'progress': round(progress, 4), + 'start_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(task_info.get('start_time', time.time()))) + }) + + # 按开始时间倒序排列,最新的任务在前面 + all_tasks.sort(key=lambda x: x['start_time'], reverse=True) + return all_tasks + + def get_tasks_summary(self): + """ + 获取任务摘要信息 + + Returns: + dict: 包含任务统计信息的字典 + """ + all_tasks = self.get_all_tasks() + + summary = { + 'total': len(all_tasks), + 'preparing': 0, + 'downloading': 0, + 'merging': 0, + 'completed': 0, + 'failed': 0 + } + + for task in all_tasks: + status = task['status'] + if status in summary: + summary[status] += 1 + + return summary + + def download_ts_segment(self, task_info, ts_url, output_path, segment_index): + """下载单个TS片段""" + try: + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' + } + + response = requests.get(ts_url, headers=headers, stream=True, timeout=30) + response.raise_for_status() + + ts_data = response.content + + # 如果有加密,进行解密 + if task_info['key'] and task_info['iv']: + cipher = AES.new(task_info['key'], AES.MODE_CBC, task_info['iv']) + ts_data = cipher.decrypt(ts_data) + + with open(output_path, 'wb') as f: + f.write(ts_data) + + # 更新进度 + with self.lock: + if task_info['task_id'] in self.tasks: + self.tasks[task_info['task_id']]['downloaded_segments'] += 1 + + return True + + except Exception as e: + print(f"下载片段 {segment_index} 失败: {e}") + return False + + def get_decryption_key(self, key_uri, iv=None): + """获取解密密钥""" + try: + response = requests.get(key_uri) + response.raise_for_status() + key = response.content + + # 如果IV是十六进制字符串,转换为bytes + if iv and isinstance(iv, str): + if iv.startswith('0x'): + iv = bytes.fromhex(iv[2:]) + else: + iv = bytes.fromhex(iv) + elif not iv: + iv = b'\x00' * 16 # 默认IV + + return key, iv + except Exception as e: + print(f"获取解密密钥失败: {e}") + return None, None + + def _download_m3u8(self, m3u8_url, output_filename, task_id): + """内部下载方法""" + # 初始化任务信息 + task_info = { + 'task_id': task_id, + 'm3u8_url': m3u8_url, + 'output_filename': output_filename, + 'status': 'preparing', + 'total_segments': 0, + 'downloaded_segments': 0, + 'progress': 0.0, + 'output_file': '', + 'start_time': time.time(), + 'key': None, + 'iv': None + } + + with self.lock: + self.tasks[task_id] = task_info + + try: + # 解析M3U8文件 + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' + } + + response = requests.get(m3u8_url, headers=headers) + response.raise_for_status() + + m3u8_content = response.text + m3u8_obj = m3u8.loads(m3u8_content) + + # 处理密钥 + key = None + iv = None + if m3u8_obj.keys and m3u8_obj.keys[0]: + key_uri = m3u8_obj.keys[0].uri + if not key_uri.startswith('http'): + key_uri = urljoin(m3u8_url, key_uri) + + key, iv = self.get_decryption_key(key_uri, m3u8_obj.keys[0].iv) + task_info['key'] = key + task_info['iv'] = iv + + # 获取所有TS片段URL + ts_segments = [] + for segment in m3u8_obj.segments: + ts_url = segment.uri + if not ts_url.startswith('http'): + ts_url = urljoin(m3u8_url, ts_url) + ts_segments.append(ts_url) + + task_info['total_segments'] = len(ts_segments) + task_info['status'] = 'downloading' + + # 设置输出文件路径 + output_path = self.output_dir / output_filename + task_info['output_file'] = str(output_path) + + # 创建临时目录存储TS片段 + temp_dir = self.output_dir / f"temp_{task_id}" + temp_dir.mkdir(exist_ok=True) + + print(f"开始下载任务 {task_id}: {len(ts_segments)} 个片段") + + # 使用线程池下载所有TS片段 + with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = [] + for i, ts_url in enumerate(ts_segments): + ts_path = temp_dir / f"segment_{i:05d}.ts" + future = executor.submit( + self.download_ts_segment, + task_info, + ts_url, + ts_path, + i + ) + futures.append(future) + + # 等待所有下载完成 + results = [] + for future in concurrent.futures.as_completed(futures): + results.append(future.result()) + + # 检查下载结果 + if not all(results): + task_info['status'] = 'failed' + task_info['error'] = '部分片段下载失败' + task_info['progress'] = 0.0 + print(f"任务 {task_id} 下载失败,部分片段下载失败") + return + + # 合并TS文件 + print(f"开始合并TS文件...") + task_info['status'] = 'merging' + task_info['progress'] = 1.0 + + with open(output_path, 'wb') as outfile: + for i in range(len(ts_segments)): + ts_path = temp_dir / f"segment_{i:05d}.ts" + if ts_path.exists(): + with open(ts_path, 'rb') as infile: + outfile.write(infile.read()) + ts_path.unlink() + + # 清理临时目录 + temp_dir.rmdir() + + task_info['status'] = 'completed' + task_info['progress'] = 1.0 + task_info['end_time'] = time.time() + + print(f"任务 {task_id} 完成: {output_path}") + + except Exception as e: + task_info['status'] = 'failed' + task_info['error'] = str(e) + task_info['progress'] = 0.0 + print(f"任务 {task_id} 失败: {e}") + + def download(self, output_filename, m3u8_url): + """ + 下载M3U8视频 + + Args: + output_filename: 输出文件名(如:video.mp4) + m3u8_url: M3U8文件URL + + Returns: + str: 任务ID + """ + with self.lock: + self.task_counter += 1 + task_id = f"task_{self.task_counter}" + + thread = threading.Thread( + target=self._download_m3u8, + args=(m3u8_url, output_filename, task_id) + ) + thread.daemon = True + thread.start() + + return task_id + + def get_progress(self, task_id): + """ + 获取下载进度 + + Args: + task_id: 任务ID + + Returns: + dict: 包含文件名和进度(0~1浮点数)的字典 + """ + task_info = self.get_task_info(task_id) + + if task_info['status'] == 'not_found': + return {'filename': '', 'progress': 0.0, 'status': 'not_found'} + + progress = 0.0 + if task_info['status'] == 'preparing': + progress = 0.0 + elif task_info['status'] == 'downloading': + if task_info['total_segments'] > 0: + progress = task_info['downloaded_segments'] / task_info['total_segments'] + else: + progress = 0.0 + elif task_info['status'] == 'merging': + progress = 1.0 + elif task_info['status'] == 'completed': + progress = 1.0 + elif task_info['status'] == 'failed': + progress = 0.0 + + return { + 'filename': task_info['output_filename'], + 'progress': round(progress, 4), + 'status': task_info['status'], + 'task_id': task_id, + 'output_file': task_info.get('output_file', ''), + 'downloaded_segments': task_info.get('downloaded_segments', 0), + 'total_segments': task_info.get('total_segments', 0) + } + + def wait_for_completion(self, task_id, timeout=None): + """ + 等待任务完成 + + Args: + task_id: 任务ID + timeout: 超时时间(秒) + + Returns: + bool: 是否成功完成 + """ + start_time = time.time() + while True: + task_info = self.get_task_info(task_id) + + if task_info['status'] == 'completed': + return True + elif task_info['status'] == 'failed': + return False + elif timeout and (time.time() - start_time) > timeout: + return False + + time.sleep(1) \ No newline at end of file diff --git a/docker/server/function.py b/docker/server/function.py new file mode 100644 index 0000000..69875c7 --- /dev/null +++ b/docker/server/function.py @@ -0,0 +1,57 @@ +from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError +import os + + +async def crawl_missav(url): + result = { + 'title': '', + 'url': [], + 'serial_number': '' # 新增字段存储番号 + } + launch_args = { + "headless": False, + "args": ["--disable-blink-features=AutomationControlled"] + } + + # 从环境变量获取代理 + env_proxy = os.getenv('PROXY') + if env_proxy: + proxy = env_proxy + launch_args["proxy"] = {"server": proxy} + + async with async_playwright() as p: + browser = await p.chromium.launch(**launch_args) + page = await browser.new_page() + page.set_default_timeout(60000) + try: + await page.goto(url, wait_until="domcontentloaded") + result['title'] = await page.title() + videos = await page.query_selector_all("video") + + for i, video in enumerate(videos): + src = await video.get_attribute("src") + if (src != None): + result['url'].append(src) + + # 新增:查找包含"番号:"的span标签,并获取其同级下一个标签的文本 + try: + # 查找所有包含"番号:"文本的span标签 + spans_with_serial = await page.query_selector_all('span') + for span in spans_with_serial: + span_text = await span.text_content() + if span_text and '番号:' in span_text: + # 获取span的下一个同级元素 + next_element = await span.evaluate_handle('element => element.nextElementSibling') + if next_element: + next_element_text = await next_element.text_content() + if next_element_text: + result['serial_number'] = next_element_text.strip() + break # 找到第一个就退出 + except Exception as e: + print(f"[INFO] 查找番号时出错: {e}") + + except PlaywrightTimeoutError: + print("[ERROR] 页面加载超时,可能被 Cloudflare 拦截") + finally: + await browser.close() + return result \ No newline at end of file diff --git a/docker/server/requirements.txt b/docker/server/requirements.txt new file mode 100644 index 0000000..6ef3d2b --- /dev/null +++ b/docker/server/requirements.txt @@ -0,0 +1,8 @@ +playwright +flask +requests +m3u8 +pycryptodome +tqdm +pathlib2 +pyjwt \ No newline at end of file diff --git a/download.py b/download.py new file mode 100644 index 0000000..1e0cfe2 --- /dev/null +++ b/download.py @@ -0,0 +1,396 @@ +import os +import time +import threading +import requests +from urllib.parse import urljoin +import m3u8 +from Crypto.Cipher import AES +import concurrent.futures +from pathlib import Path +import shutil + + +class M3U8Downloader: + def __init__(self, max_workers=5, output_dir="downloads", cache_dir="cache"): + self.max_workers = max_workers + self.output_dir = Path(output_dir) + self.cache_dir = Path(cache_dir) + + # 创建目录 + self.output_dir.mkdir(exist_ok=True) + self.cache_dir.mkdir(exist_ok=True) + + # 清空缓存目录 + self.clear_cache() + + # 存储下载任务状态 + self.tasks = {} + self.lock = threading.Lock() + self.task_counter = 0 + + def clear_cache(self): + """清空缓存目录""" + try: + if self.cache_dir.exists(): + # 删除缓存目录中的所有内容 + for item in self.cache_dir.iterdir(): + if item.is_file(): + item.unlink() + elif item.is_dir(): + shutil.rmtree(item) + print(f"缓存目录已清空: {self.cache_dir}") + except Exception as e: + print(f"清空缓存目录失败: {e}") + + def get_task_info(self, task_id): + """获取任务信息""" + with self.lock: + return self.tasks.get(task_id, {"status": "not_found"}) + + def list_tasks(self): + """列出所有任务""" + with self.lock: + return {task_id: info for task_id, info in self.tasks.items()} + + def get_all_tasks(self): + """ + 获取全部任务的信息,包括文件名和任务ID + + Returns: + list: 包含所有任务信息的列表,每个元素为字典 + [{'task_id': 'task_1', 'filename': 'video1.mp4', 'status': 'downloading', 'progress': 0.56}, ...] + """ + with self.lock: + all_tasks = [] + for task_id, task_info in self.tasks.items(): + # 计算进度 + progress = 0.0 + if task_info['status'] == 'preparing': + progress = 0.0 + elif task_info['status'] == 'downloading': + if task_info['total_segments'] > 0: + progress = task_info['downloaded_segments'] / task_info['total_segments'] + else: + progress = 0.0 + elif task_info['status'] == 'merging': + progress = 1.0 + elif task_info['status'] == 'completed': + progress = 1.0 + elif task_info['status'] == 'failed': + progress = 0.0 + + all_tasks.append({ + 'task_id': task_id, + 'filename': task_info['output_filename'], + 'status': task_info['status'], + 'progress': round(progress, 4), + 'start_time': time.strftime('%Y-%m-%d %H:%M:%S', + time.localtime(task_info.get('start_time', time.time()))) + }) + + # 按开始时间倒序排列,最新的任务在前面 + all_tasks.sort(key=lambda x: x['start_time'], reverse=True) + return all_tasks + + def get_tasks_summary(self): + """ + 获取任务摘要信息 + + Returns: + dict: 包含任务统计信息的字典 + """ + all_tasks = self.get_all_tasks() + + summary = { + 'total': len(all_tasks), + 'preparing': 0, + 'downloading': 0, + 'merging': 0, + 'completed': 0, + 'failed': 0 + } + + for task in all_tasks: + status = task['status'] + if status in summary: + summary[status] += 1 + + return summary + + def download_ts_segment(self, task_info, ts_url, output_path, segment_index): + """下载单个TS片段""" + try: + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' + } + + response = requests.get(ts_url, headers=headers, stream=True, timeout=30) + response.raise_for_status() + + ts_data = response.content + + # 如果有加密,进行解密 + if task_info['key'] and task_info['iv']: + cipher = AES.new(task_info['key'], AES.MODE_CBC, task_info['iv']) + ts_data = cipher.decrypt(ts_data) + + # 确保缓存目录存在 + output_path.parent.mkdir(parents=True, exist_ok=True) + + with open(output_path, 'wb') as f: + f.write(ts_data) + + # 更新进度 + with self.lock: + if task_info['task_id'] in self.tasks: + self.tasks[task_info['task_id']]['downloaded_segments'] += 1 + + return True + + except Exception as e: + print(f"下载片段 {segment_index} 失败: {e}") + return False + + def get_decryption_key(self, key_uri, iv=None): + """获取解密密钥""" + try: + response = requests.get(key_uri) + response.raise_for_status() + key = response.content + + # 如果IV是十六进制字符串,转换为bytes + if iv and isinstance(iv, str): + if iv.startswith('0x'): + iv = bytes.fromhex(iv[2:]) + else: + iv = bytes.fromhex(iv) + elif not iv: + iv = b'\x00' * 16 # 默认IV + + return key, iv + except Exception as e: + print(f"获取解密密钥失败: {e}") + return None, None + + def _download_m3u8(self, m3u8_url, output_filename, task_id): + """内部下载方法""" + # 初始化任务信息 + task_info = { + 'task_id': task_id, + 'm3u8_url': m3u8_url, + 'output_filename': output_filename, + 'status': 'preparing', + 'total_segments': 0, + 'downloaded_segments': 0, + 'progress': 0.0, + 'output_file': '', + 'start_time': time.time(), + 'key': None, + 'iv': None + } + + with self.lock: + self.tasks[task_id] = task_info + + try: + # 解析M3U8文件 + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' + } + + response = requests.get(m3u8_url, headers=headers) + response.raise_for_status() + + m3u8_content = response.text + m3u8_obj = m3u8.loads(m3u8_content) + + # 处理密钥 + key = None + iv = None + if m3u8_obj.keys and m3u8_obj.keys[0]: + key_uri = m3u8_obj.keys[0].uri + if not key_uri.startswith('http'): + key_uri = urljoin(m3u8_url, key_uri) + + key, iv = self.get_decryption_key(key_uri, m3u8_obj.keys[0].iv) + task_info['key'] = key + task_info['iv'] = iv + + # 获取所有TS片段URL + ts_segments = [] + for segment in m3u8_obj.segments: + ts_url = segment.uri + if not ts_url.startswith('http'): + ts_url = urljoin(m3u8_url, ts_url) + ts_segments.append(ts_url) + + task_info['total_segments'] = len(ts_segments) + task_info['status'] = 'downloading' + + # 设置输出文件路径(在下载目录中) + output_path = self.output_dir / output_filename + task_info['output_file'] = str(output_path) + + # 创建临时目录存储TS片段(在缓存目录中) + temp_dir = self.cache_dir / f"temp_{task_id}" + temp_dir.mkdir(exist_ok=True) + + print(f"开始下载任务 {task_id}: {len(ts_segments)} 个片段") + print(f"缓存目录: {temp_dir}") + print(f"输出文件: {output_path}") + + # 使用线程池下载所有TS片段 + with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor: + futures = [] + for i, ts_url in enumerate(ts_segments): + ts_path = temp_dir / f"segment_{i:05d}.ts" + future = executor.submit( + self.download_ts_segment, + task_info, + ts_url, + ts_path, + i + ) + futures.append(future) + + # 等待所有下载完成 + results = [] + for future in concurrent.futures.as_completed(futures): + results.append(future.result()) + + # 检查下载结果 + if not all(results): + task_info['status'] = 'failed' + task_info['error'] = '部分片段下载失败' + task_info['progress'] = 0.0 + print(f"任务 {task_id} 下载失败,部分片段下载失败") + + # 清理缓存 + if temp_dir.exists(): + shutil.rmtree(temp_dir) + return + + # 合并TS文件到下载目录 + print(f"开始合并TS文件...") + task_info['status'] = 'merging' + task_info['progress'] = 1.0 + + # 确保输出目录存在 + self.output_dir.mkdir(exist_ok=True) + + with open(output_path, 'wb') as outfile: + for i in range(len(ts_segments)): + ts_path = temp_dir / f"segment_{i:05d}.ts" + if ts_path.exists(): + with open(ts_path, 'rb') as infile: + outfile.write(infile.read()) + + # 清理缓存目录 + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + task_info['status'] = 'completed' + task_info['progress'] = 1.0 + task_info['end_time'] = time.time() + + print(f"任务 {task_id} 完成: {output_path}") + + except Exception as e: + task_info['status'] = 'failed' + task_info['error'] = str(e) + task_info['progress'] = 0.0 + + # 清理缓存 + temp_dir = self.cache_dir / f"temp_{task_id}" + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + print(f"任务 {task_id} 失败: {e}") + + def download(self, output_filename, m3u8_url): + """ + 下载M3U8视频 + + Args: + output_filename: 输出文件名(如:video.mp4) + m3u8_url: M3U8文件URL + + Returns: + str: 任务ID + """ + with self.lock: + self.task_counter += 1 + task_id = f"task_{self.task_counter}" + + thread = threading.Thread( + target=self._download_m3u8, + args=(m3u8_url, output_filename, task_id) + ) + thread.daemon = True + thread.start() + + return task_id + + def get_progress(self, task_id): + """ + 获取下载进度 + + Args: + task_id: 任务ID + + Returns: + dict: 包含文件名和进度(0~1浮点数)的字典 + """ + task_info = self.get_task_info(task_id) + + if task_info['status'] == 'not_found': + return {'filename': '', 'progress': 0.0, 'status': 'not_found'} + + progress = 0.0 + if task_info['status'] == 'preparing': + progress = 0.0 + elif task_info['status'] == 'downloading': + if task_info['total_segments'] > 0: + progress = task_info['downloaded_segments'] / task_info['total_segments'] + else: + progress = 0.0 + elif task_info['status'] == 'merging': + progress = 1.0 + elif task_info['status'] == 'completed': + progress = 1.0 + elif task_info['status'] == 'failed': + progress = 0.0 + + return { + 'filename': task_info['output_filename'], + 'progress': round(progress, 4), + 'status': task_info['status'], + 'task_id': task_id, + 'output_file': task_info.get('output_file', ''), + 'downloaded_segments': task_info.get('downloaded_segments', 0), + 'total_segments': task_info.get('total_segments', 0) + } + + def wait_for_completion(self, task_id, timeout=None): + """ + 等待任务完成 + + Args: + task_id: 任务ID + timeout: 超时时间(秒) + + Returns: + bool: 是否成功完成 + """ + start_time = time.time() + while True: + task_info = self.get_task_info(task_id) + + if task_info['status'] == 'completed': + return True + elif task_info['status'] == 'failed': + return False + elif timeout and (time.time() - start_time) > timeout: + return False + + time.sleep(1) \ No newline at end of file diff --git a/function.py b/function.py new file mode 100644 index 0000000..69875c7 --- /dev/null +++ b/function.py @@ -0,0 +1,57 @@ +from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError +import os + + +async def crawl_missav(url): + result = { + 'title': '', + 'url': [], + 'serial_number': '' # 新增字段存储番号 + } + launch_args = { + "headless": False, + "args": ["--disable-blink-features=AutomationControlled"] + } + + # 从环境变量获取代理 + env_proxy = os.getenv('PROXY') + if env_proxy: + proxy = env_proxy + launch_args["proxy"] = {"server": proxy} + + async with async_playwright() as p: + browser = await p.chromium.launch(**launch_args) + page = await browser.new_page() + page.set_default_timeout(60000) + try: + await page.goto(url, wait_until="domcontentloaded") + result['title'] = await page.title() + videos = await page.query_selector_all("video") + + for i, video in enumerate(videos): + src = await video.get_attribute("src") + if (src != None): + result['url'].append(src) + + # 新增:查找包含"番号:"的span标签,并获取其同级下一个标签的文本 + try: + # 查找所有包含"番号:"文本的span标签 + spans_with_serial = await page.query_selector_all('span') + for span in spans_with_serial: + span_text = await span.text_content() + if span_text and '番号:' in span_text: + # 获取span的下一个同级元素 + next_element = await span.evaluate_handle('element => element.nextElementSibling') + if next_element: + next_element_text = await next_element.text_content() + if next_element_text: + result['serial_number'] = next_element_text.strip() + break # 找到第一个就退出 + except Exception as e: + print(f"[INFO] 查找番号时出错: {e}") + + except PlaywrightTimeoutError: + print("[ERROR] 页面加载超时,可能被 Cloudflare 拦截") + finally: + await browser.close() + return result \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..6ef3d2b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +playwright +flask +requests +m3u8 +pycryptodome +tqdm +pathlib2 +pyjwt \ No newline at end of file