完成API下载功能

2025-11-07 11:15:09 +08:00 · 2025-11-07 11:15:09 +08:00 · 00ff77f7d7
commit 00ff77f7d7
parent ebb6384aa4
11 changed files with 1366 additions and 0 deletions
--- a/api.py
+++ b/api.py
@ -0,0 +1,180 @@
+import asyncio
+from flask import Flask, jsonify, request
+import jwt
+import datetime
+import os
+from functools import wraps
+
+from download import M3U8Downloader
+from function import crawl_missav
+from urllib.parse import urlparse
+
+# Flask application. SECRET_KEY is the HS256 key used to sign and verify the
+# login JWTs issued and checked in this module.
+# NOTE(review): when the SECRET_KEY environment variable is unset the
+# hard-coded fallback below is used, so anyone who knows this literal could
+# forge tokens — make sure SECRET_KEY is set in every real deployment.
+app = Flask(__name__)
+app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'your-secret-key-here')
+
+# Shared downloader instance used by the download/task/progress endpoints.
+downloader = M3U8Downloader(max_workers=10, output_dir=r"download")
+
+# Login credentials are read from the environment.
+# NOTE(review): USER is commonly pre-set by the shell to the OS login name —
+# confirm this variable name is intentional.
+USERNAME = os.getenv('USER')
+PASSWORD = os.getenv('PASSWORD')
+
+
+def token_required(f):
+    """Decorator that rejects requests lacking a valid login token.
+
+    The token is read from the ``Authorization`` header (an optional
+    ``Bearer `` prefix is stripped) and verified as an HS256 JWT signed with
+    ``app.config['SECRET_KEY']``. On any failure a JSON body with ``code`` 403
+    is returned with HTTP 403 and the wrapped view is not called.
+
+    Args:
+        f: The view function to protect.
+
+    Returns:
+        The wrapped view function.
+    """
+    def deny(message):
+        # Every rejection shares the same response shape.
+        return jsonify({
+            'msg': message,
+            'code': 403,
+        }), 403
+
+    @wraps(f)
+    def decorated(*args, **kwargs):
+        token = request.headers.get('Authorization')
+
+        if not token:
+            return deny('请登录')
+
+        # Accept both "Bearer <token>" and a bare token.
+        if token.startswith('Bearer '):
+            token = token[7:]
+
+        try:
+            data = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'])
+        except jwt.ExpiredSignatureError:
+            return deny('登录已过期，请重新登录')
+        except jwt.InvalidTokenError:
+            return deny('无效的token')
+
+        # login() always embeds a 'user' claim; a correctly signed token
+        # without it used to raise KeyError and surface as HTTP 500.
+        if 'user' not in data:
+            return deny('无效的token')
+
+        return f(*args, **kwargs)
+
+    return decorated
+
+
+@app.route('/api/login', methods=['POST'])
+def login():
+    """Exchange a username/password pair for a one-hour JWT.
+
+    Expects a JSON object with ``username`` and ``password``.
+
+    Returns:
+        HTTP 200 with ``data.token`` and ``data.expires_in`` on success,
+        HTTP 400 when no usable JSON object was sent, HTTP 401 when the
+        credentials do not match (or none are configured on the server).
+    """
+    import hmac  # local import: only needed for the constant-time comparison
+
+    # silent=True turns a wrong Content-Type or malformed JSON into None, so
+    # the client gets the JSON 400 below instead of a framework error page.
+    data = request.get_json(silent=True)
+
+    if not data or not isinstance(data, dict):
+        return jsonify({
+            'msg': '请提供用户名和密码',
+            'code': 400,
+        }), 400
+
+    username = data.get('username')
+    password = data.get('password')
+
+    # Without the USERNAME/PASSWORD and type checks, an unset environment
+    # (None) compared equal to a request that omits both fields (None), which
+    # handed out a valid token to anyone.
+    credentials_ok = False
+    if USERNAME and PASSWORD and isinstance(username, str) and isinstance(password, str):
+        user_ok = hmac.compare_digest(
+            username.encode('utf-8', 'surrogatepass'), USERNAME.encode('utf-8', 'surrogatepass'))
+        password_ok = hmac.compare_digest(
+            password.encode('utf-8', 'surrogatepass'), PASSWORD.encode('utf-8', 'surrogatepass'))
+        credentials_ok = user_ok and password_ok
+
+    if not credentials_ok:
+        return jsonify({
+            'msg': '用户名或密码错误',
+            'code': 401,
+        }), 401
+
+    # Token expires after one hour; a timezone-aware timestamp replaces the
+    # deprecated datetime.utcnow().
+    expires_at = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(hours=1)
+    token = jwt.encode({
+        'user': username,
+        'exp': expires_at
+    }, app.config['SECRET_KEY'], algorithm='HS256')
+
+    return jsonify({
+        'msg': '登录成功',
+        'code': 200,
+        'data': {
+            'token': token,
+            'expires_in': 3600  # one hour, in seconds
+        }
+    }), 200
+
+
+@app.route('/api/check/<path:url>')
+@token_required
+def check_url(url):
+    """Crawl a missav page and return what ``crawl_missav`` extracted.
+
+    Args:
+        url: The page URL, taken from the remainder of the request path.
+            NOTE(review): only the path is matched by the route, so a query
+            string on the target URL does not arrive here — confirm that is
+            acceptable.
+
+    Returns:
+        HTTP 200 in both cases; the JSON ``code`` is 200 on success and 500
+        when the URL does not belong to missav.
+    """
+    if not is_from_missav(url):
+        return jsonify({
+            'msg': '不是来自missav的链接',
+            'code': 500
+        }), 200
+
+    result = asyncio.run(crawl_missav(url))
+    return jsonify({
+        'msg': '成功',
+        'code': 200,
+        # 'dat' is kept for existing clients; 'data' matches the key used by
+        # the other endpoints in this file.
+        'dat': result,
+        'data': result
+    }), 200
+
+
+@app.route('/api/download', methods=['POST'])
+@token_required
+def download():
+    """Start a background download of an M3U8 playlist.
+
+    Expects a JSON object with ``name`` (output file name without extension)
+    and ``url`` (http/https playlist URL). Requires a valid login token, like
+    the same endpoint in docker/server/api.py.
+
+    Returns:
+        HTTP 200 with the new task id, or HTTP 400 for missing/invalid input.
+    """
+    # silent=True: a non-JSON body yields None instead of a framework error.
+    data = request.get_json(silent=True)
+
+    if not data or not isinstance(data, dict):
+        return jsonify({'error': 'No JSON data provided'}), 400
+
+    name = data.get('name')
+    url = data.get('url')
+
+    if not name or not url:
+        return jsonify({'error': 'Missing name or url parameter'}), 400
+
+    # Keep only the final path component so a name such as "../../x" cannot
+    # write outside the download directory.
+    safe_name = os.path.basename(str(name).replace('\\', '/')).strip()
+    if safe_name in ('', '.', '..'):
+        return jsonify({'error': 'Invalid name parameter'}), 400
+
+    if not isinstance(url, str) or urlparse(url).scheme not in ('http', 'https'):
+        return jsonify({'error': 'Invalid url parameter'}), 400
+
+    task_id = downloader.download(
+        output_filename=f"{safe_name}.mp4",
+        m3u8_url=url
+    )
+    return jsonify({
+        'msg': '成功',
+        'code': 200,
+        # 'dat' is kept for existing clients; 'data' matches the other endpoints.
+        'dat': task_id,
+        'data': task_id
+    }), 200
+
+
+@app.route('/api/all-task', methods=['GET'])
+@token_required
+def all_task():
+    """Return every download task known to the shared downloader.
+
+    Requires a valid login token, like the same endpoint in
+    docker/server/api.py.
+    """
+    all_tasks = downloader.get_all_tasks()
+    return jsonify({
+        'msg': '成功',
+        'code': 200,
+        'data': all_tasks
+    }), 200
+
+
+@app.route('/api/progress/<path:task_id>', methods=['GET'])
+@token_required
+def progress(task_id):
+    """Report the progress of one download task.
+
+    Args:
+        task_id: Identifier returned by the download endpoint.
+
+    Returns:
+        HTTP 200 with the file name, progress (float between 0 and 1) and
+        status, or HTTP 404 when the downloader does not know the task.
+    """
+    progress_info = downloader.get_progress(task_id)
+    status = progress_info['status']
+
+    # An unknown id used to be reported as a successful task with an empty
+    # name, which clients could not tell apart from a real one.
+    if status == 'not_found':
+        return jsonify({
+            'msg': '任务不存在',
+            'code': 404,
+        }), 404
+
+    filename = progress_info['filename']
+    ratio = progress_info['progress']  # float in 0..1, e.g. 0.56 means 56%
+
+    print(f"文件: {filename}, 进度: {ratio:.2%}, 状态: {status}")
+    return jsonify({
+        'msg': '成功',
+        'code': 200,
+        'data': {'name': filename, 'progress': ratio, 'status': status}
+    }), 200
+
+
+def is_from_missav(url):
+    """Tell whether ``url`` is an http(s) URL hosted on missav.ws.
+
+    Args:
+        url: Candidate URL.
+
+    Returns:
+        True when the host is ``missav.ws`` or one of its subdomains,
+        False for anything else, including non-string or unparsable input.
+    """
+    if not isinstance(url, str):
+        return False
+
+    try:
+        parsed = urlparse(url)
+        # .hostname drops any "user@" prefix and ":port" suffix and is
+        # lower-cased; comparing the raw netloc rejected "missav.ws:443".
+        hostname = parsed.hostname
+    except ValueError:
+        return False
+
+    if parsed.scheme not in ('http', 'https') or not hostname:
+        return False
+
+    return hostname == 'missav.ws' or hostname.endswith('.missav.ws')
+
+
+if __name__ == '__main__':
+    # Warn when the login credentials are not configured.
+    if not USERNAME or not PASSWORD:
+        print("警告: 请设置环境变量 USER 和 PASSWORD")
+
+    # Warn when tokens are signed with the publicly known fallback key.
+    if app.config['SECRET_KEY'] == 'your-secret-key-here':
+        print("警告: 未设置环境变量 SECRET_KEY，正在使用不安全的默认密钥")
+
+    # The Werkzeug debugger lets a client execute arbitrary code, so it must
+    # not be on by default while the server listens on all interfaces.
+    # Set FLASK_DEBUG=1 to opt in during local development.
+    debug_enabled = os.getenv('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
+
+    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@ -0,0 +1,61 @@
+FROM python:3.12-slim
+
+# Install system dependencies: nginx, the shared libraries Chromium needs,
+# and xvfb so a headed browser can run without a display.
+RUN apt-get update && apt-get install -y \
+    nginx \
+    curl \
+    wget \
+    libglib2.0-0 \
+    libnss3 \
+    libnspr4 \
+    libatk1.0-0 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdrm2 \
+    libdbus-1-3 \
+    libxkbcommon0 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxfixes3 \
+    libxrandr2 \
+    libgbm1 \
+    libasound2 \
+    libpango-1.0-0 \
+    libcairo2 \
+    libatspi2.0-0 \
+    fonts-liberation \
+    libnss3-tools \
+    xvfb \
+    && rm -rf /var/lib/apt/lists/*
+
+# Set the working directory
+WORKDIR /app
+
+# Copy the server files
+COPY ./server/* /app/server/
+
+# Copy the nginx configuration
+# NOTE(review): nginx.conf serves static files from /app/html, but nothing
+# in this Dockerfile copies an html directory into the image — confirm.
+COPY ./nginx.conf /etc/nginx/nginx.conf
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r /app/server/requirements.txt
+
+# Install the Chromium build used by Playwright
+RUN playwright install chromium
+
+# Create the download directory and the runtime directory nginx needs
+RUN mkdir -p /app/server/download /var/run/nginx
+
+# Default environment.
+# WARNING: these are placeholder credentials baked into the image; override
+# USER, PASSWORD and SECRET_KEY at run time (docker run -e ...) in any real
+# deployment.
+ENV USER=admin
+ENV PASSWORD=password
+ENV SECRET_KEY='asd78yujncisa32r89'
+
+# Persist downloaded files
+VOLUME ["/app/server/download"]
+
+# nginx.conf listens on 4560, so that is the port to publish
+EXPOSE 4560
+
+# Start nginx, then run the API under a virtual X display
+CMD service nginx start && cd /app/server && xvfb-run -a python3 api.py
--- a/docker/html/idnex.html
+++ b/docker/html/idnex.html
@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <title>下载器</title>
+</head>
+<body>
+
+</body>
+</html>
--- a/docker/nginx.conf
+++ b/docker/nginx.conf
@ -0,0 +1,54 @@
+
+worker_processes  1;
+
+events {
+    worker_connections  1024;
+}
+http {
+    include       mime.types;
+    default_type  application/octet-stream;
+    sendfile        on;
+    keepalive_timeout  65;
+    # NOTE(review): $real_ip is defined here but not referenced anywhere in
+    # this file.
+    map $proxy_protocol_addr $real_ip {
+            default $remote_addr;
+        }
+  server {
+    listen 4560;
+    server_name localhost;
+    client_header_buffer_size 64k;
+    large_client_header_buffers 8 128k;
+    client_max_body_size 50m;
+
+     # Static front-end files
+     location / {
+            root /app/html;
+            try_files $uri $uri/ /index.html;
+        }
+
+     # Forward API calls to the Flask server
+     location /api {
+    proxy_pass http://localhost:5000/api;
+    proxy_set_header Host $host;
+    proxy_set_header X-Real-IP $remote_addr;
+    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    proxy_set_header X-Forwarded-Proto $scheme;
+
+    # CORS configuration.
+    # Access-Control-Allow-Credentials is intentionally not sent: browsers
+    # reject credentialed requests when the allowed origin is the wildcard,
+    # and the API authenticates through the Authorization header.
+    add_header Access-Control-Allow-Origin * always;
+    add_header Access-Control-Allow-Methods 'GET, POST, OPTIONS, PUT, DELETE' always;
+    add_header Access-Control-Allow-Headers 'Authorization, Content-Type, X-Requested-With, Accept, Origin' always;
+    add_header Access-Control-Expose-Headers 'Content-Length, Content-Range' always;
+
+    # Answer OPTIONS preflight requests directly
+    if ($request_method = 'OPTIONS') {
+        add_header Access-Control-Allow-Origin *;
+        add_header Access-Control-Allow-Methods 'GET, POST, OPTIONS, PUT, DELETE';
+        add_header Access-Control-Allow-Headers 'Authorization, Content-Type, X-Requested-With, Accept, Origin';
+        add_header Access-Control-Max-Age 86400;
+        add_header Content-Length 0;
+        add_header Content-Type text/plain;
+        return 200;
+    }
+}
+}
+
+}
--- a/docker/server/api.py
+++ b/docker/server/api.py
@ -0,0 +1,180 @@
+import asyncio
+from flask import Flask, jsonify, request
+import jwt
+import datetime
+import os
+from functools import wraps
+
+from download import M3U8Downloader
+from function import crawl_missav
+from urllib.parse import urlparse
+
+# Flask application. SECRET_KEY is the HS256 key used to sign and verify the
+# login JWTs issued and checked in this module.
+# NOTE(review): when the SECRET_KEY environment variable is unset the
+# hard-coded fallback below is used, so anyone who knows this literal could
+# forge tokens — make sure SECRET_KEY is set in every real deployment.
+app = Flask(__name__)
+app.config['SECRET_KEY'] = os.getenv('SECRET_KEY', 'your-secret-key-here')
+
+# Shared downloader instance used by the download/task/progress endpoints.
+downloader = M3U8Downloader(max_workers=10, output_dir=r"download")
+
+# Login credentials are read from the environment.
+# NOTE(review): USER is commonly pre-set by the shell to the OS login name —
+# confirm this variable name is intentional.
+USERNAME = os.getenv('USER')
+PASSWORD = os.getenv('PASSWORD')
+
+
+def token_required(f):
+    """Decorator that rejects requests lacking a valid login token.
+
+    The token is read from the ``Authorization`` header (an optional
+    ``Bearer `` prefix is stripped) and verified as an HS256 JWT signed with
+    ``app.config['SECRET_KEY']``. On any failure a JSON body with ``code`` 403
+    is returned with HTTP 403 and the wrapped view is not called.
+
+    Args:
+        f: The view function to protect.
+
+    Returns:
+        The wrapped view function.
+    """
+    def deny(message):
+        # Every rejection shares the same response shape.
+        return jsonify({
+            'msg': message,
+            'code': 403,
+        }), 403
+
+    @wraps(f)
+    def decorated(*args, **kwargs):
+        token = request.headers.get('Authorization')
+
+        if not token:
+            return deny('请登录')
+
+        # Accept both "Bearer <token>" and a bare token.
+        if token.startswith('Bearer '):
+            token = token[7:]
+
+        try:
+            data = jwt.decode(token, app.config['SECRET_KEY'], algorithms=['HS256'])
+        except jwt.ExpiredSignatureError:
+            return deny('登录已过期，请重新登录')
+        except jwt.InvalidTokenError:
+            return deny('无效的token')
+
+        # login() always embeds a 'user' claim; a correctly signed token
+        # without it used to raise KeyError and surface as HTTP 500.
+        if 'user' not in data:
+            return deny('无效的token')
+
+        return f(*args, **kwargs)
+
+    return decorated
+
+
+@app.route('/api/login', methods=['POST'])
+def login():
+    """Exchange a username/password pair for a one-hour JWT.
+
+    Expects a JSON object with ``username`` and ``password``.
+
+    Returns:
+        HTTP 200 with ``data.token`` and ``data.expires_in`` on success,
+        HTTP 400 when no usable JSON object was sent, HTTP 401 when the
+        credentials do not match (or none are configured on the server).
+    """
+    import hmac  # local import: only needed for the constant-time comparison
+
+    # silent=True turns a wrong Content-Type or malformed JSON into None, so
+    # the client gets the JSON 400 below instead of a framework error page.
+    data = request.get_json(silent=True)
+
+    if not data or not isinstance(data, dict):
+        return jsonify({
+            'msg': '请提供用户名和密码',
+            'code': 400,
+        }), 400
+
+    username = data.get('username')
+    password = data.get('password')
+
+    # Without the USERNAME/PASSWORD and type checks, an unset environment
+    # (None) compared equal to a request that omits both fields (None), which
+    # handed out a valid token to anyone.
+    credentials_ok = False
+    if USERNAME and PASSWORD and isinstance(username, str) and isinstance(password, str):
+        user_ok = hmac.compare_digest(
+            username.encode('utf-8', 'surrogatepass'), USERNAME.encode('utf-8', 'surrogatepass'))
+        password_ok = hmac.compare_digest(
+            password.encode('utf-8', 'surrogatepass'), PASSWORD.encode('utf-8', 'surrogatepass'))
+        credentials_ok = user_ok and password_ok
+
+    if not credentials_ok:
+        return jsonify({
+            'msg': '用户名或密码错误',
+            'code': 401,
+        }), 401
+
+    # Token expires after one hour; a timezone-aware timestamp replaces the
+    # deprecated datetime.utcnow().
+    expires_at = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(hours=1)
+    token = jwt.encode({
+        'user': username,
+        'exp': expires_at
+    }, app.config['SECRET_KEY'], algorithm='HS256')
+
+    return jsonify({
+        'msg': '登录成功',
+        'code': 200,
+        'data': {
+            'token': token,
+            'expires_in': 3600  # one hour, in seconds
+        }
+    }), 200
+
+
+@app.route('/api/check/<path:url>')
+@token_required
+def check_url(url):
+    """Crawl a missav page and return what ``crawl_missav`` extracted.
+
+    Args:
+        url: The page URL, taken from the remainder of the request path.
+            NOTE(review): only the path is matched by the route, so a query
+            string on the target URL does not arrive here — confirm that is
+            acceptable.
+
+    Returns:
+        HTTP 200 in both cases; the JSON ``code`` is 200 on success and 500
+        when the URL does not belong to missav.
+    """
+    if not is_from_missav(url):
+        return jsonify({
+            'msg': '不是来自missav的链接',
+            'code': 500
+        }), 200
+
+    result = asyncio.run(crawl_missav(url))
+    return jsonify({
+        'msg': '成功',
+        'code': 200,
+        # 'dat' is kept for existing clients; 'data' matches the key used by
+        # the other endpoints in this file.
+        'dat': result,
+        'data': result
+    }), 200
+
+
+@app.route('/api/download', methods=['POST'])
+@token_required
+def download():
+    """Start a background download of an M3U8 playlist.
+
+    Expects a JSON object with ``name`` (output file name without extension)
+    and ``url`` (http/https playlist URL).
+
+    Returns:
+        HTTP 200 with the new task id, or HTTP 400 for missing/invalid input.
+    """
+    # silent=True: a non-JSON body yields None instead of a framework error.
+    data = request.get_json(silent=True)
+
+    if not data or not isinstance(data, dict):
+        return jsonify({'error': 'No JSON data provided'}), 400
+
+    name = data.get('name')
+    url = data.get('url')
+
+    if not name or not url:
+        return jsonify({'error': 'Missing name or url parameter'}), 400
+
+    # Keep only the final path component so a name such as "../../x" cannot
+    # write outside the download directory.
+    safe_name = os.path.basename(str(name).replace('\\', '/')).strip()
+    if safe_name in ('', '.', '..'):
+        return jsonify({'error': 'Invalid name parameter'}), 400
+
+    if not isinstance(url, str) or urlparse(url).scheme not in ('http', 'https'):
+        return jsonify({'error': 'Invalid url parameter'}), 400
+
+    task_id = downloader.download(
+        output_filename=f"{safe_name}.mp4",
+        m3u8_url=url
+    )
+    return jsonify({
+        'msg': '成功',
+        'code': 200,
+        # 'dat' is kept for existing clients; 'data' matches the other endpoints.
+        'dat': task_id,
+        'data': task_id
+    }), 200
+
+
+@app.route('/api/all-task', methods=['GET'])
+@token_required
+def all_task():
+    """Return every download task known to the shared downloader as JSON."""
+    payload = {
+        'msg': '成功',
+        'code': 200,
+        'data': downloader.get_all_tasks(),
+    }
+    return jsonify(payload), 200
+
+
+@app.route('/api/progress/<path:task_id>', methods=['GET'])
+@token_required
+def progress(task_id):
+    """Report the progress of one download task.
+
+    Args:
+        task_id: Identifier returned by the download endpoint.
+
+    Returns:
+        HTTP 200 with the file name, progress (float between 0 and 1) and
+        status, or HTTP 404 when the downloader does not know the task.
+    """
+    progress_info = downloader.get_progress(task_id)
+    status = progress_info['status']
+
+    # An unknown id used to be reported as a successful task with an empty
+    # name, which clients could not tell apart from a real one.
+    if status == 'not_found':
+        return jsonify({
+            'msg': '任务不存在',
+            'code': 404,
+        }), 404
+
+    filename = progress_info['filename']
+    ratio = progress_info['progress']  # float in 0..1, e.g. 0.56 means 56%
+
+    print(f"文件: {filename}, 进度: {ratio:.2%}, 状态: {status}")
+    return jsonify({
+        'msg': '成功',
+        'code': 200,
+        'data': {'name': filename, 'progress': ratio, 'status': status}
+    }), 200
+
+
+def is_from_missav(url):
+    """Tell whether ``url`` is an http(s) URL hosted on missav.ws.
+
+    Args:
+        url: Candidate URL.
+
+    Returns:
+        True when the host is ``missav.ws`` or one of its subdomains,
+        False for anything else, including non-string or unparsable input.
+    """
+    if not isinstance(url, str):
+        return False
+
+    try:
+        parsed = urlparse(url)
+        # .hostname drops any "user@" prefix and ":port" suffix and is
+        # lower-cased; comparing the raw netloc rejected "missav.ws:443".
+        hostname = parsed.hostname
+    except ValueError:
+        return False
+
+    if parsed.scheme not in ('http', 'https') or not hostname:
+        return False
+
+    return hostname == 'missav.ws' or hostname.endswith('.missav.ws')
+
+
+if __name__ == '__main__':
+    # Warn when the login credentials are not configured.
+    if not USERNAME or not PASSWORD:
+        print("警告: 请设置环境变量 USER 和 PASSWORD")
+
+    # Warn when tokens are signed with the publicly known fallback key.
+    if app.config['SECRET_KEY'] == 'your-secret-key-here':
+        print("警告: 未设置环境变量 SECRET_KEY，正在使用不安全的默认密钥")
+
+    # The Werkzeug debugger lets a client execute arbitrary code, so it must
+    # not be on by default while the server listens on all interfaces.
+    # Set FLASK_DEBUG=1 to opt in during local development.
+    debug_enabled = os.getenv('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes', 'on')
+
+    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)
--- a/docker/server/download.py
+++ b/docker/server/download.py
@ -0,0 +1,355 @@
+import os
+import time
+import threading
+import requests
+from urllib.parse import urljoin
+import m3u8
+from Crypto.Cipher import AES
+import concurrent.futures
+from pathlib import Path
+
+
+class M3U8Downloader:
+    """Download HLS (M3U8) media playlists in background threads.
+
+    Each call to :meth:`download` registers a task in ``self.tasks`` (guarded
+    by ``self.lock``), fetches every segment with a thread pool, decrypts the
+    segments with AES-128-CBC when the playlist declares a key, and
+    concatenates them into a single file under ``output_dir``.
+
+    Only the first key of a playlist is honoured; key rotation and master
+    (variant) playlists are not supported.
+    """
+
+    # Browser-like User-Agent sent with playlist and segment requests.
+    _HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+    # Seconds before a playlist, key or segment request is abandoned.
+    _REQUEST_TIMEOUT = 30
+
+    def __init__(self, max_workers=5, output_dir="downloads"):
+        """Create the downloader and make sure the output directory exists.
+
+        Args:
+            max_workers: Number of segments downloaded in parallel per task.
+            output_dir: Directory that receives the merged files.
+        """
+        self.max_workers = max_workers
+        self.output_dir = Path(output_dir)
+        # parents=True so a nested path such as "data/videos" also works.
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        # State of every download task, keyed by task id.
+        self.tasks = {}
+        self.lock = threading.Lock()
+        self.task_counter = 0
+
+    @staticmethod
+    def _new_task_info(task_id, m3u8_url, output_filename):
+        """Build the initial state record for a task."""
+        return {
+            'task_id': task_id,
+            'm3u8_url': m3u8_url,
+            'output_filename': output_filename,
+            'status': 'preparing',
+            'total_segments': 0,
+            'downloaded_segments': 0,
+            'progress': 0.0,
+            'output_file': '',
+            'start_time': time.time(),
+            'key': None,
+            'iv': None
+        }
+
+    @staticmethod
+    def _compute_progress(task_info):
+        """Derive the 0..1 progress value from a task's status and counters."""
+        status = task_info['status']
+        if status in ('merging', 'completed'):
+            return 1.0
+        if status == 'downloading' and task_info['total_segments'] > 0:
+            return task_info['downloaded_segments'] / task_info['total_segments']
+        # preparing, failed, unknown status, or nothing to download yet
+        return 0.0
+
+    @staticmethod
+    def _mark_failed(task_info, message):
+        """Record a failure reason on the task."""
+        task_info['status'] = 'failed'
+        task_info['error'] = message
+        task_info['progress'] = 0.0
+
+    @staticmethod
+    def _remove_temp_dir(temp_dir):
+        """Delete a task's segment directory together with any leftovers."""
+        import shutil  # local import: this module does not import shutil
+
+        shutil.rmtree(temp_dir, ignore_errors=True)
+
+    def get_task_info(self, task_id):
+        """Return the state record of a task.
+
+        Returns:
+            dict: The task record, or ``{"status": "not_found"}`` when the id
+            is unknown.
+        """
+        with self.lock:
+            return self.tasks.get(task_id, {"status": "not_found"})
+
+    def list_tasks(self):
+        """Return a shallow copy of the ``task_id -> task record`` mapping."""
+        with self.lock:
+            return dict(self.tasks)
+
+    def get_all_tasks(self):
+        """Return a summary of every task, newest first.
+
+        Returns:
+            list: One dict per task, e.g.
+            ``{'task_id': 'task_1', 'filename': 'video1.mp4',
+            'status': 'downloading', 'progress': 0.56,
+            'start_time': '2025-01-01 12:00:00'}``.
+        """
+        now = time.time()
+        with self.lock:
+            snapshot = [(task_id, dict(info)) for task_id, info in self.tasks.items()]
+
+        # Sort on the raw timestamp: the formatted string only has one-second
+        # resolution, so tasks started within the same second kept insertion
+        # order instead of newest-first.
+        snapshot.sort(key=lambda item: item[1].get('start_time', now), reverse=True)
+
+        return [
+            {
+                'task_id': task_id,
+                'filename': info['output_filename'],
+                'status': info['status'],
+                'progress': round(self._compute_progress(info), 4),
+                'start_time': time.strftime(
+                    '%Y-%m-%d %H:%M:%S', time.localtime(info.get('start_time', now)))
+            }
+            for task_id, info in snapshot
+        ]
+
+    def get_tasks_summary(self):
+        """Count tasks per status.
+
+        Returns:
+            dict: ``total`` plus one counter for each of ``preparing``,
+            ``downloading``, ``merging``, ``completed`` and ``failed``.
+        """
+        all_tasks = self.get_all_tasks()
+
+        summary = {
+            'total': len(all_tasks),
+            'preparing': 0,
+            'downloading': 0,
+            'merging': 0,
+            'completed': 0,
+            'failed': 0
+        }
+
+        for task in all_tasks:
+            status = task['status']
+            # 'total' is not a status and must never be incremented here.
+            if status != 'total' and status in summary:
+                summary[status] += 1
+
+        return summary
+
+    def download_ts_segment(self, task_info, ts_url, output_path, segment_index):
+        """Download one TS segment, decrypt it if needed and write it to disk.
+
+        Args:
+            task_info: Task record; ``key`` / ``iv`` / ``media_sequence`` are
+                read for decryption and ``task_id`` for progress accounting.
+            ts_url: Absolute URL of the segment.
+            output_path: Where the segment is written.
+            segment_index: Zero-based position of the segment in the playlist.
+
+        Returns:
+            bool: True on success, False when anything went wrong.
+        """
+        try:
+            response = requests.get(ts_url, headers=self._HEADERS, timeout=self._REQUEST_TIMEOUT)
+            response.raise_for_status()
+
+            ts_data = response.content
+
+            key = task_info.get('key')
+            if key:
+                from Crypto.Util.Padding import unpad
+
+                iv = task_info.get('iv')
+                if not iv:
+                    # RFC 8216 section 5.2: without an explicit IV the
+                    # segment's media sequence number, as a 128-bit
+                    # big-endian integer, is the IV.
+                    sequence_number = (task_info.get('media_sequence') or 0) + segment_index
+                    iv = sequence_number.to_bytes(16, 'big')
+
+                cipher = AES.new(key, AES.MODE_CBC, iv)
+                ts_data = cipher.decrypt(ts_data)
+                try:
+                    # HLS AES-128 segments carry PKCS7 padding; leaving it in
+                    # puts stray bytes between segments of the merged file.
+                    ts_data = unpad(ts_data, AES.block_size)
+                except ValueError:
+                    # Deliberate best effort: a stream without valid padding
+                    # is kept exactly as decrypted.
+                    pass
+
+            with open(output_path, 'wb') as f:
+                f.write(ts_data)
+
+            # Update the progress counter.
+            with self.lock:
+                if task_info['task_id'] in self.tasks:
+                    self.tasks[task_info['task_id']]['downloaded_segments'] += 1
+
+            return True
+
+        except Exception as e:
+            print(f"下载片段 {segment_index} 失败: {e}")
+            return False
+
+    def get_decryption_key(self, key_uri, iv=None):
+        """Fetch the AES key and normalise the IV.
+
+        Args:
+            key_uri: Absolute URL of the key file.
+            iv: IV from the playlist as a hex string (optionally prefixed
+                with ``0x``), as bytes, or None.
+
+        Returns:
+            tuple: ``(key, iv)`` as bytes; the IV defaults to 16 zero bytes
+            when none is given. ``(None, None)`` when the key cannot be
+            fetched or the IV cannot be parsed.
+        """
+        try:
+            response = requests.get(key_uri, timeout=self._REQUEST_TIMEOUT)
+            response.raise_for_status()
+            key = response.content
+
+            # A hexadecimal IV string is converted to bytes.
+            if iv and isinstance(iv, str):
+                hex_digits = iv[2:] if iv.lower().startswith('0x') else iv
+                iv = bytes.fromhex(hex_digits)
+            elif not iv:
+                iv = b'\x00' * 16  # default IV
+
+            return key, iv
+        except Exception as e:
+            print(f"获取解密密钥失败: {e}")
+            return None, None
+
+    def _download_m3u8(self, m3u8_url, output_filename, task_id):
+        """Worker: fetch the playlist, download all segments and merge them.
+
+        Progress and the final outcome are recorded on the task record; the
+        method itself returns nothing and does not raise.
+        """
+        # download() has normally registered the task already; setdefault
+        # keeps a direct call to this method working too.
+        with self.lock:
+            task_info = self.tasks.setdefault(
+                task_id, self._new_task_info(task_id, m3u8_url, output_filename))
+
+        temp_dir = None
+        try:
+            # Fetch and parse the playlist.
+            response = requests.get(m3u8_url, headers=self._HEADERS, timeout=self._REQUEST_TIMEOUT)
+            response.raise_for_status()
+            m3u8_obj = m3u8.loads(response.text)
+
+            # Resolve the decryption key (first key only).
+            first_key = m3u8_obj.keys[0] if m3u8_obj.keys else None
+            if first_key is not None and first_key.uri:
+                key_uri = urljoin(m3u8_url, first_key.uri)
+                key, iv = self.get_decryption_key(key_uri, first_key.iv)
+                if key is None:
+                    # Continuing would write still-encrypted segments and
+                    # report the task as completed.
+                    raise RuntimeError('获取解密密钥失败')
+                task_info['key'] = key
+                # Without an explicit IV each segment derives its own from
+                # the media sequence number (see download_ts_segment).
+                task_info['iv'] = iv if first_key.iv else None
+                task_info['media_sequence'] = m3u8_obj.media_sequence or 0
+
+            # Collect the absolute URL of every segment; urljoin leaves
+            # already-absolute URIs untouched.
+            ts_segments = [urljoin(m3u8_url, segment.uri) for segment in m3u8_obj.segments]
+            if not ts_segments:
+                # e.g. a master playlist: nothing to download, so do not
+                # produce an empty "completed" file.
+                raise RuntimeError('播放列表中没有可下载的片段')
+
+            task_info['total_segments'] = len(ts_segments)
+            task_info['status'] = 'downloading'
+
+            # Final output path.
+            output_path = self.output_dir / output_filename
+            task_info['output_file'] = str(output_path)
+
+            # Temporary directory for the individual segments.
+            temp_dir = self.output_dir / f"temp_{task_id}"
+            temp_dir.mkdir(exist_ok=True)
+            segment_paths = [temp_dir / f"segment_{i:05d}.ts" for i in range(len(ts_segments))]
+
+            print(f"开始下载任务 {task_id}: {len(ts_segments)} 个片段")
+
+            # Download all segments with a thread pool and wait for them.
+            with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+                futures = [
+                    executor.submit(self.download_ts_segment, task_info, ts_url, ts_path, i)
+                    for i, (ts_url, ts_path) in enumerate(zip(ts_segments, segment_paths))
+                ]
+                results = [future.result() for future in concurrent.futures.as_completed(futures)]
+
+            if not all(results):
+                self._mark_failed(task_info, '部分片段下载失败')
+                print(f"任务 {task_id} 下载失败，部分片段下载失败")
+                return
+
+            # Merge the segments in playlist order.
+            print("开始合并TS文件...")
+            task_info['status'] = 'merging'
+            task_info['progress'] = 1.0
+
+            with open(output_path, 'wb') as outfile:
+                for ts_path in segment_paths:
+                    if ts_path.exists():
+                        outfile.write(ts_path.read_bytes())
+                        ts_path.unlink()
+
+            task_info['status'] = 'completed'
+            task_info['progress'] = 1.0
+            task_info['end_time'] = time.time()
+
+            print(f"任务 {task_id} 完成: {output_path}")
+
+        except Exception as e:
+            self._mark_failed(task_info, str(e))
+            print(f"任务 {task_id} 失败: {e}")
+        finally:
+            # Remove the segment directory on success and on failure alike,
+            # so failed tasks do not leave partial downloads behind.
+            if temp_dir is not None:
+                self._remove_temp_dir(temp_dir)
+
+    def download(self, output_filename, m3u8_url):
+        """Start downloading an M3U8 video in a background thread.
+
+        Args:
+            output_filename: Name of the merged file (e.g. ``video.mp4``).
+            m3u8_url: URL of the M3U8 playlist.
+
+        Returns:
+            str: The task id.
+        """
+        with self.lock:
+            self.task_counter += 1
+            task_id = f"task_{self.task_counter}"
+            # Register before the thread starts so that an immediate
+            # get_progress()/wait_for_completion() never sees "not_found".
+            self.tasks[task_id] = self._new_task_info(task_id, m3u8_url, output_filename)
+
+        thread = threading.Thread(
+            target=self._download_m3u8,
+            args=(m3u8_url, output_filename, task_id),
+            daemon=True
+        )
+        thread.start()
+
+        return task_id
+
+    def get_progress(self, task_id):
+        """Return the progress of a task.
+
+        Args:
+            task_id: The task id.
+
+        Returns:
+            dict: File name, progress (float between 0 and 1), status and
+            segment counters. For an unknown id:
+            ``{'filename': '', 'progress': 0.0, 'status': 'not_found'}``.
+        """
+        task_info = self.get_task_info(task_id)
+
+        if task_info['status'] == 'not_found':
+            return {'filename': '', 'progress': 0.0, 'status': 'not_found'}
+
+        return {
+            'filename': task_info['output_filename'],
+            'progress': round(self._compute_progress(task_info), 4),
+            'status': task_info['status'],
+            'task_id': task_id,
+            'output_file': task_info.get('output_file', ''),
+            'downloaded_segments': task_info.get('downloaded_segments', 0),
+            'total_segments': task_info.get('total_segments', 0)
+        }
+
+    def wait_for_completion(self, task_id, timeout=None):
+        """Block until a task finishes.
+
+        Args:
+            task_id: The task id.
+            timeout: Maximum number of seconds to wait; None waits forever.
+
+        Returns:
+            bool: True when the task completed; False when it failed, does
+            not exist, or the timeout elapsed.
+        """
+        started = time.monotonic()
+        while True:
+            status = self.get_task_info(task_id)['status']
+
+            if status == 'completed':
+                return True
+            # An unknown id can never complete; polling it used to loop
+            # forever when no timeout was given.
+            if status in ('failed', 'not_found'):
+                return False
+            if timeout is not None and (time.monotonic() - started) > timeout:
+                return False
+
+            time.sleep(1)
--- a/docker/server/function.py
+++ b/docker/server/function.py
@ -0,0 +1,57 @@
+from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
+import os
+
+
+async def crawl_missav(url):
+    """Open ``url`` in Chromium and scrape its title, video sources and serial number.
+
+    The browser is launched through Playwright with ``headless=False``; when
+    the ``PROXY`` environment variable is set it is used as the proxy server.
+    Scraping is best effort: errors are printed and whatever was collected so
+    far is returned.
+
+    Args:
+        url: Page to open.
+
+    Returns:
+        dict: ``title`` (str), ``url`` (list of ``src`` attributes found on
+        ``<video>`` elements) and ``serial_number`` (str, empty if not found).
+    """
+    result = {
+        'title': '',
+        'url': [],
+        'serial_number': ''  # serial number shown on the page
+    }
+    launch_args = {
+        "headless": False,
+        "args": ["--disable-blink-features=AutomationControlled"]
+    }
+
+    # Optional proxy taken from the environment.
+    env_proxy = os.getenv('PROXY')
+    if env_proxy:
+        launch_args["proxy"] = {"server": env_proxy}
+
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(**launch_args)
+        try:
+            page = await browser.new_page()
+            page.set_default_timeout(60000)
+            await page.goto(url, wait_until="domcontentloaded")
+            result['title'] = await page.title()
+
+            for video in await page.query_selector_all("video"):
+                src = await video.get_attribute("src")
+                if src is not None:
+                    result['url'].append(src)
+
+            # Find the first <span> containing "番号:" and read the text of
+            # the element that follows it.
+            try:
+                for span in await page.query_selector_all('span'):
+                    span_text = await span.text_content()
+                    if not span_text or '番号:' not in span_text:
+                        continue
+                    # Evaluated in the page so that a missing sibling yields
+                    # None; the handle returned for a null sibling has no
+                    # text_content() and used to abort the whole search.
+                    sibling_text = await span.evaluate(
+                        'element => element.nextElementSibling'
+                        ' ? element.nextElementSibling.textContent : null'
+                    )
+                    if sibling_text:
+                        result['serial_number'] = sibling_text.strip()
+                        break  # the first match is enough
+            except Exception as e:
+                print(f"[INFO] 查找番号时出错: {e}")
+
+        except PlaywrightTimeoutError:
+            print("[ERROR] 页面加载超时，可能被 Cloudflare 拦截")
+        except Exception as e:
+            # The previous `return` inside `finally` discarded these errors
+            # without a trace; keep returning the partial result but log why.
+            print(f"[ERROR] 抓取页面失败: {e}")
+        finally:
+            await browser.close()
+
+    return result
--- a/docker/server/requirements.txt
+++ b/docker/server/requirements.txt
@ -0,0 +1,8 @@
+playwright
+flask
+requests
+m3u8
+pycryptodome
+tqdm
+pathlib2
+pyjwt
--- a/download.py
+++ b/download.py
@ -0,0 +1,396 @@
+import os
+import time
+import threading
+import requests
+from urllib.parse import urljoin
+import m3u8
+from Crypto.Cipher import AES
+import concurrent.futures
+from pathlib import Path
+import shutil
+
+
+class M3U8Downloader:
+    """Threaded downloader that fetches an HLS (M3U8) playlist and merges its segments.
+
+    Each call to :meth:`download` starts a daemon thread that downloads every
+    segment of the playlist into a per-task cache directory, decrypts it with
+    AES-CBC when the playlist declares a key, and concatenates the segments
+    into a single file inside ``output_dir``. Task state lives in
+    ``self.tasks`` (task id -> info dict) and is guarded by ``self.lock``.
+    """
+
+    # Headers sent with playlist and segment requests.
+    _HEADERS = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+    }
+    # Timeout in seconds applied to every HTTP request so a stalled server
+    # cannot block a worker thread forever.
+    _REQUEST_TIMEOUT = 30
+
+    def __init__(self, max_workers=5, output_dir="downloads", cache_dir="cache"):
+        """Create the output/cache directories and reset the cache.
+
+        Args:
+            max_workers: Number of threads used to download segments of one task.
+            output_dir: Directory that receives the merged output files.
+            cache_dir: Directory holding temporary per-task segment files.
+                Everything inside it is deleted on construction.
+        """
+        self.max_workers = max_workers
+        self.output_dir = Path(output_dir)
+        self.cache_dir = Path(cache_dir)
+
+        # parents=True so nested paths such as "data/downloads" also work.
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+        # Leftover segments from a previous run are useless; drop them.
+        self.clear_cache()
+
+        # Task registry: task id -> mutable task info dict.
+        self.tasks = {}
+        self.lock = threading.Lock()
+        self.task_counter = 0
+
+    def clear_cache(self):
+        """Delete every file and sub-directory inside the cache directory.
+
+        Failures are reported on stdout and otherwise ignored (best effort).
+        """
+        try:
+            if self.cache_dir.exists():
+                for item in self.cache_dir.iterdir():
+                    if item.is_file():
+                        item.unlink()
+                    elif item.is_dir():
+                        shutil.rmtree(item)
+            print(f"缓存目录已清空: {self.cache_dir}")
+        except Exception as e:
+            print(f"清空缓存目录失败: {e}")
+
+    @staticmethod
+    def _new_task_info(task_id, m3u8_url, output_filename):
+        """Build the initial info dict for a task in the 'preparing' state."""
+        return {
+            'task_id': task_id,
+            'm3u8_url': m3u8_url,
+            'output_filename': output_filename,
+            'status': 'preparing',
+            'total_segments': 0,
+            'downloaded_segments': 0,
+            'progress': 0.0,
+            'output_file': '',
+            'start_time': time.time(),
+            'key': None,
+            'iv': None
+        }
+
+    @staticmethod
+    def _compute_progress(task_info):
+        """Return the progress of a task as a float between 0 and 1."""
+        status = task_info['status']
+        if status in ('merging', 'completed'):
+            return 1.0
+        if status == 'downloading' and task_info['total_segments'] > 0:
+            return task_info['downloaded_segments'] / task_info['total_segments']
+        # 'preparing', 'failed', unknown states and empty playlists.
+        return 0.0
+
+    def _update_task(self, task_info, **fields):
+        """Apply *fields* to *task_info* while holding the task lock."""
+        with self.lock:
+            task_info.update(fields)
+
+    def get_task_info(self, task_id):
+        """Return a snapshot of one task's info dict.
+
+        Returns:
+            dict: A shallow copy of the task info, or ``{"status": "not_found"}``
+            when the id is unknown. A copy is returned so callers can iterate
+            or serialize it while the worker thread keeps updating the task.
+        """
+        with self.lock:
+            info = self.tasks.get(task_id)
+            if info is None:
+                return {"status": "not_found"}
+            return dict(info)
+
+    def list_tasks(self):
+        """Return a mapping of task id to a snapshot of that task's info dict."""
+        with self.lock:
+            return {task_id: dict(info) for task_id, info in self.tasks.items()}
+
+    def get_all_tasks(self):
+        """Return summary information for every task, newest first.
+
+        Returns:
+            list: One dict per task, for example
+            [{'task_id': 'task_1', 'filename': 'video1.mp4', 'status': 'downloading',
+              'progress': 0.56, 'start_time': '2025-01-01 12:00:00'}, ...]
+        """
+        with self.lock:
+            snapshots = [(task_id, dict(info)) for task_id, info in self.tasks.items()]
+
+        now = time.time()
+        # Sort on the raw timestamp (not the formatted string) so tasks started
+        # within the same second are still ordered newest first.
+        snapshots.sort(key=lambda item: item[1].get('start_time', now), reverse=True)
+
+        return [
+            {
+                'task_id': task_id,
+                'filename': info['output_filename'],
+                'status': info['status'],
+                'progress': round(self._compute_progress(info), 4),
+                'start_time': time.strftime('%Y-%m-%d %H:%M:%S',
+                                            time.localtime(info.get('start_time', now)))
+            }
+            for task_id, info in snapshots
+        ]
+
+    def get_tasks_summary(self):
+        """Count tasks per status.
+
+        Returns:
+            dict: ``total`` plus one counter for each of the states
+            preparing / downloading / merging / completed / failed.
+        """
+        all_tasks = self.get_all_tasks()
+
+        summary = {
+            'total': len(all_tasks),
+            'preparing': 0,
+            'downloading': 0,
+            'merging': 0,
+            'completed': 0,
+            'failed': 0
+        }
+
+        for task in all_tasks:
+            status = task['status']
+            if status in summary:
+                summary[status] += 1
+
+        return summary
+
+    def download_ts_segment(self, task_info, ts_url, output_path, segment_index):
+        """Download one TS segment, decrypt it if needed and write it to disk.
+
+        Args:
+            task_info: Info dict of the owning task; ``key``/``iv`` are read from it.
+            ts_url: Absolute URL of the segment.
+            output_path: ``Path`` of the file the segment is written to.
+            segment_index: Index of the segment, used only in the error message.
+
+        Returns:
+            bool: True on success, False if anything went wrong (the error is
+            printed, never raised).
+        """
+        try:
+            response = requests.get(ts_url, headers=self._HEADERS, timeout=self._REQUEST_TIMEOUT)
+            response.raise_for_status()
+
+            ts_data = response.content
+
+            # Decrypt when the playlist declared a key.
+            if task_info['key'] and task_info['iv']:
+                # Local import: only needed for encrypted streams.
+                from Crypto.Util.Padding import unpad
+
+                cipher = AES.new(task_info['key'], AES.MODE_CBC, task_info['iv'])
+                ts_data = cipher.decrypt(ts_data)
+                # HLS AES-128 segments are PKCS7 padded; strip the padding so no
+                # garbage bytes end up between segments in the merged file.
+                try:
+                    ts_data = unpad(ts_data, AES.block_size)
+                except ValueError:
+                    # Not valid PKCS7 padding: keep the decrypted bytes as-is.
+                    pass
+
+            # Make sure the cache directory still exists.
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+
+            with open(output_path, 'wb') as f:
+                f.write(ts_data)
+
+            # Count the segment towards the task's progress.
+            with self.lock:
+                if task_info['task_id'] in self.tasks:
+                    self.tasks[task_info['task_id']]['downloaded_segments'] += 1
+
+            return True
+
+        except Exception as e:
+            print(f"下载片段 {segment_index} 失败: {e}")
+            return False
+
+    def get_decryption_key(self, key_uri, iv=None):
+        """Fetch the AES key and normalise the IV.
+
+        Args:
+            key_uri: Absolute URL of the key file.
+            iv: IV from the playlist; a hex string (with or without a
+                ``0x``/``0X`` prefix), bytes, or None.
+
+        Returns:
+            tuple: ``(key, iv)`` as bytes, or ``(None, None)`` if the key could
+            not be fetched or the IV could not be parsed.
+        """
+        try:
+            response = requests.get(key_uri, timeout=self._REQUEST_TIMEOUT)
+            response.raise_for_status()
+            key = response.content
+
+            if iv and isinstance(iv, str):
+                # The HLS hex prefix may be written as "0x" or "0X".
+                hex_digits = iv[2:] if iv[:2].lower() == '0x' else iv
+                iv = bytes.fromhex(hex_digits)
+            elif not iv:
+                # NOTE(review): RFC 8216 says a missing IV means the segment's
+                # media sequence number is the IV; the all-zero default is kept
+                # here for backward compatibility -- confirm against real streams.
+                iv = b'\x00' * 16
+
+            return key, iv
+        except Exception as e:
+            print(f"获取解密密钥失败: {e}")
+            return None, None
+
+    def _download_m3u8(self, m3u8_url, output_filename, task_id):
+        """Worker body: download, decrypt and merge one playlist.
+
+        Never raises; every failure is recorded on the task as
+        ``status='failed'`` together with an ``error`` message.
+        """
+        # Reuse the entry registered by download(); create one when this
+        # method is invoked directly.
+        with self.lock:
+            task_info = self.tasks.setdefault(
+                task_id, self._new_task_info(task_id, m3u8_url, output_filename))
+
+        temp_dir = self.cache_dir / f"temp_{task_id}"
+
+        try:
+            # Fetch and parse the playlist.
+            response = requests.get(m3u8_url, headers=self._HEADERS, timeout=self._REQUEST_TIMEOUT)
+            response.raise_for_status()
+            m3u8_obj = m3u8.loads(response.text)
+
+            # Resolve the decryption key. Entries without a URI (e.g.
+            # METHOD=NONE) mean the stream is not encrypted.
+            key_obj = m3u8_obj.keys[0] if m3u8_obj.keys else None
+            if key_obj and key_obj.uri:
+                key_uri = urljoin(m3u8_url, key_obj.uri)
+                key, iv = self.get_decryption_key(key_uri, key_obj.iv)
+                if not key:
+                    # Continuing would silently write encrypted garbage.
+                    raise RuntimeError('获取解密密钥失败')
+                self._update_task(task_info, key=key, iv=iv)
+
+            # urljoin leaves absolute URLs untouched and resolves relative ones.
+            ts_segments = [urljoin(m3u8_url, segment.uri) for segment in m3u8_obj.segments]
+            if not ts_segments:
+                # e.g. a master playlist: nothing to download, so do not report
+                # an empty output file as a success.
+                raise ValueError('播放列表中没有可下载的片段')
+
+            # Keep only the final path component so a name such as
+            # "../../x" or "/etc/x" cannot escape the output directory.
+            safe_name = Path(output_filename).name
+            if safe_name in ('', '.', '..'):
+                raise ValueError('无效的输出文件名')
+            output_path = self.output_dir / safe_name
+
+            self._update_task(
+                task_info,
+                total_segments=len(ts_segments),
+                status='downloading',
+                output_file=str(output_path),
+            )
+
+            # Per-task directory for the raw segments.
+            temp_dir.mkdir(parents=True, exist_ok=True)
+
+            print(f"开始下载任务 {task_id}: {len(ts_segments)} 个片段")
+            print(f"缓存目录: {temp_dir}")
+            print(f"输出文件: {output_path}")
+
+            # Download all segments concurrently.
+            with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+                futures = [
+                    executor.submit(
+                        self.download_ts_segment,
+                        task_info,
+                        ts_url,
+                        temp_dir / f"segment_{i:05d}.ts",
+                        i
+                    )
+                    for i, ts_url in enumerate(ts_segments)
+                ]
+                results = [future.result() for future in concurrent.futures.as_completed(futures)]
+
+            if not all(results):
+                self._update_task(task_info, status='failed', error='部分片段下载失败', progress=0.0)
+                print(f"任务 {task_id} 下载失败，部分片段下载失败")
+                shutil.rmtree(temp_dir, ignore_errors=True)
+                return
+
+            # Merge the segments, in playlist order, into the output file.
+            print("开始合并TS文件...")
+            self._update_task(task_info, status='merging', progress=1.0)
+
+            self.output_dir.mkdir(parents=True, exist_ok=True)
+
+            with open(output_path, 'wb') as outfile:
+                for i in range(len(ts_segments)):
+                    ts_path = temp_dir / f"segment_{i:05d}.ts"
+                    # Stream the copy instead of loading whole segments into
+                    # memory. A missing segment raises and fails the task
+                    # rather than silently leaving a gap in the video.
+                    with open(ts_path, 'rb') as infile:
+                        shutil.copyfileobj(infile, outfile)
+
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+            self._update_task(task_info, status='completed', progress=1.0, end_time=time.time())
+
+            print(f"任务 {task_id} 完成: {output_path}")
+
+        except Exception as e:
+            self._update_task(task_info, status='failed', error=str(e), progress=0.0)
+
+            # Best-effort cleanup; must not raise out of the worker thread.
+            shutil.rmtree(temp_dir, ignore_errors=True)
+
+            print(f"任务 {task_id} 失败: {e}")
+
+    def download(self, output_filename, m3u8_url):
+        """Start downloading an M3U8 video in a background thread.
+
+        Args:
+            output_filename: Name of the output file (e.g. ``video.mp4``).
+            m3u8_url: URL of the M3U8 playlist.
+
+        Returns:
+            str: The id of the newly created task.
+        """
+        with self.lock:
+            self.task_counter += 1
+            task_id = f"task_{self.task_counter}"
+            # Register before the thread starts so the task is visible to
+            # get_progress()/wait_for_completion() as soon as the id is returned.
+            self.tasks[task_id] = self._new_task_info(task_id, m3u8_url, output_filename)
+
+        thread = threading.Thread(
+            target=self._download_m3u8,
+            args=(m3u8_url, output_filename, task_id),
+            daemon=True,
+        )
+        thread.start()
+
+        return task_id
+
+    def get_progress(self, task_id):
+        """Return the download progress of one task.
+
+        Args:
+            task_id: Id returned by :meth:`download`.
+
+        Returns:
+            dict: File name, progress (float between 0 and 1), status and
+            segment counters; for an unknown id only ``filename``, ``progress``
+            and ``status='not_found'``.
+        """
+        task_info = self.get_task_info(task_id)
+
+        if task_info['status'] == 'not_found':
+            return {'filename': '', 'progress': 0.0, 'status': 'not_found'}
+
+        return {
+            'filename': task_info['output_filename'],
+            'progress': round(self._compute_progress(task_info), 4),
+            'status': task_info['status'],
+            'task_id': task_id,
+            'output_file': task_info.get('output_file', ''),
+            'downloaded_segments': task_info.get('downloaded_segments', 0),
+            'total_segments': task_info.get('total_segments', 0)
+        }
+
+    def wait_for_completion(self, task_id, timeout=None):
+        """Block until the task finishes, fails or the timeout elapses.
+
+        Args:
+            task_id: Id returned by :meth:`download`.
+            timeout: Maximum number of seconds to wait; ``None`` (or 0) waits
+                indefinitely.
+
+        Returns:
+            bool: True if the task completed; False if it failed, is unknown,
+            or the timeout expired.
+        """
+        # monotonic() is immune to wall-clock adjustments.
+        started = time.monotonic()
+        while True:
+            status = self.get_task_info(task_id)['status']
+
+            if status == 'completed':
+                return True
+            # An unknown id can never complete; return instead of polling forever.
+            if status in ('failed', 'not_found'):
+                return False
+            if timeout and (time.monotonic() - started) > timeout:
+                return False
+
+            time.sleep(1)
--- a/function.py
+++ b/function.py
@ -0,0 +1,57 @@
+from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
+import os
+
+
+async def crawl_missav(url):
+    """Open *url* in Chromium and scrape the title, video sources and serial number.
+
+    Args:
+        url: Address of the page to load.
+
+    Returns:
+        dict: ``{'title': str, 'url': list of <video> src values,
+        'serial_number': str}``. Fields keep their empty defaults when the
+        page cannot be loaded or the data is not present; page errors are
+        printed, not raised.
+    """
+    result = {
+        'title': '',
+        'url': [],
+        'serial_number': ''  # text of the element following the "番号:" label
+    }
+    launch_args = {
+        "headless": False,
+        "args": ["--disable-blink-features=AutomationControlled"]
+    }
+
+    # Optional proxy taken from the PROXY environment variable.
+    env_proxy = os.getenv('PROXY')
+    if env_proxy:
+        launch_args["proxy"] = {"server": env_proxy}
+
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(**launch_args)
+        try:
+            # Created inside the try so the browser is closed even if this fails.
+            page = await browser.new_page()
+            page.set_default_timeout(60000)
+
+            await page.goto(url, wait_until="domcontentloaded")
+            result['title'] = await page.title()
+
+            for video in await page.query_selector_all("video"):
+                src = await video.get_attribute("src")
+                if src is not None:
+                    result['url'].append(src)
+
+            # Find the first <span> containing "番号:" and read the text of
+            # its next sibling element.
+            try:
+                for span in await page.query_selector_all('span'):
+                    span_text = await span.text_content()
+                    if not span_text or '番号:' not in span_text:
+                        continue
+                    # Evaluate in the page so a missing sibling yields None
+                    # instead of a handle that has no text_content().
+                    sibling_text = await span.evaluate(
+                        'element => element.nextElementSibling'
+                        ' ? element.nextElementSibling.textContent : null'
+                    )
+                    if sibling_text:
+                        result['serial_number'] = sibling_text.strip()
+                        break  # the first match wins
+            except Exception as e:
+                print(f"[INFO] 查找番号时出错: {e}")
+
+        except PlaywrightTimeoutError:
+            print("[ERROR] 页面加载超时，可能被 Cloudflare 拦截")
+        except Exception as e:
+            # Keep the best-effort contract (always return a result), but do it
+            # explicitly: a `return` inside `finally` would also swallow task
+            # cancellation and KeyboardInterrupt.
+            print(f"[ERROR] 抓取页面失败: {e}")
+        finally:
+            await browser.close()
+
+    return result
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,8 @@
+playwright
+flask
+requests
+m3u8
+pycryptodome
+tqdm
+pathlib2
+pyjwt