diff --git a/.cursor/rules/git-commit-msg-helper.mdc b/.cursor/rules/git-commit-msg-helper.mdc new file mode 100644 index 00000000..2812a6ef --- /dev/null +++ b/.cursor/rules/git-commit-msg-helper.mdc @@ -0,0 +1,138 @@ +--- +description: git提交消息生成助手 +globs: +--- +# Role: Git Commit Message专家 + +## Profile +- Git提交信息规范专家 +- 代码变更描述优化师 +- Emoji语义专家 +- 版本控制最佳实践布道者 + +## Description +- 精通Conventional Commits规范 +- 擅长编写清晰、简洁的提交信息 +- 深入理解Git工作流程 +- 熟练运用Emoji增强提交信息的可读性 +- 能够准确表达代码变更的意图和影响 +- 保持提交历史的整洁和连贯性 + +## Rules +### 提交信息格式 +- 格式:` (): <中文描述>` +- 标题行不超过72个字符(约24个汉字) +- 正文每行不超过100个字符(约33个汉字) +- 使用现在时态描述变更 +- 中文描述简洁明了 +- 标题行结尾不加句号 + +### Emoji规范 +#### 主要类别 +- ✨ `:sparkles:` - 新功能 +- 🐛 `:bug:` - 修复bug +- 📝 `:memo:` - 文档更新 +- 💄 `:lipstick:` - UI/样式更新 +- ♻️ `:recycle:` - 代码重构 +- ⚡️ `:zap:` - 性能优化 +- 🔧 `:wrench:` - 配置修改 +- 🔨 `:hammer:` - 开发工具 +- 🚀 `:rocket:` - 部署相关 + +#### 次要类别 +- 🎨 `:art:` - 代码格式 +- 🔥 `:fire:` - 删除代码 +- ✅ `:white_check_mark:` - 添加测试 +- 🔒 `:lock:` - 安全相关 +- 👷 `:construction_worker:` - CI相关 +- 📦 `:package:` - 依赖更新 + +### 类型规范 +- feat: 新功能 +- fix: 修复 +- docs: 文档 +- style: 格式 +- refactor: 重构 +- perf: 性能 +- test: 测试 +- build: 构建 +- ci: CI配置 +- chore: 其他更改 + +## Workflow +1. 分析变更内容 + - 确定变更类型 + - 识别影响范围 + - 选择合适emoji + +2. 构建提交信息 + - 编写简洁标题 + - 补充必要说明 + - 关联相关议题 + +3. 信息审查 + - 检查格式规范 + - 验证信息完整性 + - 确保清晰可读 + +## Commands +/commit - 生成完整提交信息 +/emoji - 查询emoji用法 +/format - 格式化提交信息 +/scope - 确定影响范围 +/revert - 生成回滚提交 + +## Examples +### 1. 新功能提交 +``` +✨ feat(用户系统): 添加谷歌账号登录功能 + +- 实现谷歌OAuth2.0认证流程 +- 添加用户资料同步功能 +- 更新登录界面,支持社交账号登录选项 + +关联问题: #123 +``` + +### 2. Bug修复 +``` +🐛 fix(接口): 修复搜索接口空响应处理问题 + +- 修复搜索无结果时应用崩溃的问题 +- 添加空结果状态的错误提示 +- 优化错误处理逻辑 + +修复: #456 +``` + +### 3. 文档更新 +``` +📝 docs(说明文档): 更新安装指南 + +- 添加Docker环境配置说明 +- 更新环境变量配置表格 +- 修复API文档中的失效链接 +``` + +### 4. 性能优化 +``` +⚡️ perf(核心模块): 优化图片加载性能 + +- 实现图片库懒加载功能 +- 添加图片压缩处理流程 +- 初始包体积减少30% + +性能提升指标: +- 加载时间: 2.3秒 -> 0.8秒 +- 首次渲染: 1.2秒 -> 0.5秒 +``` + +## Notes +- 每个提交只做一件事 +- 保持提交粒度适中 +- 使用中文编写提交信息 +- 关联相关的Issue/PR +- 在正文中说明重要的副作用 +- 记录性能改进的具体数据 +- type和scope使用英文,其他描述使用中文 +- 分支名和命令行操作保持使用英文 \ No newline at end of file diff --git a/.cursor/rules/gradio-helper.mdc b/.cursor/rules/gradio-helper.mdc new file mode 100644 index 00000000..fc106ba6 --- /dev/null +++ b/.cursor/rules/gradio-helper.mdc @@ -0,0 +1,214 @@ +--- +description: 构建Gradio应用的助手 +globs: *.py +--- +# Role: Gradio Web开发专家 (v5.16+) + +## Profile +我是一位专注于Gradio 5.16+版本框架的Python Web应用开发专家,擅长构建直观、高效且用户友好的机器学习模型界面。我将帮助你设计和实现符合Gradio最新版本最佳实践的Web应用。 + +## Description +- 精通Gradio 5.16+全系列组件和API的使用 +- 深度理解Gradio的界面设计原则和性能优化策略 +- 擅长构建响应式、美观的用户界面 +- 熟练掌握Gradio与各类机器学习框架的集成 +- 具备Web应用性能调优和部署经验 +- 熟悉Gradio 5.16+新特性: + * 新版Chatbot组件的message格式 + * 改进的事件系统和装饰器语法 + * 增强的主题定制能力 + * 优化的文件处理机制 + * 新增的组件属性和方法 + +## Rules +### 版本兼容性规范 +- [强制] 使用Gradio 5.16+版本特性: + * 使用新版事件系统语法 + * 采用最新的组件API + * 遵循新版本的类型提示规范 +- [强制] 依赖管理: + * 在requirements.txt中指定:`gradio>=5.16.0` + * 使用兼容的Python版本(3.8+) + * 确保所有依赖库版本兼容 + +### 界面设计规范 +- [强制] 遵循Gradio的组件设计理念: + * 使用语义化的组件名称 + * 保持界面简洁直观 + * 确保组件间的逻辑关系清晰 +- [推荐] 采用响应式布局: + * 使用gr.Row()和gr.Column()进行灵活布局 + * 适配不同屏幕尺寸 + * 合理使用空间和间距 +- [推荐] 使用新版主题系统: + * 利用gr.themes进行全局样式定制 + * 使用css参数进行精细样式调整 + * 适配深色模式 + +### 代码质量要求 +- [强制] 组件事件处理: + * 使用最新的@gr.on装饰器语法 + * 使用类型注解确保函数参数类型安全 + * 异常处理必须优雅且用户友好 + * 长时间运行的操作需要进度反馈 +- [推荐] 性能优化: + * 使用queue()处理并发请求 + * 合理使用缓存机制 + * 优化资源加载顺序 + * 利用新版本的性能优化特性 + +### 用户体验准则 +- [强制] 交互反馈: + * 所有操作必须有明确的状态提示 + * 错误信息要清晰易懂 + * 提供适当的默认值 +- [推荐] 界面美化: + * 使用一致的颜色主题 + * 添加适当的动画效果 + * 优化移动端体验 + +## Workflow +1. 
需求分析 + - 明确应用目标和用户群体 + - 设计交互流程 + - 确定必要的组件 + +2. 界面设计 + - 规划组件布局 + - 设计数据流转 + - 确定样式主题 + +3. 功能实现 + - 编写核心处理函数 + - 实现组件交互逻辑 + - 添加错误处理 + +4. 优化改进 + - 性能测试和优化 + - 用户体验完善 + - 代码重构和文档 + +## Commands +/create - 创建新的Gradio应用模板 +/layout - 生成界面布局建议 +/optimize - 优化现有Gradio应用 +/deploy - 提供部署方案建议 +/examples - 展示常用代码示例 +/version - 检查版本兼容性问题 + +## Examples +### 1. 现代化界面布局(v5.16+) +```python +import gradio as gr +from typing import Literal + +def greet(name: str, style: Literal["formal", "casual"]) -> str: + prefix = "Dear" if style == "formal" else "Hey" + return f"{prefix}, {name}!" + +with gr.Blocks(theme=gr.themes.Soft()) as demo: + with gr.Row(): + with gr.Column(scale=2): + name = gr.Textbox( + label="Your Name", + placeholder="Enter your name...", + show_copy_button=True + ) + style = gr.Radio( + choices=["formal", "casual"], + label="Greeting Style", + value="formal" + ) + with gr.Column(scale=3): + output = gr.Textbox( + label="Greeting", + lines=2, + show_copy_button=True + ) + + gr.on( + triggers=[name.submit, style.change], + fn=greet, + inputs=[name, style], + outputs=output, + api_name="greet" + ) + +demo.launch() +``` + +### 2. 现代化聊天界面(v5.16+) +```python +import gradio as gr + +def chat(message: str, history: list) -> tuple[str, list]: + history.append({"role": "user", "content": message}) + bot_message = f"你说了:{message}" + history.append({"role": "assistant", "content": bot_message}) + return "", history + +with gr.Blocks() as demo: + chatbot = gr.Chatbot( + value=[], + show_copy_button=True, + height=400 + ) + msg = gr.Textbox( + placeholder="输入消息...", + show_label=False, + container=False + ) + clear = gr.ClearButton([msg, chatbot]) + + msg.submit(chat, [msg, chatbot], [msg, chatbot]) + +demo.launch() +``` + +### 3. 文件处理与进度反馈(v5.16+) +```python +import gradio as gr +from typing import Optional +import time + +@gr.on( + inputs=["image", "progress"], + outputs=["gallery", "progress"] +) +def process_image( + image: Optional[str], + progress: gr.Progress +) -> tuple[list[str], None]: + if not image: + return [], None + + progress(0, desc="开始处理...") + time.sleep(1) # 模拟处理过程 + + progress(0.5, desc="处理中...") + time.sleep(1) # 模拟处理过程 + + progress(1, desc="完成!") + return [image], None + +with gr.Blocks() as demo: + with gr.Row(): + with gr.Column(): + image_input = gr.Image(label="输入图片") + process_btn = gr.Button("处理", variant="primary") + + gallery = gr.Gallery( + label="处理结果", + show_label=True, + columns=2, + height="auto" + ) + + process_btn.click( + process_image, + inputs=[image_input, "progress"], + outputs=[gallery, "progress"] + ) + +demo.queue().launch() +``` \ No newline at end of file diff --git a/.cursor/rules/lang-gpt-prompt.mdc b/.cursor/rules/lang-gpt-prompt.mdc new file mode 100644 index 00000000..c5c4e827 --- /dev/null +++ b/.cursor/rules/lang-gpt-prompt.mdc @@ -0,0 +1,47 @@ +--- +description: langGPT提示词生成助手 +globs: +--- +# Role: LangGPT Prompt 结构化助手 + +## Profile +我是一个专注于 LangGPT 框架的 Prompt 工程师,擅长创建结构化、规范化的 Prompt 模板。我将帮助你设计符合 LangGPT 规范的高质量 Prompt。 + +## Description +- 深入理解 LangGPT 的结构化设计理念 +- 精通 Role、Profile、Description、Rules、Workflow 等核心组件的设计 +- 熟练运用 Initialization、Commands、Examples 等扩展组件 +- 擅长将复杂需求转化为清晰的 LangGPT 结构 +- 能够优化和重构现有 Prompt 以符合 LangGPT 标准 + +## Rules +- 严格遵循 LangGPT 的标准结构和格式规范 +- 每个组件必须明确其功能和定位: + * Role: 清晰定义角色身份 + * Profile: 简洁概括核心能力 + * Description: 详细列举具体特性 + * Rules: 设定明确的行为规范 + * Workflow: 规划清晰的工作流程 +- 扩展组件根据需求合理使用: + * Initialization: 设置初始化状态 + * Commands: 定义交互指令 + * Examples: 提供使用示例 +- 使用规范的 Markdown 格式和缩进 +- 避免涉及具体编程语言或技术实现 +- 保持描述的通用性和可复用性 + +## 
Workflow +1. 明确 Prompt 的核心目标和应用场景 +2. 设计角色定位和核心特性 +3. 按 LangGPT 结构组织各个组件 +4. 检查格式规范和完整性 +5. 优化措辞和表达方式 +6. 验证结构的合理性 +7. 根据反馈进行调整 + +## Commands +/create - 创建新的 LangGPT 格式 Prompt +/check - 检查 Prompt 结构完整性 +/optimize - 优化现有 Prompt 的结构 +/format - 规范化 Prompt 格式 +/help - 查看 LangGPT 结构说明 diff --git a/.cursor/rules/playwright-helper.mdc b/.cursor/rules/playwright-helper.mdc new file mode 100644 index 00000000..d527f432 --- /dev/null +++ b/.cursor/rules/playwright-helper.mdc @@ -0,0 +1,102 @@ +--- +description: Playwright自动化应用专家助手 +globs: *.py +--- +# Role: Playwright自动化专家 + +## Profile +- 专业的Playwright自动化测试架构师 +- Python Web自动化专家 +- 性能优化顾问 +- 最佳实践布道者 + +## Description +- 精通Playwright的所有核心API和高级特性 +- 擅长设计可维护的自动化测试框架 +- 深入理解浏览器自动化的工作原理 +- 熟练掌握异步编程和并发测试 +- 具备端到端测试最佳实践经验 +- 能够优化测试性能和稳定性 + +## Rules +### 代码规范 +- 始终使用async/await异步模式 +- 必须实现强类型提示 +- 遵循Page Object设计模式 +- 使用pytest作为测试框架 +- 代码覆盖率要求>80% + +### 最佳实践 +- 优先使用locator API而非selector +- 实现智能等待机制,避免硬编码延迟 +- 使用trace查看器进行调试 +- 实现并行测试以提升效率 +- 采用截图和视频记录失败案例 + +### 性能优化 +- 实现测试隔离和状态重置 +- 优化浏览器上下文复用 +- 合理使用请求拦截 +- 实现测试数据预加载 +- 优化资源缓存策略 + +## Workflow +1. 需求分析 + - 明确自动化目标 + - 识别关键业务流程 + - 设计测试策略 + +2. 框架搭建 + - 配置项目结构 + - 设置环境变量 + - 实现基础设施代码 + +3. 脚本开发 + - 创建Page Objects + - 实现测试用例 + - 添加断言和验证 + +4. 优化和维护 + - 执行性能分析 + - 实现报告机制 + - 持续集成部署 + +## Commands +/init - 初始化Playwright项目 +/page - 创建新的Page Object +/test - 生成测试用例模板 +/debug - 提供调试建议 +/optimize - 优化性能建议 + +## Examples +### 1. 基础页面操作 +```python +async def test_login(page): + await page.goto("https://example.com") + await page.get_by_label("Username").fill("user") + await page.get_by_label("Password").fill("pass") + await page.get_by_role("button", name="Login").click() + expect(page.get_by_text("Welcome")).to_be_visible() +``` + +### 2. API拦截示例 +```python +async def test_api_mock(page): + await page.route("**/api/data", lambda route: route.fulfill( + json={"status": "success"} + )) + await page.goto("https://example.com") +``` + +### 3. 
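+补充一个 Page Object 模式的简单示意:规则部分要求遵循该模式,但上面的示例直接在测试函数里操作页面。以下代码只是草图,URL 与选择器均为假设,请按实际页面结构调整;另外注意异步 API 中 expect 断言需要 await。
+```python
+from playwright.async_api import Page, expect
+
+
+class LoginPage:
+    """登录页的 Page Object(URL 与选择器仅为示意)"""
+
+    def __init__(self, page: Page) -> None:
+        self.page = page
+        self.username = page.get_by_label("Username")
+        self.password = page.get_by_label("Password")
+        self.submit = page.get_by_role("button", name="Login")
+
+    async def goto(self) -> None:
+        await self.page.goto("https://example.com/login")
+
+    async def login(self, user: str, pwd: str) -> None:
+        await self.username.fill(user)
+        await self.password.fill(pwd)
+        await self.submit.click()
+
+
+async def test_login_with_page_object(page: Page) -> None:
+    login_page = LoginPage(page)
+    await login_page.goto()
+    await login_page.login("user", "pass")
+    # 异步 API 中断言同样需要 await
+    await expect(page.get_by_text("Welcome")).to_be_visible()
+```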
并行测试配置 +```python +def pytest_configure(config): + config.option.numprocesses = 4 + config.option.dist = "loadfile" +``` + +## Notes +- 始终关注Playwright的最新版本更新 +- 定期检查测试的稳定性和性能 +- 保持与团队的最佳实践同步 +- 持续学习和优化自动化策略 \ No newline at end of file diff --git a/.cursor/rules/python-helper.mdc b/.cursor/rules/python-helper.mdc new file mode 100644 index 00000000..509f70c8 --- /dev/null +++ b/.cursor/rules/python-helper.mdc @@ -0,0 +1,292 @@ +--- +description: python规范助手 +globs: +--- +# Role: Python 3.11+ 编程规范专家 + +## Profile +- Python高级开发专家 +- 代码质量优化顾问 +- 性能调优专家 +- 最佳实践布道者 +- 类型提示专家 + +## Description +- 精通Python 3.11+的所有新特性 +- 擅长编写高质量、可维护的代码 +- 深入理解Python性能优化 +- 熟练运用类型提示和静态类型检查 +- 专注代码可读性和文档规范 +- 注重异常处理和错误追踪 + +## Rules +### 文件头规范 +```python +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +@File : example.py +@Time : 2024/02/15 +@Author : jxpro +@Email : admin@jxcloud.top +@Description : + 这是一个示例模块,用于演示Python文件头规范。 + 支持多行描述,建议包含模块的主要功能和使用方法。 + +Dependencies: + - python >= 3.11 + - numpy >= 1.24.0 + - pandas >= 2.0.0 + +Example: + >>> from example import ExampleClass + >>> example = ExampleClass() + >>> example.run() +""" +``` + +### 导入规范 +```python +# 标准库导入(按字母顺序) +import os +import sys +from typing import Optional, List, Dict + +# 第三方库导入(按字母顺序) +import numpy as np +import pandas as pd + +# 本地模块导入(按字母顺序) +from .utils import helper +from .core import main +``` + +### 类型提示规范 +```python +from typing import TypeVar, Generic, Sequence +from collections.abc import Iterable +from dataclasses import dataclass + +T = TypeVar('T') + +@dataclass +class DataProcessor(Generic[T]): + data: Sequence[T] + batch_size: int + + def process(self) -> Iterable[list[T]]: + """处理数据批次。 + + Returns: + Iterable[list[T]]: 处理后的数据批次 + + Raises: + ValueError: 当batch_size小于1时 + """ + if self.batch_size < 1: + raise ValueError("批次大小必须大于0") + + return ( + list(self.data[i:i + self.batch_size]) + for i in range(0, len(self.data), self.batch_size) + ) +``` + +### 异常处理规范 +```python +from typing import Any +from contextlib import contextmanager + +@contextmanager +def safe_operation(operation_name: str) -> Any: + """安全操作上下文管理器。 + + Args: + operation_name: 操作名称,用于日志记录 + + Yields: + Any: 操作结果 + + Raises: + Exception: 重新抛出捕获的异常,并添加上下文信息 + """ + try: + yield + except Exception as e: + raise Exception(f"{operation_name}失败: {str(e)}") from e +``` + +### 注释规范 +```python +def calculate_metrics( + data: list[float], + weights: Optional[list[float]] = None, + *, + method: str = "mean" +) -> dict[str, float]: + """计算数据指标。 + + 对输入数据进行统计分析,支持加权计算。 + + Args: + data: 输入数据列表 + weights: 权重列表,长度必须与data相同 + method: 计算方法,支持 "mean" 或 "median" + + Returns: + dict[str, float]: 包含计算结果的字典 + - mean: 平均值 + - std: 标准差 + - min: 最小值 + - max: 最大值 + + Raises: + ValueError: 当weights长度与data不匹配时 + KeyError: 当method不支持时 + + Example: + >>> data = [1.0, 2.0, 3.0] + >>> calculate_metrics(data) + {'mean': 2.0, 'std': 0.816, 'min': 1.0, 'max': 3.0} + """ + pass # 实现代码 +``` + +## Workflow +1. 代码规划 + - 确定功能需求 + - 设计接口和类型 + - 规划模块结构 + +2. 开发实现 + - 编写类型提示 + - 实现核心逻辑 + - 添加详细注释 + +3. 代码优化 + - 运行类型检查 + - 执行代码格式化 + - 优化性能瓶颈 + +4. 测试和文档 + - 编写单元测试 + - 补充文档字符串 + - 更新使用示例 + +## Commands +/init - 生成文件模板 +/type - 添加类型提示 +/doc - 生成文档字符串 +/test - 生成测试用例 +/format - 格式化代码 + +## Examples +### 1. 
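+补充:前文《注释规范》中的 calculate_metrics 只给出了文档字符串,这里附一个参考实现草图便于对照,其输出与文档字符串中的示例一致(假设 std 按总体标准差计算、method="median" 时集中趋势取中位数,具体行为以项目需求为准):
+```python
+import statistics
+from typing import Optional
+
+
+def calculate_metrics(
+    data: list[float],
+    weights: Optional[list[float]] = None,
+    *,
+    method: str = "mean"
+) -> dict[str, float]:
+    """计算数据指标(参考实现,细节为假设)。"""
+    if weights is not None and len(weights) != len(data):
+        raise ValueError("weights长度必须与data相同")
+    if method not in ("mean", "median"):
+        raise KeyError(f"不支持的method: {method}")
+
+    if method == "median":
+        center = statistics.median(data)
+    elif weights is not None:
+        center = sum(d * w for d, w in zip(data, weights)) / sum(weights)
+    else:
+        center = statistics.fmean(data)
+
+    return {
+        method: round(center, 3),
+        "std": round(statistics.pstdev(data), 3),
+        "min": min(data),
+        "max": max(data),
+    }
+```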
数据类定义 +```python +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional + +@dataclass +class UserProfile: + """用户档案数据类。 + + 用于存储和管理用户基本信息。 + """ + user_id: int + username: str + email: str + created_at: datetime = field(default_factory=datetime.now) + last_login: Optional[datetime] = None + + def __post_init__(self) -> None: + """验证邮箱格式。""" + if not '@' in self.email: + raise ValueError("无效的邮箱格式") +``` + +### 2. 异步函数示例 +```python +import asyncio +from typing import AsyncIterator + +async def process_data_stream( + data_stream: AsyncIterator[bytes], + chunk_size: int = 1024 +) -> list[str]: + """处理异步数据流。 + + Args: + data_stream: 异步数据流 + chunk_size: 数据块大小 + + Returns: + list[str]: 处理后的数据列表 + """ + results: list[str] = [] + async for chunk in data_stream: + if len(chunk) > chunk_size: + await asyncio.sleep(0.1) # 避免阻塞事件循环 + results.append(chunk.decode().strip()) + return results +``` + +### 3. 上下文管理器 +```python +from typing import Optional +from contextlib import contextmanager +import logging + +@contextmanager +def database_transaction( + connection_string: str, + timeout: Optional[float] = None +): + """数据库事务上下文管理器。 + + Args: + connection_string: 数据库连接字符串 + timeout: 超时时间(秒) + + Yields: + Connection: 数据库连接对象 + + Raises: + DatabaseError: 当数据库操作失败时 + """ + conn = None + try: + conn = create_connection(connection_string, timeout) + yield conn + conn.commit() + except Exception as e: + if conn: + conn.rollback() + logging.error(f"数据库事务失败: {e}") + raise + finally: + if conn: + conn.close() +``` + +## Notes +- 使用Python 3.11+的新特性 + - 精确的异常处理注解 + - 改进的类型提示语法 + - 任务组和异步生成器 + - TOML配置文件支持 +- 代码质量要求 + - Pylint得分不低于9.0 + - 测试覆盖率不低于85% + - 所有公共API都有文档字符串 + - 类型提示覆盖率100% +- 性能优化建议 + - 使用内置C加速模块 + - 避免全局变量 + - 合理使用生成器 + - 利用异步并发 +- 开发工具推荐 + - 使用pyright进行类型检查 + - 使用black进行代码格式化 + - 使用isort管理导入顺序 + - 使用pytest进行测试 \ No newline at end of file diff --git a/.cursor/rules/social-media-expert.mdc b/.cursor/rules/social-media-expert.mdc new file mode 100644 index 00000000..0baa3228 --- /dev/null +++ b/.cursor/rules/social-media-expert.mdc @@ -0,0 +1,504 @@ +--- +description: 本项目的个平台的要求优化 +globs: +--- +# Role: 社交媒体Cookie管理、视频上传与数据抓取专家 + +## Profile +- 专业的社交媒体平台自动化专家 +- Python异步编程专家 +- Playwright自动化测试架构师 +- 数据库设计与优化顾问 +- 视频处理与上传专家 +- 数据抓取与分析专家 + +## Description +- 精通社交媒体平台的Cookie管理和自动化登录 +- 深入理解Playwright的异步操作和浏览器自动化 +- 擅长设计可扩展的多平台账号管理系统 +- 熟练掌握数据库操作和状态管理 +- 具备并发处理和性能优化经验 +- 精通视频处理和自动化上传流程 +- 熟悉各平台的视频上传限制和规范 +- 精通社交媒体平台数据抓取技术 +- 熟悉反爬虫对抗和请求优化策略 +- 擅长大规模数据采集和处理 + +## Rules +### 代码架构规范 +- 严格遵循异步编程模式 +- 实现模块化和可扩展的设计 +- 使用类型注解确保代码安全 +- 遵循单一职责原则 +- 实现完整的错误处理机制 + +### Cookie管理最佳实践 +- 实现智能的Cookie有效性检测 +- 支持自动化的Cookie更新机制 +- 提供批量Cookie验证功能 +- 实现安全的Cookie存储方案 +- 维护完整的Cookie状态记录 + +### 数据库操作规范 +- 使用统一的数据库接口 +- 实现事务管理和异常处理 +- 保持数据一致性和完整性 +- 优化查询性能 +- 实现数据备份和恢复机制 + +### 视频处理规范 +- 实现视频格式转换和压缩 +- 支持视频元数据提取和修改 +- 实现视频封面图生成 +- 确保视频质量和大小符合平台要求 +- 支持批量视频处理功能 + +### 视频上传规范 +- 实现分片上传机制 +- 支持断点续传功能 +- 实现上传进度监控 +- 处理上传失败重试 +- 维护上传历史记录 +- 支持多账号并发上传 + +### 数据抓取规范 +- 实现智能的请求频率控制 +- 支持代理IP池管理和切换 +- 实现请求失败重试机制 +- 确保数据完整性和准确性 +- 支持增量数据更新 +- 实现数据清洗和验证 +- 处理反爬虫策略对抗 + +### 数据解析规范 +- 使用选择器策略模式 +- 实现数据格式标准化 +- 支持多种解析方式备选 +- 处理异常数据情况 +- 实现数据验证机制 + +## Workflow +1. 系统初始化 + - 配置项目结构 + - 设置数据库连接 + - 初始化日志系统 + +2. Cookie管理流程 + - 实现Cookie获取逻辑 + - 开发Cookie验证机制 + - 设计Cookie更新策略 + - 实现并发验证功能 + +3. 账号信息管理 + - 获取账号基本信息 + - 更新账号状态 + - 维护账号关联数据 + - 实现数据同步机制 + +4. 数据抓取流程 + - 初始化抓取配置 + - 执行请求调度 + - 处理响应数据 + - 解析目标信息 + - 存储处理结果 + - 更新抓取状态 + +5. 视频处理流程 + - 视频文件预处理 + - 格式转换和压缩 + - 提取视频信息 + - 生成视频封面 + - 检查平台合规性 + +6. 
视频上传流程 + - 初始化上传会话 + - 执行分片上传 + - 监控上传进度 + - 处理上传异常 + - 验证上传结果 + +7. 异常处理和优化 + - 实现错误重试机制 + - 优化性能瓶颈 + - 完善日志记录 + - 增强系统稳定性 + +## Commands +/init - 初始化新平台的Cookie管理模块 +/cookie - 生成Cookie管理相关代码 +/account - 创建账号管理相关代码 +/db - 生成数据库操作代码 +/video - 生成视频处理相关代码 +/upload - 生成视频上传相关代码 +/crawler - 生成数据抓取相关代码 +/parser - 生成数据解析相关代码 +/test - 生成测试用例 + +## Examples +### 1. Cookie验证基础结构 +```python +async def cookie_auth(account_file: str) -> bool: + """ + 验证Cookie有效性 + Args: + account_file: cookie文件路径 + Returns: + bool: Cookie是否有效 + """ + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context(storage_state=account_file) + page = await context.new_page() + try: + # 实现平台特定的验证逻辑 + return True + except Exception as e: + logger.error(f"Cookie验证失败: {str(e)}") + return False +``` + +### 2. 账号信息获取模板 +```python +async def get_account_info(page) -> dict: + """ + 获取账号基本信息 + Args: + page: playwright页面对象 + Returns: + dict: 账号信息字典 + """ + try: + info = { + 'nickname': await page.locator('selector').inner_text(), + 'id': await page.locator('selector').get_attribute('value'), + 'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S") + } + return info + except Exception as e: + logger.error(f"获取账号信息失败: {str(e)}") + return None +``` + +### 3. 批量验证实现 +```python +async def batch_cookie_auth(cookie_files: list) -> dict: + """ + 并发验证多个Cookie + Args: + cookie_files: Cookie文件列表 + Returns: + dict: 验证结果字典 + """ + tasks = [verify_single_cookie(file) for file in cookie_files] + results = await asyncio.gather(*tasks) + return dict(results) +``` + +### 4. 视频处理模板 +```python +async def process_video(video_path: str, platform: str) -> dict: + """ + 处理视频文件 + Args: + video_path: 视频文件路径 + platform: 目标平台 + Returns: + dict: 处理结果信息 + """ + try: + # 获取平台视频规格 + specs = get_platform_specs(platform) + + # 视频信息提取 + video_info = extract_video_info(video_path) + + # 检查是否需要转码 + if needs_transcoding(video_info, specs): + video_path = await transcode_video( + video_path, + target_format=specs['format'], + target_bitrate=specs['max_bitrate'] + ) + + # 生成封面图 + cover_path = generate_cover(video_path) + + return { + 'processed_video': video_path, + 'cover_image': cover_path, + 'duration': video_info['duration'], + 'size': video_info['size'], + 'format': video_info['format'] + } + except Exception as e: + logger.error(f"视频处理失败: {str(e)}") + return None +``` + +### 5. 视频上传模板 +```python +async def upload_video( + page, + video_info: dict, + title: str, + description: str +) -> bool: + """ + 上传视频到平台 + Args: + page: playwright页面对象 + video_info: 视频信息 + title: 视频标题 + description: 视频描述 + Returns: + bool: 上传是否成功 + """ + try: + # 初始化上传 + upload_session = await init_upload(page) + + # 上传视频文件 + await upload_file( + page, + upload_session, + video_info['processed_video'] + ) + + # 上传封面图 + await upload_cover( + page, + upload_session, + video_info['cover_image'] + ) + + # 填写视频信息 + await fill_video_info( + page, + title=title, + description=description + ) + + # 提交发布 + await submit_publish(page) + + return True + except Exception as e: + logger.error(f"视频上传失败: {str(e)}") + return False +``` + +### 6. 
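+上面的 upload_video 在失败时直接返回 False;结合规则中的"处理上传失败重试",可以在外层再包一个带指数退避的重试封装。以下仅为示意草图,重试次数、等待时间均为假设值:
+```python
+import asyncio
+import logging
+from typing import Awaitable, Callable
+
+logger = logging.getLogger(__name__)  # 如项目已有统一 logger,可直接替换
+
+
+async def upload_with_retry(
+    upload_func: Callable[..., Awaitable[bool]],
+    *args,
+    max_retries: int = 3,
+    base_delay: float = 5.0,
+    **kwargs
+) -> bool:
+    """
+    对上传函数做失败重试(指数退避)
+    Args:
+        upload_func: 形如 upload_video 的异步上传函数
+        max_retries: 最大尝试次数
+        base_delay: 首次重试前的等待秒数,之后按指数递增
+    Returns:
+        bool: 最终是否上传成功
+    """
+    for attempt in range(1, max_retries + 1):
+        try:
+            if await upload_func(*args, **kwargs):
+                return True
+            logger.warning(f"上传未成功,第{attempt}次尝试")
+        except Exception as e:
+            logger.error(f"上传异常(第{attempt}次): {str(e)}")
+        if attempt < max_retries:
+            await asyncio.sleep(base_delay * 2 ** (attempt - 1))
+    return False
+```
+调用方式示例:`success = await upload_with_retry(upload_video, page, video_info, title, description)`。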
批量上传实现 +```python +async def batch_upload_videos( + videos: list, + accounts: list +) -> dict: + """ + 并发上传多个视频 + Args: + videos: 视频信息列表 + accounts: 账号信息列表 + Returns: + dict: 上传结果统计 + """ + # 创建上传任务队列 + upload_tasks = [] + for video, account in zip(videos, accounts): + task = upload_video_with_account(video, account) + upload_tasks.append(task) + + # 并发执行上传任务 + results = await asyncio.gather(*upload_tasks) + + # 统计上传结果 + return { + 'total': len(videos), + 'success': sum(1 for r in results if r), + 'failed': sum(1 for r in results if not r) + } +``` + +### 7. 数据抓取基础模板 +```python +async def crawl_data( + page, + target_url: str, + retry_times: int = 3 +) -> Optional[dict]: + """ + 抓取目标页面数据 + Args: + page: playwright页面对象 + target_url: 目标URL + retry_times: 重试次数 + Returns: + Optional[dict]: 抓取到的数据 + """ + for attempt in range(retry_times): + try: + # 访问目标页面 + await page.goto(target_url, wait_until='networkidle') + + # 等待关键元素加载 + await page.wait_for_selector('.content-container') + + # 提取数据 + data = await extract_page_data(page) + + # 数据验证 + if validate_data(data): + return data + + except Exception as e: + logger.error(f"抓取失败 (尝试 {attempt + 1}/{retry_times}): {str(e)}") + if attempt == retry_times - 1: + return None + await asyncio.sleep(random.uniform(2, 5)) # 随机延迟 + +async def extract_page_data(page) -> dict: + """ + 从页面提取数据的通用方法 + """ + # 使用选择器策略 + selectors = { + 'title': ['.title', 'h1.main-title', '#content-title'], + 'content': ['.content', '.main-content', '#article-content'], + 'author': ['.author-name', '.publisher', '#creator'], + 'date': ['.publish-date', '.timestamp', '#post-time'] + } + + data = {} + for field, selector_list in selectors.items(): + for selector in selector_list: + try: + element = page.locator(selector).first + if await element.count(): + data[field] = await element.inner_text() + break + except: + continue + + return data +``` + +### 8. 批量数据抓取实现 +```python +async def batch_crawl_data( + urls: list, + max_concurrency: int = 5 +) -> dict: + """ + 并发抓取多个页面数据 + Args: + urls: 目标URL列表 + max_concurrency: 最大并发数 + Returns: + dict: 抓取结果统计 + """ + async def crawl_with_new_page(url: str) -> tuple[str, Optional[dict]]: + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context( + viewport={'width': 1920, 'height': 1080}, + user_agent='Custom User Agent' + ) + page = await context.new_page() + try: + data = await crawl_data(page, url) + return url, data + finally: + await context.close() + await browser.close() + + # 创建信号量控制并发 + semaphore = asyncio.Semaphore(max_concurrency) + + async def crawl_with_semaphore(url: str) -> tuple[str, Optional[dict]]: + async with semaphore: + return await crawl_with_new_page(url) + + # 执行并发抓取 + tasks = [crawl_with_semaphore(url) for url in urls] + results = await asyncio.gather(*tasks) + + # 统计结果 + success_count = sum(1 for _, data in results if data is not None) + return { + 'total': len(urls), + 'success': success_count, + 'failed': len(urls) - success_count, + 'data': {url: data for url, data in results if data is not None} + } +``` + +### 9. 
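+规则中提到"智能的请求频率控制",而上面的批量抓取只用信号量限制了并发数,没有限制单位时间内的请求次数。下面补一个简单的异步限速器作为示意草图(窗口大小、次数均为假设值,可与信号量叠加使用):
+```python
+import asyncio
+import time
+
+
+class AsyncRateLimiter:
+    """简单的滑动窗口限速器:任意 period 秒内最多放行 max_calls 次请求"""
+
+    def __init__(self, max_calls: int = 10, period: float = 60.0) -> None:
+        self.max_calls = max_calls
+        self.period = period
+        self._timestamps: list[float] = []
+        self._lock = asyncio.Lock()
+
+    async def acquire(self) -> None:
+        async with self._lock:
+            now = time.monotonic()
+            # 丢弃窗口之外的历史记录
+            self._timestamps = [t for t in self._timestamps if now - t < self.period]
+            if len(self._timestamps) >= self.max_calls:
+                # 等到最早一次请求滑出窗口
+                await asyncio.sleep(self.period - (now - self._timestamps[0]))
+            self._timestamps.append(time.monotonic())
+```
+使用时在 crawl_data 发起 page.goto 之前调用 `await limiter.acquire()` 即可。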
数据解析与存储示例 +```python +async def parse_and_store_data( + raw_data: dict, + platform: str +) -> bool: + """ + 解析和存储抓取的数据 + Args: + raw_data: 原始数据 + platform: 平台标识 + Returns: + bool: 处理是否成功 + """ + try: + # 数据清洗 + cleaned_data = clean_raw_data(raw_data) + + # 数据格式化 + formatted_data = format_data(cleaned_data, platform) + + # 数据验证 + if not validate_formatted_data(formatted_data): + raise ValueError("数据验证失败") + + # 存储数据 + db = SocialMediaDB() + try: + # 检查是否存在 + existing = db.get_content( + platform, + formatted_data['content_id'] + ) + + if existing: + # 更新现有数据 + db.update_content( + platform, + formatted_data['content_id'], + formatted_data + ) + else: + # 添加新数据 + db.add_content( + platform, + formatted_data + ) + + return True + + finally: + db.close() + + except Exception as e: + logger.error(f"数据处理失败: {str(e)}") + return False +``` + +## Notes +- 确保所有异步操作都有适当的超时处理 +- 实现完整的日志记录机制 +- 注意处理各平台的特殊情况 +- 保持代码的可维护性和可测试性 +- 定期更新和优化验证策略 +- 注意处理视频上传的平台限制 +- 实现视频处理的性能优化 +- 确保上传过程的稳定性和可靠性 +- 注意请求频率控制和反爬虫对抗 +- 确保数据抓取的稳定性和可靠性 +- 实现数据存储的容错和备份机制 +- 定期更新选择器和抓取策略 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 41288b96..d4e7f18d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,170 +1,175 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/ - - - -# ignore cookie file -tencent_uploader/*.json -youtube_uploader/*.json -douyin_uploader/*.json -bilibili_uploader/*.json -tk_uploader/*.json -cookies \ No newline at end of file +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ + + + +# ignore cookie file +tencent_uploader/*.json +youtube_uploader/*.json +douyin_uploader/*.json +bilibili_uploader/*.json +tk_uploader/*.json +cookies +video_file_manager/ffmpeg/ +data/ +data/video_list_*.json +data/*.db +.playwright/ diff --git a/README.MD b/README.MD index bb17df0f..71280701 100644 --- a/README.MD +++ b/README.MD @@ -1,301 +1,389 @@ -# social-auto-upload -social-auto-upload 该项目旨在自动化发布视频到各个社交媒体平台 - -social-auto-upload This project aims to automate the posting of videos to various social media platforms. - -tiktok show - -## 💡Feature -- 中国主流社交媒体平台: - - [x] 抖音 - - [x] 视频号 - - [x] bilibili - - [x] 小红书 - - [x] 快手 - - [ ] 百家号 - -- 部分国外社交媒体: - - [x] tiktok - - [ ] youtube - ---- -- [ ] 易用版本(支持非开发人员使用):Gui or Cli -- [ ] API 封装 - - [ ] Docker 部署 -- [ ] 自动化上传(schedule) -- [x] 定时上传(cron) -- [ ] cookie 管理 -- [ ] 国外平台proxy 设置 -- [ ] 多线程上传 -- [ ] slack 推送 - - -# 💾Installation -``` -pip install -r requirements.txt -playwright install chromium firefox -``` -非程序员,[新手级教程](https://juejin.cn/post/7372114027840208911) - -# 🐇 About -The project for my own project extracted, my release strategy is timed release (released a day in advance), so the release part of the event are used for the next day time! - -If you need to release it immediately, you can study the source code or ask me questions. - -该项目为我自用项目抽离出来,我的发布策略是定时发布(提前一天发布),故发布部分采用的事件均为第二天的时间 - -如果你有需求立即发布,可自行研究源码或者向我提问 - - -# 核心模块解释 - -### 1. 视频文件准备(video prepare) -filepath 本地视频目录,目录包含(filepath Local video directory containing) - -- 视频文件(video files) -- 视频meta信息txt文件(video meta information txt file) - -举例(for example): - -file:2023-08-24_16-29-52 - 这位勇敢的男子为了心爱之人每天坚守 .mp4 - -meta_file:2023-08-24_16-29-52 - 这位勇敢的男子为了心爱之人每天坚守 .txt - -meta_file 内容(content): -```angular2html -这位勇敢的男子为了心爱之人每天坚守 🥺❤️‍🩹 -#坚持不懈 #爱情执着 #奋斗使者 #短视频 -``` - -### Usage -1. 设置conf 文件中的 `LOCAL_CHROME_PATH`(在douyin、视频号 tiktok可能出现chromium 不兼容的各种问题,建议设置本地的chrome) -2. 这里分割出来3条路 - - 可自行研究源码,免费、任意 穿插在自己的项目中 - - 可参考下面的各个平台的使用指南,`examples`文件夹中有各种示例代码 - - 使用cli 简易使用(支持tiktok douyin 视频号) - -#### cli 用法 -```python -python cli_main.py [options] -``` -查看详细的参数说明使用: -```python -python cli_main.py -h -``` -```python -usage: cli_main.py [-h] platform account_name action ... - -Upload video to multiple social-media. 
- -positional arguments: - platform Choose social-media platform: douyin tencent tiktok kuaishou - account_name Account name for the platform: xiaoA - action Choose action - upload upload operation - login login operation - watch watch operation - -options: - -h, --help show this help message and exit - -``` -示例 -```python -python cli_main.py douyin test login -douyin平台,账号名为test,动作为login - -python cli_main.py douyin test upload "C:\Users\duperdog\Videos\2023-11-07_05-27-44 - 这位少女如梦中仙... .mp4" -pt 0 -douyin平台, 账号名为test, 动作为upload, 视频文件(需对应的meta文件,详见上), 发布方式(pt):0 立即发布 - -python cli_main.py douyin test upload "C:\Users\superdog\Videos\2023-11-07_05-27-44 - 这位少女如梦中仙... .mp4" -pt 1 -t "2024-6-14 12:00" -douyin平台, 账号名为test, 动作为upload, 视频文件, 发布方式(pt):1 定时发布, 发布时间(t): 2024-6-14 12:00 -``` - ---- - -## 各平台详细说明 - -### 1. 抖音 -douyin show - -使用playwright模拟浏览器行为 -> 抖音前端实现,诸多css class id 均为随机数,故项目中locator多采用相对定位,而非固定定位 -1. 准备视频目录结构 -2. cookie获取:get_douyin_cookie.py 扫码登录 -3. 上传视频:upload_video_to_douyin.py - - - -其他部分解释: -``` -douyin_setup handle 参数为True,为手动获取cookie False 则是校验cookie有效性 - -generate_schedule_time_next_day 默认从第二天开始(此举为避免选择时间的意外错误) -参数解释: -- total_videos 本次上传视频个数 -- videos_per_day 每日上传视频数量 -- daily_times 视频发布时间 默认6、11、14、16、22点 -- start_days 从第N天开始 -``` - -> 2023年12月15日补充:使用chromium可能会出现无法识别视频封面图片的情况 -> 解决方案: -> 1. 下载chrome -> 2. 找到chrome的安装目录 -> 3. 将本地chrome 路径conf文件中 LOCAL_CHROME_PATH = "xxx/xxx/chrome.exe" - - -参考项目: -- https://github.com/wanghaisheng/tiktoka-studio-uploader -- https://github.com/Superheroff/douyin_uplod -- https://github.com/lishang520/DouYin-Auto-Upload.git - ---- - -### 2. 视频号 -使用playwright模拟浏览器行为 -1. 准备视频目录结构 -2. cookie获取:get_tencent_cookie.py 扫码登录 -3. 上传视频:upload_video_to_tencent.py - - - -其他部分解释: -``` -参考上面douyin_setup 配置 -``` - -> 视频号使用chromium会出现不支持上传视频,需要自行指定本地浏览器 -> 解决方案: -> 1. 下载chrome -> 2. 找到chrome的安装目录 -> 3. 将本地chrome 路径conf文件中 LOCAL_CHROME_PATH = "xxx/xxx/chrome.exe" - ---- - - -### 3. 小红书 -该实现,借助ReaJason的[xhs](https://github.com/ReaJason/xhs),再次感谢。 - -1. 目录结构同上 -2. cookie获取,可使用chrome插件:EditThisCookie -- 设置导出格式 -![Alt text](media/20231009111131.png) -- 导出 -![Alt text](media/20231009111214.png) -3. 黏贴至 uploader/xhs_uploader/accounts.ini文件中 - - -#### 解释与注意事项: - -``` -xhs签名方式分为两种: -- 本地签名 sign_locl (默认) -- 自建签名服务 sign - -测试下来发现本地签名,在实际多账号情况下会存在问题 -故如果你有多账号分发,建议采用自建签名服务(todo 上传docker配置) -``` -##### 疑难杂症 -遇到签名问题,可尝试更新 "utils/stealth.min.js"文件 -https://github.com/requireCool/stealth.min.js - -24.4.10 大多数人小红书失败的原因在于你的cookie有问题,请参照你cookie是不是如我这样 -![正确的cookie](media/xhs_error_cookie.png) - -关于xhs部分可参考作者: https://reajason.github.io/xhs/basic - -##### todo -- 扫码登录方式(实验下来发现与浏览器获取的存在区别,会有问题,未来再研究) - - -参考项目: -- https://github.com/ReaJason/xhs - ---- - -### 4. bilibili -该实现,借助biliup的[biliup-rs](https://github.com/biliup/biliup-rs),再次感谢。 -1. 准备视频目录结构 -2. cookie获取:`biliup.exe -u account.json login` 选项你喜欢的登录方式 -![登录方式](media/get_bili_cookie.png) -3. 上传视频:upload_video_to_bilibili.py - -#### 解释与注意事项: - -``` -bilibili cookie 长期有效(至少我运行2年以来是这样的) -其他平台 诸如linux mac 服务器运行 请自行研究 -``` -参考项目: -- https://github.com/biliup/biliup-rs - ---- - -### 5. tiktok -使用playwright模拟浏览器行为(Simulating Browser Behavior with playwright) -1. 准备视频目录结构(Prepare the video directory structure) -2. 将本地chrome路径配置到conf文件中 LOCAL_CHROME_PATH = "xxx/xxx/chrome.exe"(Configure the local chrome path to the conf file) -3. cookie获取(generate your cookie):get_tk_cookie.py -![get tiktok cookie](media/tk_login.png) -4. 
上传视频(upload video):upload_video_to_tiktok.py - -24.7.3 update local chrome support: -- you can upload your custom thumbnail(place `png` file at the path of `videos`) -- if not, the system will take the first frame of the video as the thumbnail. -- chrome drive can't login by gmail oauth. the google don't support it. (if you want login by google oauth you can use the `tk_uploader/main.py` old firefox.) -- before the upload process change the language to eng. - -其他部分解释: -``` -参考上面douyin_setup 配置 -``` - -other part explain(for eng friends): -``` -tiktok_setup handle parameter is True to get cookie manually False to check cookie validity - -generate_schedule_time_next_day defaults to start on the next day (this is to avoid accidental time selection errors) -Parameter explanation: -- total_videos Number of videos uploaded this time -- videos_per_day Number of videos uploaded per day -- daily_times The video posting times are 6, 11, 14, 16, 22 by default. -- start_days Starts on the nth day. -``` -参考项目: -- https://github.com/wkaisertexas/tiktok-uploader - ---- - -### 其余部分(todo) -整理后上传 - ---- - -## 其他优秀项目 -- https://github.com/KLordy/auto_publish_videos - - db支持 - - 定期自动发布 - - cookie db管理 - - 视频去重 - - 消息推送 - - -## 🐾Communicate -[Donate as u like](https://www.buymeacoffee.com/hysn2001m) - -如果你也是`一个人`,喜欢`折腾`, 想要在如此恶劣的大环境寻找突破 - -希望探索 #技术变现 #AI创业 #跨境航海 #自动化上传 #自动化视频 #技术探讨 - -可以来群里和大家交流 - -|![Nas](media/mp.jpg)|![赞赏](media/QR.png)| -|:-:|:-:| -|后台回复 `上传` 加群交流|如果你觉得有用| - - -## Star History -> 如果这个项目对你有帮助,⭐以表示支持 - -[![Star History Chart](https://api.star-history.com/svg?repos=dreammis/social-auto-upload&type=Date)](https://star-history.com/#dreammis/social-auto-upload&Date) \ No newline at end of file +# social-auto-upload +social-auto-upload 该项目旨在自动化发布视频到各个社交媒体平台 + +social-auto-upload This project aims to automate the posting of videos to various social media platforms. + +tiktok show + +## 💡Feature +- 中国主流社交媒体平台: + - [x] 抖音 + - [x] 视频号 + - [x] bilibili + - [x] 小红书 + - [x] 快手 + - [ ] 百家号 + +- 部分国外社交媒体: + - [x] tiktok + - [ ] youtube + +--- +- [ ] 易用版本(支持非开发人员使用):Gui or Cli +- [ ] API 封装 + - [ ] Docker 部署 +- [ ] 自动化上传(schedule) +- [x] 定时上传(cron) +- [ ] cookie 管理 +- [ ] 国外平台proxy 设置 +- [ ] 多线程上传 +- [ ] slack 推送 + + +# 💾Installation +``` +pip install -r requirements.txt +playwright install chromium firefox +``` +非程序员,[新手级教程](https://juejin.cn/post/7372114027840208911) + +# 🐇 About +The project for my own project extracted, my release strategy is timed release (released a day in advance), so the release part of the event are used for the next day time! + +If you need to release it immediately, you can study the source code or ask me questions. + +该项目为我自用项目抽离出来,我的发布策略是定时发布(提前一天发布),故发布部分采用的事件均为第二天的时间 + +如果你有需求立即发布,可自行研究源码或者向我提问 + + +# 核心模块解释 + +### 1. 视频文件准备(video prepare) +filepath 本地视频目录,目录包含(filepath Local video directory containing) + +- 视频文件(video files) +- 视频meta信息txt文件(video meta information txt file) + +举例(for example): + +file:2023-08-24_16-29-52 - 这位勇敢的男子为了心爱之人每天坚守 .mp4 + +meta_file:2023-08-24_16-29-52 - 这位勇敢的男子为了心爱之人每天坚守 .txt + +meta_file 内容(content): +```angular2html +这位勇敢的男子为了心爱之人每天坚守 🥺❤️‍🩹 +#坚持不懈 #爱情执着 #奋斗使者 #短视频 +``` + +### Usage +1. 设置conf 文件中的 `LOCAL_CHROME_PATH`(在douyin、视频号 tiktok可能出现chromium 不兼容的各种问题,建议设置本地的chrome) +2. 
这里分割出来3条路 + - 可自行研究源码,免费、任意 穿插在自己的项目中 + - 可参考下面的各个平台的使用指南,`examples`文件夹中有各种示例代码 + - 使用cli 简易使用(支持tiktok douyin 视频号) + +#### cli 用法 +```python +python cli_main.py [options] +``` +查看详细的参数说明使用: +```python +python cli_main.py -h +``` +```python +usage: cli_main.py [-h] platform account_name action ... + +Upload video to multiple social-media. + +positional arguments: + platform Choose social-media platform: douyin tencent tiktok kuaishou + account_name Account name for the platform: xiaoA + action Choose action + upload upload operation + login login operation + watch watch operation + +options: + -h, --help show this help message and exit + +``` +示例 +```python +python cli_main.py douyin test login +douyin平台,账号名为test,动作为login + +python cli_main.py douyin test upload "C:\Users\duperdog\Videos\2023-11-07_05-27-44 - 这位少女如梦中仙... .mp4" -pt 0 +douyin平台, 账号名为test, 动作为upload, 视频文件(需对应的meta文件,详见上), 发布方式(pt):0 立即发布 + +python cli_main.py douyin test upload "C:\Users\superdog\Videos\2023-11-07_05-27-44 - 这位少女如梦中仙... .mp4" -pt 1 -t "2024-6-14 12:00" +douyin平台, 账号名为test, 动作为upload, 视频文件, 发布方式(pt):1 定时发布, 发布时间(t): 2024-6-14 12:00 +``` + +--- + +## 各平台详细说明 + +### 1. 抖音 +douyin show + +使用playwright模拟浏览器行为 +> 抖音前端实现,诸多css class id 均为随机数,故项目中locator多采用相对定位,而非固定定位 +1. 准备视频目录结构 +2. cookie获取:get_douyin_cookie.py 扫码登录 +3. 上传视频:upload_video_to_douyin.py + + + +其他部分解释: +``` +douyin_setup handle 参数为True,为手动获取cookie False 则是校验cookie有效性 + +generate_schedule_time_next_day 默认从第二天开始(此举为避免选择时间的意外错误) +参数解释: +- total_videos 本次上传视频个数 +- videos_per_day 每日上传视频数量 +- daily_times 视频发布时间 默认6、11、14、16、22点 +- start_days 从第N天开始 +``` + +> 2023年12月15日补充:使用chromium可能会出现无法识别视频封面图片的情况 +> 解决方案: +> 1. 下载chrome +> 2. 找到chrome的安装目录 +> 3. 将本地chrome 路径conf文件中 LOCAL_CHROME_PATH = "xxx/xxx/chrome.exe" + + +参考项目: +- https://github.com/wanghaisheng/tiktoka-studio-uploader +- https://github.com/Superheroff/douyin_uplod +- https://github.com/lishang520/DouYin-Auto-Upload.git + +--- + +### 2. 视频号 +使用playwright模拟浏览器行为 +1. 准备视频目录结构 +2. cookie获取:get_tencent_cookie.py 扫码登录 +3. 上传视频:upload_video_to_tencent.py + + + +其他部分解释: +``` +参考上面douyin_setup 配置 +``` + +> 视频号使用chromium会出现不支持上传视频,需要自行指定本地浏览器 +> 解决方案: +> 1. 下载chrome +> 2. 找到chrome的安装目录 +> 3. 将本地chrome 路径conf文件中 LOCAL_CHROME_PATH = "xxx/xxx/chrome.exe" + +--- + + +### 3. 小红书 +该实现,借助ReaJason的[xhs](https://github.com/ReaJason/xhs),再次感谢。 + +1. 目录结构同上 +2. cookie获取,可使用chrome插件:EditThisCookie +- 设置导出格式 +![Alt text](media/20231009111131.png) +- 导出 +![Alt text](media/20231009111214.png) +3. 黏贴至 uploader/xhs_uploader/accounts.ini文件中 + + +#### 解释与注意事项: + +``` +xhs签名方式分为两种: +- 本地签名 sign_locl (默认) +- 自建签名服务 sign + +测试下来发现本地签名,在实际多账号情况下会存在问题 +故如果你有多账号分发,建议采用自建签名服务(todo 上传docker配置) +``` +##### 疑难杂症 +遇到签名问题,可尝试更新 "utils/stealth.min.js"文件 +https://github.com/requireCool/stealth.min.js + +24.4.10 大多数人小红书失败的原因在于你的cookie有问题,请参照你cookie是不是如我这样 +![正确的cookie](media/xhs_error_cookie.png) + +关于xhs部分可参考作者: https://reajason.github.io/xhs/basic + +##### todo +- 扫码登录方式(实验下来发现与浏览器获取的存在区别,会有问题,未来再研究) + + +参考项目: +- https://github.com/ReaJason/xhs + +--- + +### 4. bilibili +该实现,借助biliup的[biliup-rs](https://github.com/biliup/biliup-rs),再次感谢。 +1. 准备视频目录结构 +2. cookie获取:`biliup.exe -u account.json login` 选项你喜欢的登录方式 +![登录方式](media/get_bili_cookie.png) +3. 上传视频:upload_video_to_bilibili.py + +#### 解释与注意事项: + +``` +bilibili cookie 长期有效(至少我运行2年以来是这样的) +其他平台 诸如linux mac 服务器运行 请自行研究 +``` +参考项目: +- https://github.com/biliup/biliup-rs + +--- + +### 5. tiktok +使用playwright模拟浏览器行为(Simulating Browser Behavior with playwright) +1. 
准备视频目录结构(Prepare the video directory structure) +2. 将本地chrome路径配置到conf文件中 LOCAL_CHROME_PATH = "xxx/xxx/chrome.exe"(Configure the local chrome path to the conf file) +3. cookie获取(generate your cookie):get_tk_cookie.py +![get tiktok cookie](media/tk_login.png) +4. 上传视频(upload video):upload_video_to_tiktok.py + +24.7.3 update local chrome support: +- you can upload your custom thumbnail(place `png` file at the path of `videos`) +- if not, the system will take the first frame of the video as the thumbnail. +- chrome drive can't login by gmail oauth. the google don't support it. (if you want login by google oauth you can use the `tk_uploader/main.py` old firefox.) +- before the upload process change the language to eng. + +其他部分解释: +``` +参考上面douyin_setup 配置 +``` + +other part explain(for eng friends): +``` +tiktok_setup handle parameter is True to get cookie manually False to check cookie validity + +generate_schedule_time_next_day defaults to start on the next day (this is to avoid accidental time selection errors) +Parameter explanation: +- total_videos Number of videos uploaded this time +- videos_per_day Number of videos uploaded per day +- daily_times The video posting times are 6, 11, 14, 16, 22 by default. +- start_days Starts on the nth day. +``` +参考项目: +- https://github.com/wkaisertexas/tiktok-uploader + +--- + +### 其余部分(todo) +整理后上传 + +--- + +## 其他优秀项目 +- https://github.com/KLordy/auto_publish_videos + - db支持 + - 定期自动发布 + - cookie db管理 + - 视频去重 + - 消息推送 + + +## 🐾Communicate +[Donate as u like](https://www.buymeacoffee.com/hysn2001m) + +如果你也是`一个人`,喜欢`折腾`, 想要在如此恶劣的大环境寻找突破 + +希望探索 #技术变现 #AI创业 #跨境航海 #自动化上传 #自动化视频 #技术探讨 + +可以来群里和大家交流 + +|![Nas](media/mp.jpg)|![赞赏](media/QR.png)| +|:-:|:-:| +|后台回复 `上传` 加群交流|如果你觉得有用| + + +## Star History +> 如果这个项目对你有帮助,⭐以表示支持 + +[![Star History Chart](https://api.star-history.com/svg?repos=dreammis/social-auto-upload&type=Date)](https://star-history.com/#dreammis/social-auto-upload&Date) + +# 视频管理工具 + +一个简单高效的视频文件管理工具,帮助你管理多平台视频发布状态。 + +## 项目愿景 + +帮助内容创作者高效管理视频文件和多平台发布状态,提高工作效率。 + +## 快速开始 + +1. 克隆项目 +```bash +git clone [项目地址] +cd video-manager +``` + +2. 创建虚拟环境 +```bash +python -m venv venv +source venv/bin/activate # Linux/Mac +# 或 +.\venv\Scripts\activate # Windows +``` + +3. 安装依赖 +```bash +pip install -r requirements.txt +``` + +4. 配置环境变量 +```bash +cp .env.example .env +# 编辑 .env 文件,设置必要的配置 +``` + +5. 运行程序 +```bash +streamlit run src/frontend/app.py +``` + +## 功能地图 + +``` +视频管理工具 +├── 文件管理 +│ ├── 视频扫描 +│ ├── 元数据管理 +│ └── 文件监控 +├── 发布管理 +│ ├── 多平台配置 +│ ├── 发布状态跟踪 +│ └── 定时发布 +└── 数据统计 + ├── 发布统计 + └── 状态分析 +``` + +## 典型使用场景 + +1. **批量视频管理** + - 扫描指定目录下的所有视频文件 + - 自动生成和管理 info.json 文件 + - 查看和编辑视频元数据 + +2. **多平台发布跟踪** + - 管理不同平台的发布状态 + - 设置发布时间和标签 + - 记录发布URL和笔记 + +3. 
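+其中,单个视频的各平台发布信息记录在 info.json 中。下面用一小段脚本示意如何读取这些记录(路径与字段参照仓库内 data/info.json 的示例,仅作演示):
+
+```python
+import json
+from pathlib import Path
+
+# 读取发布信息(data/info.json 为仓库中附带的示例文件)
+records = json.loads(Path("data/info.json").read_text(encoding="utf-8"))
+for record in records:
+    for platform, meta in record.items():
+        print(platform, meta["title"], meta.get("publish_date"))
+```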
**数据统计分析** + - 查看发布统计数据 + - 分析发布效果 + - 导出数据报告 + +## 技术栈 + +- Frontend: Streamlit +- Backend: Python 3.10+ +- Data Validation: Pydantic +- File Monitoring: Watchdog + +## 开发规范 + +- 使用 mypy 进行静态类型检查 +- 使用 pytest 进行单元测试 +- 遵循 PEP 8 编码规范 +- 保持代码覆盖率在 80% 以上 \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 00000000..12c032b6 --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ +# 空文件,标记这是一个Python包 \ No newline at end of file diff --git a/ai_module.py b/ai_module.py new file mode 100644 index 00000000..5a40c139 --- /dev/null +++ b/ai_module.py @@ -0,0 +1,42 @@ +""" +AI功能集成模块 +包含自动生成标签、智能标题优化等AI功能 +""" + +import openai # 需要安装openai库 + +def generate_hashtags(text: str, max_tags: int = 5) -> list: + """ + 模拟AI生成标签功能(基础版) + + 参数: + text - 输入文本 + max_tags - 最大返回标签数 + + 返回值: + 标签列表(示例数据) + """ + # 基础实现示例 + sample_tags = ["视频", "精选", "热门", "推荐", "生活"] + return sample_tags[:max_tags] + +def generate_hashtags_ai(text: str, max_tags: int = 5) -> list: + """ + 使用ChatGPT生成智能标签 + + 需要设置环境变量: + export OPENAI_API_KEY='your-api-key' + """ + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[{ + "role": "user", + "content": f"为以下内容生成{max_tags}个中文标签,用逗号分隔:{text}" + }] + ) + return response.choices[0].message.content.split(",") + +# 后续可扩展其他AI功能: +# - 智能标题优化 +# - 内容合规检查 +# - 自动生成视频描述 \ No newline at end of file diff --git a/clean_html.py b/clean_html.py new file mode 100644 index 00000000..2978c725 --- /dev/null +++ b/clean_html.py @@ -0,0 +1,31 @@ +from bs4 import BeautifulSoup +import sys + +def clean_html(input_file, output_file): + # 读取原始文件 + with open(input_file, 'r', encoding='utf-8') as f: + content = f.read() + + # 使用BeautifulSoup解析HTML + soup = BeautifulSoup(content, 'html.parser') + + # 移除所有style标签 + for style in soup.find_all('style'): + style.decompose() + + # 移除所有class和style属性 + for tag in soup.find_all(True): + if tag.has_attr('class'): + del tag['class'] + if tag.has_attr('style'): + del tag['style'] + + # 保存清理后的文件 + with open(output_file, 'w', encoding='utf-8') as f: + f.write(str(soup.prettify())) + +if __name__ == '__main__': + input_file = 'error_logs/cookie_gen_20250220_200544.html' + output_file = 'error_logs/cookie_gen_20250220_200544_clean.html' + clean_html(input_file, output_file) + print("HTML文件已清理完成!") \ No newline at end of file diff --git a/cli_main.py b/cli_main.py index fc35eb7f..372bc864 100644 --- a/cli_main.py +++ b/cli_main.py @@ -1,101 +1,101 @@ -import argparse -import asyncio -from datetime import datetime -from os.path import exists -from pathlib import Path - -from conf import BASE_DIR -from uploader.douyin_uploader.main import douyin_setup, DouYinVideo -from uploader.ks_uploader.main import ks_setup, KSVideo -from uploader.tencent_uploader.main import weixin_setup, TencentVideo -from uploader.tk_uploader.main_chrome import tiktok_setup, TiktokVideo -from utils.base_social_media import get_supported_social_media, get_cli_action, SOCIAL_MEDIA_DOUYIN, \ - SOCIAL_MEDIA_TENCENT, SOCIAL_MEDIA_TIKTOK, SOCIAL_MEDIA_KUAISHOU -from utils.constant import TencentZoneTypes -from utils.files_times import get_title_and_hashtags - - -def parse_schedule(schedule_raw): - if schedule_raw: - schedule = datetime.strptime(schedule_raw, '%Y-%m-%d %H:%M') - else: - schedule = None - return schedule - - -async def main(): - # 主解析器 - parser = argparse.ArgumentParser(description="Upload video to multiple social-media.") - parser.add_argument("platform", metavar='platform', choices=get_supported_social_media(), help="Choose social-media 
platform: douyin tencent tiktok kuaishou") - - parser.add_argument("account_name", type=str, help="Account name for the platform: xiaoA") - subparsers = parser.add_subparsers(dest="action", metavar='action', help="Choose action", required=True) - - actions = get_cli_action() - for action in actions: - action_parser = subparsers.add_parser(action, help=f'{action} operation') - if action == 'login': - # Login 不需要额外参数 - continue - elif action == 'upload': - action_parser.add_argument("video_file", help="Path to the Video file") - action_parser.add_argument("-pt", "--publish_type", type=int, choices=[0, 1], - help="0 for immediate, 1 for scheduled", default=0) - action_parser.add_argument('-t', '--schedule', help='Schedule UTC time in %Y-%m-%d %H:%M format') - - # 解析命令行参数 - args = parser.parse_args() - # 参数校验 - if args.action == 'upload': - if not exists(args.video_file): - raise FileNotFoundError(f'Could not find the video file at {args["video_file"]}') - if args.publish_type == 1 and not args.schedule: - parser.error("The schedule must must be specified for scheduled publishing.") - - account_file = Path(BASE_DIR / "cookies" / f"{args.platform}_{args.account_name}.json") - account_file.parent.mkdir(exist_ok=True) - - # 根据 action 处理不同的逻辑 - if args.action == 'login': - print(f"Logging in with account {args.account_name} on platform {args.platform}") - if args.platform == SOCIAL_MEDIA_DOUYIN: - await douyin_setup(str(account_file), handle=True) - elif args.platform == SOCIAL_MEDIA_TIKTOK: - await tiktok_setup(str(account_file), handle=True) - elif args.platform == SOCIAL_MEDIA_TENCENT: - await weixin_setup(str(account_file), handle=True) - elif args.platform == SOCIAL_MEDIA_KUAISHOU: - await ks_setup(str(account_file), handle=True) - elif args.action == 'upload': - title, tags = get_title_and_hashtags(args.video_file) - video_file = args.video_file - - if args.publish_type == 0: - print("Uploading immediately...") - publish_date = 0 - else: - print("Scheduling videos...") - publish_date = parse_schedule(args.schedule) - - if args.platform == SOCIAL_MEDIA_DOUYIN: - await douyin_setup(account_file, handle=False) - app = DouYinVideo(title, video_file, tags, publish_date, account_file) - elif args.platform == SOCIAL_MEDIA_TIKTOK: - await tiktok_setup(account_file, handle=True) - app = TiktokVideo(title, video_file, tags, publish_date, account_file) - elif args.platform == SOCIAL_MEDIA_TENCENT: - await weixin_setup(account_file, handle=True) - category = TencentZoneTypes.LIFESTYLE.value # 标记原创需要否则不需要传 - app = TencentVideo(title, video_file, tags, publish_date, account_file, category) - elif args.platform == SOCIAL_MEDIA_KUAISHOU: - await ks_setup(account_file, handle=True) - app = KSVideo(title, video_file, tags, publish_date, account_file) - else: - print("Wrong platform, please check your input") - exit() - - await app.main() - - -if __name__ == "__main__": - asyncio.run(main()) +import argparse +import asyncio +from datetime import datetime +from os.path import exists +from pathlib import Path + +from conf import BASE_DIR +from uploader.douyin_uploader.main import douyin_setup, DouYinVideo +from uploader.ks_uploader.main import ks_setup, KSVideo +from uploader.tencent_uploader.main import weixin_setup, TencentVideo +from uploader.tk_uploader.main_chrome import tiktok_setup, TiktokVideo +from utils.base_social_media import get_supported_social_media, get_cli_action, SOCIAL_MEDIA_DOUYIN, \ + SOCIAL_MEDIA_TENCENT, SOCIAL_MEDIA_TIKTOK, SOCIAL_MEDIA_KUAISHOU +from utils.constant import 
TencentZoneTypes +from utils.files_times import get_title_and_hashtags + + +def parse_schedule(schedule_raw): + if schedule_raw: + schedule = datetime.strptime(schedule_raw, '%Y-%m-%d %H:%M') + else: + schedule = None + return schedule + + +async def main(): + # 主解析器 + parser = argparse.ArgumentParser(description="Upload video to multiple social-media.") + parser.add_argument("platform", metavar='platform', choices=get_supported_social_media(), help="Choose social-media platform: douyin tencent tiktok kuaishou") + + parser.add_argument("account_name", type=str, help="Account name for the platform: xiaoA") + subparsers = parser.add_subparsers(dest="action", metavar='action', help="Choose action", required=True) + + actions = get_cli_action() + for action in actions: + action_parser = subparsers.add_parser(action, help=f'{action} operation') + if action == 'login': + # Login 不需要额外参数 + continue + elif action == 'upload': + action_parser.add_argument("video_file", help="Path to the Video file") + action_parser.add_argument("-pt", "--publish_type", type=int, choices=[0, 1], + help="0 for immediate, 1 for scheduled", default=0) + action_parser.add_argument('-t', '--schedule', help='Schedule UTC time in %Y-%m-%d %H:%M format') + + # 解析命令行参数 + args = parser.parse_args() + # 参数校验 + if args.action == 'upload': + if not exists(args.video_file): + raise FileNotFoundError(f'Could not find the video file at {args["video_file"]}') + if args.publish_type == 1 and not args.schedule: + parser.error("The schedule must must be specified for scheduled publishing.") + + account_file = Path(BASE_DIR / "cookies" / f"{args.platform}_{args.account_name}.json") + account_file.parent.mkdir(exist_ok=True) + + # 根据 action 处理不同的逻辑 + if args.action == 'login': + print(f"Logging in with account {args.account_name} on platform {args.platform}") + if args.platform == SOCIAL_MEDIA_DOUYIN: + await douyin_setup(str(account_file), handle=True) + elif args.platform == SOCIAL_MEDIA_TIKTOK: + await tiktok_setup(str(account_file), handle=True) + elif args.platform == SOCIAL_MEDIA_TENCENT: + await weixin_setup(str(account_file), handle=True) + elif args.platform == SOCIAL_MEDIA_KUAISHOU: + await ks_setup(str(account_file), handle=True) + elif args.action == 'upload': + title, tags = get_title_and_hashtags(args.video_file) + video_file = args.video_file + + if args.publish_type == 0: + print("Uploading immediately...") + publish_date = 0 + else: + print("Scheduling videos...") + publish_date = parse_schedule(args.schedule) + + if args.platform == SOCIAL_MEDIA_DOUYIN: + await douyin_setup(account_file, handle=False) + app = DouYinVideo(title, video_file, tags, publish_date, account_file) + elif args.platform == SOCIAL_MEDIA_TIKTOK: + await tiktok_setup(account_file, handle=True) + app = TiktokVideo(title, video_file, tags, publish_date, account_file) + elif args.platform == SOCIAL_MEDIA_TENCENT: + await weixin_setup(account_file, handle=True) + category = TencentZoneTypes.LIFESTYLE.value # 标记原创需要否则不需要传 + app = TencentVideo(title, video_file, tags, publish_date, account_file, category) + elif args.platform == SOCIAL_MEDIA_KUAISHOU: + await ks_setup(account_file, handle=True) + app = KSVideo(title, video_file, tags, publish_date, account_file) + else: + print("Wrong platform, please check your input") + exit() + + await app.main() + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/conf.py b/conf.py index b653ae4f..d65033c7 100644 --- a/conf.py +++ b/conf.py @@ -1,5 +1,20 @@ -from pathlib import Path - -BASE_DIR = 
Path(__file__).parent.resolve() -XHS_SERVER = "http://127.0.0.1:11901" -LOCAL_CHROME_PATH = "" # change me necessary! for example C:/Program Files/Google/Chrome/Application/chrome.exe +from pathlib import Path + +# 获取项目根目录 +BASE_DIR = Path(__file__).parent.absolute() +XHS_SERVER = "http://127.0.0.1:5005" +# 使用原始字符串(r)或正斜杠(/)来避免转义序列问题 +LOCAL_CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe" # 或使用 "C:/Program Files/Google/Chrome/Application/chrome.exe" + +# 可扩展为: +class TencentConfig: + VIDEO_SETTINGS = { + 'max_retries': 5, + 'allowed_formats': ['.mp4', '.mov'], + 'default_category': '生活' + } + + SCHEDULING_RULES = { + 'earliest_time': '06:00', + 'latest_time': '23:00' + } diff --git a/data/info.json b/data/info.json new file mode 100644 index 00000000..82a8a2a6 --- /dev/null +++ b/data/info.json @@ -0,0 +1,11 @@ +[ + { + "tencent":{ + "title": "人教人教不会,事教人一教就会。", + "tags": ["情感共鸣", "向阳有米", "女性成长", "girlstalk", "正能量", "认知", "女性智慧"], + "friends": ["微信创作者", "微信创作者助手", "向阳也有米"], + "publish_date": "2025-02-07 21:00:00" + } + } + +] \ No newline at end of file diff --git a/data/social_media.db b/data/social_media.db new file mode 100644 index 00000000..553dd894 Binary files /dev/null and b/data/social_media.db differ diff --git a/doc/README.MD b/doc/README.MD new file mode 100644 index 00000000..a4d90fe1 --- /dev/null +++ b/doc/README.MD @@ -0,0 +1,4 @@ +视频号定时发布,最多支持一个月 +抖音定时发布,支持最多14天 +快手定时发布,支持最多14天 +小红书定时发布,支持最多14天 diff --git a/doc/prompt/git_commit_expert_prompt.md b/doc/prompt/git_commit_expert_prompt.md new file mode 100644 index 00000000..53be9ccc --- /dev/null +++ b/doc/prompt/git_commit_expert_prompt.md @@ -0,0 +1,134 @@ +# Role: Git Commit Message专家 + +## Profile +- Git提交信息规范专家 +- 代码变更描述优化师 +- Emoji语义专家 +- 版本控制最佳实践布道者 + +## Description +- 精通Conventional Commits规范 +- 擅长编写清晰、简洁的提交信息 +- 深入理解Git工作流程 +- 熟练运用Emoji增强提交信息的可读性 +- 能够准确表达代码变更的意图和影响 +- 保持提交历史的整洁和连贯性 + +## Rules +### 提交信息格式 +- 格式:` (): <中文描述>` +- 标题行不超过72个字符(约24个汉字) +- 正文每行不超过100个字符(约33个汉字) +- 使用现在时态描述变更 +- 中文描述简洁明了 +- 标题行结尾不加句号 + +### Emoji规范 +#### 主要类别 +- ✨ `:sparkles:` - 新功能 +- 🐛 `:bug:` - 修复bug +- 📝 `:memo:` - 文档更新 +- 💄 `:lipstick:` - UI/样式更新 +- ♻️ `:recycle:` - 代码重构 +- ⚡️ `:zap:` - 性能优化 +- 🔧 `:wrench:` - 配置修改 +- 🔨 `:hammer:` - 开发工具 +- 🚀 `:rocket:` - 部署相关 + +#### 次要类别 +- 🎨 `:art:` - 代码格式 +- 🔥 `:fire:` - 删除代码 +- ✅ `:white_check_mark:` - 添加测试 +- 🔒 `:lock:` - 安全相关 +- 👷 `:construction_worker:` - CI相关 +- 📦 `:package:` - 依赖更新 + +### 类型规范 +- feat: 新功能 +- fix: 修复 +- docs: 文档 +- style: 格式 +- refactor: 重构 +- perf: 性能 +- test: 测试 +- build: 构建 +- ci: CI配置 +- chore: 其他更改 + +## Workflow +1. 分析变更内容 + - 确定变更类型 + - 识别影响范围 + - 选择合适emoji + +2. 构建提交信息 + - 编写简洁标题 + - 补充必要说明 + - 关联相关议题 + +3. 信息审查 + - 检查格式规范 + - 验证信息完整性 + - 确保清晰可读 + +## Commands +/commit - 生成完整提交信息 +/emoji - 查询emoji用法 +/format - 格式化提交信息 +/scope - 确定影响范围 +/revert - 生成回滚提交 + +## Examples +### 1. 新功能提交 +``` +✨ feat(用户系统): 添加谷歌账号登录功能 + +- 实现谷歌OAuth2.0认证流程 +- 添加用户资料同步功能 +- 更新登录界面,支持社交账号登录选项 + +关联问题: #123 +``` + +### 2. Bug修复 +``` +🐛 fix(接口): 修复搜索接口空响应处理问题 + +- 修复搜索无结果时应用崩溃的问题 +- 添加空结果状态的错误提示 +- 优化错误处理逻辑 + +修复: #456 +``` + +### 3. 文档更新 +``` +📝 docs(说明文档): 更新安装指南 + +- 添加Docker环境配置说明 +- 更新环境变量配置表格 +- 修复API文档中的失效链接 +``` + +### 4. 
性能优化 +``` +⚡️ perf(核心模块): 优化图片加载性能 + +- 实现图片库懒加载功能 +- 添加图片压缩处理流程 +- 初始包体积减少30% + +性能提升指标: +- 加载时间: 2.3秒 -> 0.8秒 +- 首次渲染: 1.2秒 -> 0.5秒 +``` + +## Notes +- 每个提交只做一件事 +- 保持提交粒度适中 +- 使用中文编写提交信息 +- 关联相关的Issue/PR +- 在正文中说明重要的副作用 +- 记录性能改进的具体数据 +- type和scope使用英文,其他描述使用中文 +- 分支名和命令行操作保持使用英文 \ No newline at end of file diff --git a/doc/prompt/gradio_expert_prompt.md b/doc/prompt/gradio_expert_prompt.md new file mode 100644 index 00000000..0376fa46 --- /dev/null +++ b/doc/prompt/gradio_expert_prompt.md @@ -0,0 +1,210 @@ +# Role: Gradio Web开发专家 (v5.16+) + +## Profile +我是一位专注于Gradio 5.16+版本框架的Python Web应用开发专家,擅长构建直观、高效且用户友好的机器学习模型界面。我将帮助你设计和实现符合Gradio最新版本最佳实践的Web应用。 + +## Description +- 精通Gradio 5.16+全系列组件和API的使用 +- 深度理解Gradio的界面设计原则和性能优化策略 +- 擅长构建响应式、美观的用户界面 +- 熟练掌握Gradio与各类机器学习框架的集成 +- 具备Web应用性能调优和部署经验 +- 熟悉Gradio 5.16+新特性: + * 新版Chatbot组件的message格式 + * 改进的事件系统和装饰器语法 + * 增强的主题定制能力 + * 优化的文件处理机制 + * 新增的组件属性和方法 + +## Rules +### 版本兼容性规范 +- [强制] 使用Gradio 5.16+版本特性: + * 使用新版事件系统语法 + * 采用最新的组件API + * 遵循新版本的类型提示规范 +- [强制] 依赖管理: + * 在requirements.txt中指定:`gradio>=5.16.0` + * 使用兼容的Python版本(3.8+) + * 确保所有依赖库版本兼容 + +### 界面设计规范 +- [强制] 遵循Gradio的组件设计理念: + * 使用语义化的组件名称 + * 保持界面简洁直观 + * 确保组件间的逻辑关系清晰 +- [推荐] 采用响应式布局: + * 使用gr.Row()和gr.Column()进行灵活布局 + * 适配不同屏幕尺寸 + * 合理使用空间和间距 +- [推荐] 使用新版主题系统: + * 利用gr.themes进行全局样式定制 + * 使用css参数进行精细样式调整 + * 适配深色模式 + +### 代码质量要求 +- [强制] 组件事件处理: + * 使用最新的@gr.on装饰器语法 + * 使用类型注解确保函数参数类型安全 + * 异常处理必须优雅且用户友好 + * 长时间运行的操作需要进度反馈 +- [推荐] 性能优化: + * 使用queue()处理并发请求 + * 合理使用缓存机制 + * 优化资源加载顺序 + * 利用新版本的性能优化特性 + +### 用户体验准则 +- [强制] 交互反馈: + * 所有操作必须有明确的状态提示 + * 错误信息要清晰易懂 + * 提供适当的默认值 +- [推荐] 界面美化: + * 使用一致的颜色主题 + * 添加适当的动画效果 + * 优化移动端体验 + +## Workflow +1. 需求分析 + - 明确应用目标和用户群体 + - 设计交互流程 + - 确定必要的组件 + +2. 界面设计 + - 规划组件布局 + - 设计数据流转 + - 确定样式主题 + +3. 功能实现 + - 编写核心处理函数 + - 实现组件交互逻辑 + - 添加错误处理 + +4. 优化改进 + - 性能测试和优化 + - 用户体验完善 + - 代码重构和文档 + +## Commands +/create - 创建新的Gradio应用模板 +/layout - 生成界面布局建议 +/optimize - 优化现有Gradio应用 +/deploy - 提供部署方案建议 +/examples - 展示常用代码示例 +/version - 检查版本兼容性问题 + +## Examples +### 1. 现代化界面布局(v5.16+) +```python +import gradio as gr +from typing import Literal + +def greet(name: str, style: Literal["formal", "casual"]) -> str: + prefix = "Dear" if style == "formal" else "Hey" + return f"{prefix}, {name}!" + +with gr.Blocks(theme=gr.themes.Soft()) as demo: + with gr.Row(): + with gr.Column(scale=2): + name = gr.Textbox( + label="Your Name", + placeholder="Enter your name...", + show_copy_button=True + ) + style = gr.Radio( + choices=["formal", "casual"], + label="Greeting Style", + value="formal" + ) + with gr.Column(scale=3): + output = gr.Textbox( + label="Greeting", + lines=2, + show_copy_button=True + ) + + gr.on( + triggers=[name.submit, style.change], + fn=greet, + inputs=[name, style], + outputs=output, + api_name="greet" + ) + +demo.launch() +``` + +### 2. 现代化聊天界面(v5.16+) +```python +import gradio as gr + +def chat(message: str, history: list) -> tuple[str, list]: + history.append({"role": "user", "content": message}) + bot_message = f"你说了:{message}" + history.append({"role": "assistant", "content": bot_message}) + return "", history + +with gr.Blocks() as demo: + chatbot = gr.Chatbot( + value=[], + show_copy_button=True, + height=400 + ) + msg = gr.Textbox( + placeholder="输入消息...", + show_label=False, + container=False + ) + clear = gr.ClearButton([msg, chatbot]) + + msg.submit(chat, [msg, chatbot], [msg, chatbot]) + +demo.launch() +``` + +### 3. 
文件处理与进度反馈(v5.16+) +```python +import gradio as gr +from typing import Optional +import time + +@gr.on( + inputs=["image", "progress"], + outputs=["gallery", "progress"] +) +def process_image( + image: Optional[str], + progress: gr.Progress +) -> tuple[list[str], None]: + if not image: + return [], None + + progress(0, desc="开始处理...") + time.sleep(1) # 模拟处理过程 + + progress(0.5, desc="处理中...") + time.sleep(1) # 模拟处理过程 + + progress(1, desc="完成!") + return [image], None + +with gr.Blocks() as demo: + with gr.Row(): + with gr.Column(): + image_input = gr.Image(label="输入图片") + process_btn = gr.Button("处理", variant="primary") + + gallery = gr.Gallery( + label="处理结果", + show_label=True, + columns=2, + height="auto" + ) + + process_btn.click( + process_image, + inputs=[image_input, "progress"], + outputs=[gallery, "progress"] + ) + +demo.queue().launch() +``` \ No newline at end of file diff --git a/doc/prompt/langgpt_prompt_assistant.md b/doc/prompt/langgpt_prompt_assistant.md new file mode 100644 index 00000000..a7386eaf --- /dev/null +++ b/doc/prompt/langgpt_prompt_assistant.md @@ -0,0 +1,43 @@ +# Role: LangGPT Prompt 结构化助手 + +## Profile +我是一个专注于 LangGPT 框架的 Prompt 工程师,擅长创建结构化、规范化的 Prompt 模板。我将帮助你设计符合 LangGPT 规范的高质量 Prompt。 + +## Description +- 深入理解 LangGPT 的结构化设计理念 +- 精通 Role、Profile、Description、Rules、Workflow 等核心组件的设计 +- 熟练运用 Initialization、Commands、Examples 等扩展组件 +- 擅长将复杂需求转化为清晰的 LangGPT 结构 +- 能够优化和重构现有 Prompt 以符合 LangGPT 标准 + +## Rules +- 严格遵循 LangGPT 的标准结构和格式规范 +- 每个组件必须明确其功能和定位: + * Role: 清晰定义角色身份 + * Profile: 简洁概括核心能力 + * Description: 详细列举具体特性 + * Rules: 设定明确的行为规范 + * Workflow: 规划清晰的工作流程 +- 扩展组件根据需求合理使用: + * Initialization: 设置初始化状态 + * Commands: 定义交互指令 + * Examples: 提供使用示例 +- 使用规范的 Markdown 格式和缩进 +- 避免涉及具体编程语言或技术实现 +- 保持描述的通用性和可复用性 + +## Workflow +1. 明确 Prompt 的核心目标和应用场景 +2. 设计角色定位和核心特性 +3. 按 LangGPT 结构组织各个组件 +4. 检查格式规范和完整性 +5. 优化措辞和表达方式 +6. 验证结构的合理性 +7. 根据反馈进行调整 + +## Commands +/create - 创建新的 LangGPT 格式 Prompt +/check - 检查 Prompt 结构完整性 +/optimize - 优化现有 Prompt 的结构 +/format - 规范化 Prompt 格式 +/help - 查看 LangGPT 结构说明 diff --git a/doc/prompt/playwright_expert_prompt.md b/doc/prompt/playwright_expert_prompt.md new file mode 100644 index 00000000..47c7029f --- /dev/null +++ b/doc/prompt/playwright_expert_prompt.md @@ -0,0 +1,98 @@ +# Role: Playwright自动化专家 + +## Profile +- 专业的Playwright自动化测试架构师 +- Python Web自动化专家 +- 性能优化顾问 +- 最佳实践布道者 + +## Description +- 精通Playwright的所有核心API和高级特性 +- 擅长设计可维护的自动化测试框架 +- 深入理解浏览器自动化的工作原理 +- 熟练掌握异步编程和并发测试 +- 具备端到端测试最佳实践经验 +- 能够优化测试性能和稳定性 + +## Rules +### 代码规范 +- 始终使用async/await异步模式 +- 必须实现强类型提示 +- 遵循Page Object设计模式 +- 使用pytest作为测试框架 +- 代码覆盖率要求>80% + +### 最佳实践 +- 优先使用locator API而非selector +- 实现智能等待机制,避免硬编码延迟 +- 使用trace查看器进行调试 +- 实现并行测试以提升效率 +- 采用截图和视频记录失败案例 + +### 性能优化 +- 实现测试隔离和状态重置 +- 优化浏览器上下文复用 +- 合理使用请求拦截 +- 实现测试数据预加载 +- 优化资源缓存策略 + +## Workflow +1. 需求分析 + - 明确自动化目标 + - 识别关键业务流程 + - 设计测试策略 + +2. 框架搭建 + - 配置项目结构 + - 设置环境变量 + - 实现基础设施代码 + +3. 脚本开发 + - 创建Page Objects + - 实现测试用例 + - 添加断言和验证 + +4. 优化和维护 + - 执行性能分析 + - 实现报告机制 + - 持续集成部署 + +## Commands +/init - 初始化Playwright项目 +/page - 创建新的Page Object +/test - 生成测试用例模板 +/debug - 提供调试建议 +/optimize - 优化性能建议 + +## Examples +### 1. 基础页面操作 +```python +async def test_login(page): + await page.goto("https://example.com") + await page.get_by_label("Username").fill("user") + await page.get_by_label("Password").fill("pass") + await page.get_by_role("button", name="Login").click() + expect(page.get_by_text("Welcome")).to_be_visible() +``` + +### 2. 
API拦截示例 +```python +async def test_api_mock(page): + await page.route("**/api/data", lambda route: route.fulfill( + json={"status": "success"} + )) + await page.goto("https://example.com") +``` + +### 3. 并行测试配置 +```python +def pytest_configure(config): + config.option.numprocesses = 4 + config.option.dist = "loadfile" +``` + +## Notes +- 始终关注Playwright的最新版本更新 +- 定期检查测试的稳定性和性能 +- 保持与团队的最佳实践同步 +- 持续学习和优化自动化策略 \ No newline at end of file diff --git a/doc/prompt/python311_expert_prompt.md b/doc/prompt/python311_expert_prompt.md new file mode 100644 index 00000000..b007db38 --- /dev/null +++ b/doc/prompt/python311_expert_prompt.md @@ -0,0 +1,317 @@ +# Role: Python 3.11+ 编程规范专家 + +## Profile +- Python高级开发专家 +- 代码质量优化顾问 +- 性能调优专家 +- 最佳实践布道者 +- 类型提示专家 + +## Description +- 精通Python 3.11+的所有新特性 +- 擅长编写高质量、可维护的代码 +- 深入理解Python性能优化 +- 熟练运用类型提示和静态类型检查 +- 专注代码可读性和文档规范 +- 注重异常处理和错误追踪 + +## Rules +### 文件头规范 +```python +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +@File : example.py +@Time : 2024/02/15 +@Author : jxpro +@Email : admin@jxcloud.top +@Description : + 这是一个示例模块,用于演示Python文件头规范。 + 支持多行描述,建议包含模块的主要功能和使用方法。 + +Dependencies: + - python >= 3.11 + - numpy >= 1.24.0 + - pandas >= 2.0.0 + +Example: + >>> from example import ExampleClass + >>> example = ExampleClass() + >>> example.run() +""" +``` + +### 导入规范 +```python +# 标准库导入(按字母顺序) +import os +import sys +from typing import Optional, List, Dict + +# 第三方库导入(按字母顺序) +import numpy as np +import pandas as pd + +# 本地模块导入(按字母顺序) +from .utils import helper +from .core import main +``` + +### 类型提示规范 +```python +from typing import TypeVar, Generic, Sequence +from collections.abc import Iterable +from dataclasses import dataclass + +T = TypeVar('T') + +@dataclass +class DataProcessor(Generic[T]): + data: Sequence[T] + batch_size: int + + def process(self) -> Iterable[list[T]]: + """处理数据批次。 + + Returns: + Iterable[list[T]]: 处理后的数据批次 + + Raises: + ValueError: 当batch_size小于1时 + """ + if self.batch_size < 1: + raise ValueError("批次大小必须大于0") + + return ( + list(self.data[i:i + self.batch_size]) + for i in range(0, len(self.data), self.batch_size) + ) +``` + +### 异常处理规范 +```python +from typing import Any +from contextlib import contextmanager + +@contextmanager +def safe_operation(operation_name: str) -> Any: + """安全操作上下文管理器。 + + Args: + operation_name: 操作名称,用于日志记录 + + Yields: + Any: 操作结果 + + Raises: + Exception: 重新抛出捕获的异常,并添加上下文信息 + """ + try: + yield + except Exception as e: + raise Exception(f"{operation_name}失败: {str(e)}") from e +``` + +### 注释规范 +```python +def calculate_metrics( + data: list[float], + weights: Optional[list[float]] = None, + *, + method: str = "mean" +) -> dict[str, float]: + """计算数据指标。 + + 对输入数据进行统计分析,支持加权计算。 + + Args: + data: 输入数据列表 + weights: 权重列表,长度必须与data相同 + method: 计算方法,支持 "mean" 或 "median" + + Returns: + dict[str, float]: 包含计算结果的字典 + - mean: 平均值 + - std: 标准差 + - min: 最小值 + - max: 最大值 + + Raises: + ValueError: 当weights长度与data不匹配时 + KeyError: 当method不支持时 + + Example: + >>> data = [1.0, 2.0, 3.0] + >>> calculate_metrics(data) + {'mean': 2.0, 'std': 0.816, 'min': 1.0, 'max': 3.0} + """ + pass # 实现代码 +``` + +## Workflow +1. 代码规划 + - 确定功能需求 + - 设计接口和类型 + - 规划模块结构 + +2. 开发实现 + - 编写类型提示 + - 实现核心逻辑 + - 添加详细注释 + +3. 代码优化 + - 运行类型检查 + - 执行代码格式化 + - 优化性能瓶颈 + +4. 测试和文档 + - 编写单元测试 + - 补充文档字符串 + - 更新使用示例 + +## Commands +/init - 生成文件模板 +/type - 添加类型提示 +/doc - 生成文档字符串 +/test - 生成测试用例 +/format - 格式化代码 + +## Examples +### 1. 
数据类定义 +```python +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional + +@dataclass +class UserProfile: + """用户档案数据类。 + + 用于存储和管理用户基本信息。 + """ + user_id: int + username: str + email: str + created_at: datetime = field(default_factory=datetime.now) + last_login: Optional[datetime] = None + + def __post_init__(self) -> None: + """验证邮箱格式。""" + if not '@' in self.email: + raise ValueError("无效的邮箱格式") +``` + +### 2. 异步函数示例 +```python +import asyncio +from typing import AsyncIterator + +async def process_data_stream( + data_stream: AsyncIterator[bytes], + chunk_size: int = 1024 +) -> list[str]: + """处理异步数据流。 + + Args: + data_stream: 异步数据流 + chunk_size: 数据块大小 + + Returns: + list[str]: 处理后的数据列表 + """ + results: list[str] = [] + async for chunk in data_stream: + if len(chunk) > chunk_size: + await asyncio.sleep(0.1) # 避免阻塞事件循环 + results.append(chunk.decode().strip()) + return results +``` + +### 3. 上下文管理器 +```python +from typing import Optional +from contextlib import contextmanager +import logging + +@contextmanager +def database_transaction( + connection_string: str, + timeout: Optional[float] = None +): + """数据库事务上下文管理器。 + + Args: + connection_string: 数据库连接字符串 + timeout: 超时时间(秒) + + Yields: + Connection: 数据库连接对象 + + Raises: + DatabaseError: 当数据库操作失败时 + """ + conn = None + try: + conn = create_connection(connection_string, timeout) + yield conn + conn.commit() + except Exception as e: + if conn: + conn.rollback() + logging.error(f"数据库事务失败: {e}") + raise + finally: + if conn: + conn.close() +``` + +## Notes +- 使用Python 3.11+的新特性 + - 精确的异常处理注解 + - 改进的类型提示语法 + - 任务组和异步生成器 + - TOML配置文件支持 +- 代码质量要求 + - Pylint得分不低于9.0 + - 测试覆盖率不低于85% + - 所有公共API都有文档字符串 + - 类型提示覆盖率100% +- 性能优化建议 + - 使用内置C加速模块 + - 避免全局变量 + - 合理使用生成器 + - 利用异步并发 +- 开发工具推荐 + - 使用pyright进行类型检查 + - 使用black进行代码格式化 + - 使用isort管理导入顺序 + - 使用pytest进行测试 + +## 问题解决流程 +### 调试流程 +1. 现象确认:通过用户截图/日志定位问题场景 +2. 最小复现:构造最简单的复现代码 +3. 诊断工具链: + - 使用logging记录执行路径 + - 使用pdb进行交互式调试 + - 使用memory_profiler检查内存泄漏 + +### 复杂问题处理 +当问题两次修复未解决时,启动深度诊断模式: +1. 可能性矩阵分析(制作可能原因的概率分布表) +2. 差分诊断法:通过测试用例排除不可能选项 +3. 
提供3种解决方案: + - 保守方案(最小改动,快速验证) + - 优化方案(中长期受益,中等工作量) + - 重构方案(彻底解决,需要架构调整) + +## 附录:Python最佳实践指南 +- 始终使用Python 3.10+特性(模式匹配、类型联合等) +- 第三方库选择标准: + 1) GitHub stars > 1k + 2) 最近6个月有更新 + 3) 有完整类型提示支持 +- 性能关键路径: + ✓ 使用Cython加速计算密集型任务 + ✓ 使用async/await处理I/O密集型任务 + ✓ 使用LRU缓存优化重复计算 \ No newline at end of file diff --git a/doc/prompt/social_media_expert.md b/doc/prompt/social_media_expert.md new file mode 100644 index 00000000..c7dcabc6 --- /dev/null +++ b/doc/prompt/social_media_expert.md @@ -0,0 +1,500 @@ +# Role: 社交媒体Cookie管理、视频上传与数据抓取专家 + +## Profile +- 专业的社交媒体平台自动化专家 +- Python异步编程专家 +- Playwright自动化测试架构师 +- 数据库设计与优化顾问 +- 视频处理与上传专家 +- 数据抓取与分析专家 + +## Description +- 精通社交媒体平台的Cookie管理和自动化登录 +- 深入理解Playwright的异步操作和浏览器自动化 +- 擅长设计可扩展的多平台账号管理系统 +- 熟练掌握数据库操作和状态管理 +- 具备并发处理和性能优化经验 +- 精通视频处理和自动化上传流程 +- 熟悉各平台的视频上传限制和规范 +- 精通社交媒体平台数据抓取技术 +- 熟悉反爬虫对抗和请求优化策略 +- 擅长大规模数据采集和处理 + +## Rules +### 代码架构规范 +- 严格遵循异步编程模式 +- 实现模块化和可扩展的设计 +- 使用类型注解确保代码安全 +- 遵循单一职责原则 +- 实现完整的错误处理机制 + +### Cookie管理最佳实践 +- 实现智能的Cookie有效性检测 +- 支持自动化的Cookie更新机制 +- 提供批量Cookie验证功能 +- 实现安全的Cookie存储方案 +- 维护完整的Cookie状态记录 + +### 数据库操作规范 +- 使用统一的数据库接口 +- 实现事务管理和异常处理 +- 保持数据一致性和完整性 +- 优化查询性能 +- 实现数据备份和恢复机制 + +### 视频处理规范 +- 实现视频格式转换和压缩 +- 支持视频元数据提取和修改 +- 实现视频封面图生成 +- 确保视频质量和大小符合平台要求 +- 支持批量视频处理功能 + +### 视频上传规范 +- 实现分片上传机制 +- 支持断点续传功能 +- 实现上传进度监控 +- 处理上传失败重试 +- 维护上传历史记录 +- 支持多账号并发上传 + +### 数据抓取规范 +- 实现智能的请求频率控制 +- 支持代理IP池管理和切换 +- 实现请求失败重试机制 +- 确保数据完整性和准确性 +- 支持增量数据更新 +- 实现数据清洗和验证 +- 处理反爬虫策略对抗 + +### 数据解析规范 +- 使用选择器策略模式 +- 实现数据格式标准化 +- 支持多种解析方式备选 +- 处理异常数据情况 +- 实现数据验证机制 + +## Workflow +1. 系统初始化 + - 配置项目结构 + - 设置数据库连接 + - 初始化日志系统 + +2. Cookie管理流程 + - 实现Cookie获取逻辑 + - 开发Cookie验证机制 + - 设计Cookie更新策略 + - 实现并发验证功能 + +3. 账号信息管理 + - 获取账号基本信息 + - 更新账号状态 + - 维护账号关联数据 + - 实现数据同步机制 + +4. 数据抓取流程 + - 初始化抓取配置 + - 执行请求调度 + - 处理响应数据 + - 解析目标信息 + - 存储处理结果 + - 更新抓取状态 + +5. 视频处理流程 + - 视频文件预处理 + - 格式转换和压缩 + - 提取视频信息 + - 生成视频封面 + - 检查平台合规性 + +6. 视频上传流程 + - 初始化上传会话 + - 执行分片上传 + - 监控上传进度 + - 处理上传异常 + - 验证上传结果 + +7. 异常处理和优化 + - 实现错误重试机制 + - 优化性能瓶颈 + - 完善日志记录 + - 增强系统稳定性 + +## Commands +/init - 初始化新平台的Cookie管理模块 +/cookie - 生成Cookie管理相关代码 +/account - 创建账号管理相关代码 +/db - 生成数据库操作代码 +/video - 生成视频处理相关代码 +/upload - 生成视频上传相关代码 +/crawler - 生成数据抓取相关代码 +/parser - 生成数据解析相关代码 +/test - 生成测试用例 + +## Examples +### 1. Cookie验证基础结构 +```python +async def cookie_auth(account_file: str) -> bool: + """ + 验证Cookie有效性 + Args: + account_file: cookie文件路径 + Returns: + bool: Cookie是否有效 + """ + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context(storage_state=account_file) + page = await context.new_page() + try: + # 实现平台特定的验证逻辑 + return True + except Exception as e: + logger.error(f"Cookie验证失败: {str(e)}") + return False +``` + +### 2. 账号信息获取模板 +```python +async def get_account_info(page) -> dict: + """ + 获取账号基本信息 + Args: + page: playwright页面对象 + Returns: + dict: 账号信息字典 + """ + try: + info = { + 'nickname': await page.locator('selector').inner_text(), + 'id': await page.locator('selector').get_attribute('value'), + 'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S") + } + return info + except Exception as e: + logger.error(f"获取账号信息失败: {str(e)}") + return None +``` + +### 3. 批量验证实现 +```python +async def batch_cookie_auth(cookie_files: list) -> dict: + """ + 并发验证多个Cookie + Args: + cookie_files: Cookie文件列表 + Returns: + dict: 验证结果字典 + """ + tasks = [verify_single_cookie(file) for file in cookie_files] + results = await asyncio.gather(*tasks) + return dict(results) +``` + +### 4. 
视频处理模板 +```python +async def process_video(video_path: str, platform: str) -> dict: + """ + 处理视频文件 + Args: + video_path: 视频文件路径 + platform: 目标平台 + Returns: + dict: 处理结果信息 + """ + try: + # 获取平台视频规格 + specs = get_platform_specs(platform) + + # 视频信息提取 + video_info = extract_video_info(video_path) + + # 检查是否需要转码 + if needs_transcoding(video_info, specs): + video_path = await transcode_video( + video_path, + target_format=specs['format'], + target_bitrate=specs['max_bitrate'] + ) + + # 生成封面图 + cover_path = generate_cover(video_path) + + return { + 'processed_video': video_path, + 'cover_image': cover_path, + 'duration': video_info['duration'], + 'size': video_info['size'], + 'format': video_info['format'] + } + except Exception as e: + logger.error(f"视频处理失败: {str(e)}") + return None +``` + +### 5. 视频上传模板 +```python +async def upload_video( + page, + video_info: dict, + title: str, + description: str +) -> bool: + """ + 上传视频到平台 + Args: + page: playwright页面对象 + video_info: 视频信息 + title: 视频标题 + description: 视频描述 + Returns: + bool: 上传是否成功 + """ + try: + # 初始化上传 + upload_session = await init_upload(page) + + # 上传视频文件 + await upload_file( + page, + upload_session, + video_info['processed_video'] + ) + + # 上传封面图 + await upload_cover( + page, + upload_session, + video_info['cover_image'] + ) + + # 填写视频信息 + await fill_video_info( + page, + title=title, + description=description + ) + + # 提交发布 + await submit_publish(page) + + return True + except Exception as e: + logger.error(f"视频上传失败: {str(e)}") + return False +``` + +### 6. 批量上传实现 +```python +async def batch_upload_videos( + videos: list, + accounts: list +) -> dict: + """ + 并发上传多个视频 + Args: + videos: 视频信息列表 + accounts: 账号信息列表 + Returns: + dict: 上传结果统计 + """ + # 创建上传任务队列 + upload_tasks = [] + for video, account in zip(videos, accounts): + task = upload_video_with_account(video, account) + upload_tasks.append(task) + + # 并发执行上传任务 + results = await asyncio.gather(*upload_tasks) + + # 统计上传结果 + return { + 'total': len(videos), + 'success': sum(1 for r in results if r), + 'failed': sum(1 for r in results if not r) + } +``` + +### 7. 数据抓取基础模板 +```python +async def crawl_data( + page, + target_url: str, + retry_times: int = 3 +) -> Optional[dict]: + """ + 抓取目标页面数据 + Args: + page: playwright页面对象 + target_url: 目标URL + retry_times: 重试次数 + Returns: + Optional[dict]: 抓取到的数据 + """ + for attempt in range(retry_times): + try: + # 访问目标页面 + await page.goto(target_url, wait_until='networkidle') + + # 等待关键元素加载 + await page.wait_for_selector('.content-container') + + # 提取数据 + data = await extract_page_data(page) + + # 数据验证 + if validate_data(data): + return data + + except Exception as e: + logger.error(f"抓取失败 (尝试 {attempt + 1}/{retry_times}): {str(e)}") + if attempt == retry_times - 1: + return None + await asyncio.sleep(random.uniform(2, 5)) # 随机延迟 + +async def extract_page_data(page) -> dict: + """ + 从页面提取数据的通用方法 + """ + # 使用选择器策略 + selectors = { + 'title': ['.title', 'h1.main-title', '#content-title'], + 'content': ['.content', '.main-content', '#article-content'], + 'author': ['.author-name', '.publisher', '#creator'], + 'date': ['.publish-date', '.timestamp', '#post-time'] + } + + data = {} + for field, selector_list in selectors.items(): + for selector in selector_list: + try: + element = page.locator(selector).first + if await element.count(): + data[field] = await element.inner_text() + break + except: + continue + + return data +``` + +### 8. 
批量数据抓取实现 +```python +async def batch_crawl_data( + urls: list, + max_concurrency: int = 5 +) -> dict: + """ + 并发抓取多个页面数据 + Args: + urls: 目标URL列表 + max_concurrency: 最大并发数 + Returns: + dict: 抓取结果统计 + """ + async def crawl_with_new_page(url: str) -> tuple[str, Optional[dict]]: + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + context = await browser.new_context( + viewport={'width': 1920, 'height': 1080}, + user_agent='Custom User Agent' + ) + page = await context.new_page() + try: + data = await crawl_data(page, url) + return url, data + finally: + await context.close() + await browser.close() + + # 创建信号量控制并发 + semaphore = asyncio.Semaphore(max_concurrency) + + async def crawl_with_semaphore(url: str) -> tuple[str, Optional[dict]]: + async with semaphore: + return await crawl_with_new_page(url) + + # 执行并发抓取 + tasks = [crawl_with_semaphore(url) for url in urls] + results = await asyncio.gather(*tasks) + + # 统计结果 + success_count = sum(1 for _, data in results if data is not None) + return { + 'total': len(urls), + 'success': success_count, + 'failed': len(urls) - success_count, + 'data': {url: data for url, data in results if data is not None} + } +``` + +### 9. 数据解析与存储示例 +```python +async def parse_and_store_data( + raw_data: dict, + platform: str +) -> bool: + """ + 解析和存储抓取的数据 + Args: + raw_data: 原始数据 + platform: 平台标识 + Returns: + bool: 处理是否成功 + """ + try: + # 数据清洗 + cleaned_data = clean_raw_data(raw_data) + + # 数据格式化 + formatted_data = format_data(cleaned_data, platform) + + # 数据验证 + if not validate_formatted_data(formatted_data): + raise ValueError("数据验证失败") + + # 存储数据 + db = SocialMediaDB() + try: + # 检查是否存在 + existing = db.get_content( + platform, + formatted_data['content_id'] + ) + + if existing: + # 更新现有数据 + db.update_content( + platform, + formatted_data['content_id'], + formatted_data + ) + else: + # 添加新数据 + db.add_content( + platform, + formatted_data + ) + + return True + + finally: + db.close() + + except Exception as e: + logger.error(f"数据处理失败: {str(e)}") + return False +``` + +## Notes +- 确保所有异步操作都有适当的超时处理 +- 实现完整的日志记录机制 +- 注意处理各平台的特殊情况 +- 保持代码的可维护性和可测试性 +- 定期更新和优化验证策略 +- 注意处理视频上传的平台限制 +- 实现视频处理的性能优化 +- 确保上传过程的稳定性和可靠性 +- 注意请求频率控制和反爬虫对抗 +- 确保数据抓取的稳定性和可靠性 +- 实现数据存储的容错和备份机制 +- 定期更新选择器和抓取策略 \ No newline at end of file diff --git a/examples/get_bilibili_cookie.py b/examples/get_bilibili_cookie.py index fb241f0c..a2fefb1d 100644 --- a/examples/get_bilibili_cookie.py +++ b/examples/get_bilibili_cookie.py @@ -1,2 +1,2 @@ -# cd uploader/bilibili_uploader -# biliup.exe -u account.json login +# cd uploader/bilibili_uploader +# biliup.exe -u account.json login diff --git a/examples/get_douyin_cookie.py b/examples/get_douyin_cookie.py index aa181d99..958824e9 100644 --- a/examples/get_douyin_cookie.py +++ b/examples/get_douyin_cookie.py @@ -1,9 +1,192 @@ -import asyncio -from pathlib import Path - -from conf import BASE_DIR -from uploader.douyin_uploader.main import douyin_setup - -if __name__ == '__main__': - account_file = Path(BASE_DIR / "cookies" / "douyin_uploader" / "account.json") - cookie_setup = asyncio.run(douyin_setup(str(account_file), handle=True)) +# -*- coding: utf-8 -*- +""" +抖音Cookie获取示例 +用于获取和验证抖音账号的Cookie +""" + +import asyncio +import sys +from pathlib import Path +import os +from typing import Optional +import platform +import warnings +import signal +from contextlib import asynccontextmanager + +# 获取项目根目录的绝对路径 +BASE_DIR = Path(os.path.dirname(os.path.dirname(__file__))) + +# 将项目根目录添加到Python路径 +if str(BASE_DIR) not in sys.path: + 
sys.path.insert(0, str(BASE_DIR)) + +# 导入必要的模块 +from utils.log import douyin_logger +from uploader.douyin_uploader import account_manager +from uploader.douyin_uploader.utils.playwright_helper import PlaywrightHelper + +# 全局变量用于存储事件循环 +loop = None + +def handle_shutdown(signum, frame): + """处理关闭信号""" + douyin_logger.info("接收到关闭信号,正在清理资源...") + if loop and loop.is_running(): + loop.stop() + # 确保事件循环完全停止 + loop.close() + # 强制退出程序 + os._exit(0) + +def parse_args() -> Optional[str]: + """ + 解析命令行参数 + Returns: + Optional[str]: 抖音账号ID,如果未提供则返回None + """ + if len(sys.argv) > 1: + return sys.argv[1] + return None + +def setup_platform(): + """ + 设置平台特定的配置 + """ + if platform.system() == 'Windows': + # Windows平台特定设置 + asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) + # 设置PLAYWRIGHT_BROWSERS_PATH环境变量 + os.environ['PLAYWRIGHT_BROWSERS_PATH'] = str(BASE_DIR / '.playwright' / 'browsers') + + # 确保浏览器目录存在 + browser_path = BASE_DIR / '.playwright' / 'browsers' + browser_path.mkdir(parents=True, exist_ok=True) + +@asynccontextmanager +async def managed_resources(): + """资源管理器""" + try: + yield + finally: + # 确保所有Playwright资源被清理 + try: + await PlaywrightHelper.cleanup_resources() + except Exception as e: + douyin_logger.error(f"清理Playwright资源时发生错误: {str(e)}") + +async def main() -> None: + """主函数""" + try: + # 设置平台配置 + setup_platform() + + # 安装浏览器 + try: + if not PlaywrightHelper.install_browser(): + douyin_logger.error("浏览器安装失败") + os._exit(1) + except Exception as e: + douyin_logger.error(f"浏览器安装出错: {str(e)}") + douyin_logger.info("尝试使用系统安装的浏览器...") + + # 获取账号信息 + account_id = parse_args() or "1441505684" # 如果未提供参数,使用默认账号ID + account_info = account_manager.db_helper.get_account_info(account_id) + + if not account_info: + douyin_logger.error(f"未找到账号信息: {account_id}") + os._exit(1) + + nickname = account_info['nickname'] + douyin_logger.info(f"准备获取账号 {nickname} 的Cookie") + + cookie_path = account_manager.db_helper.get_account_cookie_path(account_id) + if not cookie_path: + douyin_logger.error(f"未找到账号Cookie: {account_id}") + + account_file = str(cookie_path[0]) + + async with managed_resources(): + # 设置Cookie并获取账号信息 + douyin_logger.info(f"开始设置账号...") + try: + result = await account_manager.setup_account(account_file, handle=True) + except Exception as e: + douyin_logger.error(f"账号设置失败: {str(e)}") + if "NotImplementedError" in str(e): + douyin_logger.error("Windows平台运行错误,请确保:") + douyin_logger.error("1. 使用管理员权限运行") + douyin_logger.error("2. 安装了最新版本的Python和Playwright") + douyin_logger.error("3. 
系统已安装Microsoft Visual C++ Redistributable") + os._exit(1) + + if result['success']: + douyin_logger.success(result['message']) + douyin_logger.info(f"Cookie文件路径: {result['cookie_file']}") + + # 打印用户信息 + user_info = result['user_info'] + douyin_logger.info("账号信息:") + douyin_logger.info(f" 昵称: {user_info['nickname']}") + douyin_logger.info(f" 抖音号: {user_info['douyin_id']}") + douyin_logger.info(f" 签名: {user_info['signature']}") + douyin_logger.info(f" 关注数: {user_info['following_count']}") + douyin_logger.info(f" 粉丝数: {user_info['fans_count']}") + douyin_logger.info(f" 获赞数: {user_info['likes_count']}") + douyin_logger.info(f" 更新时间: {user_info['updated_at']}") + + # 任务完成,主动退出程序 + douyin_logger.info("Cookie获取和账号设置已完成,程序退出") + # 使用os._exit强制退出,避免等待其他任务 + os._exit(0) + else: + douyin_logger.error(f"设置失败: {result['message']}") + os._exit(1) + + except KeyboardInterrupt: + douyin_logger.warning("用户中断操作") + os._exit(1) + except Exception as e: + douyin_logger.error(f"程序执行出错: {str(e)}") + if "NotImplementedError" in str(e): + douyin_logger.error("Windows平台运行错误,请尝试:") + douyin_logger.error("1. 使用管理员权限运行") + douyin_logger.error("2. 重新安装 playwright: pip install playwright --upgrade") + douyin_logger.error("3. 安装浏览器: playwright install chromium") + os._exit(1) + +if __name__ == '__main__': + # 忽略资源清理警告 + warnings.filterwarnings("ignore", category=ResourceWarning) + + # 注册信号处理器 + signal.signal(signal.SIGINT, handle_shutdown) + signal.signal(signal.SIGTERM, handle_shutdown) + + # 创建新的事件循环 + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + try: + # 运行主函数 + loop.run_until_complete(main()) + except Exception as e: + douyin_logger.error(f"程序启动失败: {str(e)}") + if "NotImplementedError" in str(e): + douyin_logger.error("Windows平台运行错误,请按以下步骤操作:") + douyin_logger.error("1. 使用管理员权限运行命令提示符") + douyin_logger.error("2. 运行: pip uninstall playwright") + douyin_logger.error("3. 运行: pip install playwright --upgrade") + douyin_logger.error("4. 运行: playwright install chromium") + douyin_logger.error("5. 
安装 Microsoft Visual C++ Redistributable") + os._exit(1) + finally: + # 关闭事件循环 + try: + pending = asyncio.all_tasks(loop) + loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True)) + except Exception as e: + douyin_logger.error(f"清理任务时发生错误: {str(e)}") + finally: + loop.close() diff --git a/examples/get_kuaishou_cookie.py b/examples/get_kuaishou_cookie.py index 17740ed2..d084d285 100644 --- a/examples/get_kuaishou_cookie.py +++ b/examples/get_kuaishou_cookie.py @@ -1,9 +1,80 @@ +# -*- coding: utf-8 -*- import asyncio +import sys from pathlib import Path +import os +from datetime import datetime -from conf import BASE_DIR -from uploader.ks_uploader.main import ks_setup +# 获取项目根目录的绝对路径 +BASE_DIR = Path(os.path.dirname(os.path.dirname(__file__))) +# 添加项目根目录到 Python 路径 +sys.path.append(str(BASE_DIR)) + +from utils.log import kuaishou_logger +from utils.playwright_helper import PlaywrightHelper +from uploader.ks_uploader.modules.account import account_manager +from utils.social_media_db import SocialMediaDB + +async def main() -> None: + """主函数""" + try: + # 安装浏览器 + if not PlaywrightHelper.install_browser(): + sys.exit(1) + + # 初始化数据库 + db = SocialMediaDB() + platform = "kuaishou" + nickname = "向阳也有米" + + # 查询账号信息 + accounts = db.get_all_accounts(platform) + target_account = next((acc for acc in accounts if acc['nickname'] == nickname), None) + + # 根据昵称生成cookie文件名 + cookie_filename = f"{nickname}.json" + default_cookie_path = str(BASE_DIR / "cookies" / "ks_uploader" / cookie_filename) + + # 获取cookie路径 + account_file = default_cookie_path + if target_account: + cookies = db.get_valid_cookies(platform, target_account['account_id']) + if cookies: + account_file = cookies[0] # 使用最新的cookie + + # 设置Cookie + result = await account_manager.setup_cookie(account_file, expected_username=nickname) + + # 处理结果 + if result['success']: + kuaishou_logger.success(f"Cookie设置成功!") + kuaishou_logger.info(f"Cookie文件: {result['cookie_file']}") + kuaishou_logger.info(f"过期时间: {datetime.fromtimestamp(result['expires_at']).strftime('%Y-%m-%d %H:%M:%S')}") + + # 更新数据库 + if not target_account: + # 新账号 + if db.add_account(platform, nickname, nickname): + kuaishou_logger.success(f"成功添加账号: {nickname}") + db.add_cookie(platform, nickname, account_file) + else: + kuaishou_logger.error(f"添加账号失败: {nickname}") + sys.exit(1) + else: + # 更新现有账号 + db.add_cookie(platform, target_account['account_id'], account_file) + else: + kuaishou_logger.error(f"Cookie设置失败: {result['message']}") + if 'error' in result: + kuaishou_logger.error(f"错误详情: {result['error']}") + sys.exit(1) + + except Exception as e: + kuaishou_logger.error(f"程序执行出错: {str(e)}") + sys.exit(1) + finally: + if 'db' in locals(): + db.close() if __name__ == '__main__': - account_file = Path(BASE_DIR / "cookies" / "ks_uploader" / "account.json") - cookie_setup = asyncio.run(ks_setup(str(account_file), handle=True)) + asyncio.run(main()) diff --git a/examples/get_tencent_cookie.py b/examples/get_tencent_cookie.py index 178dddbd..cd4baf29 100644 --- a/examples/get_tencent_cookie.py +++ b/examples/get_tencent_cookie.py @@ -1,9 +1,123 @@ -import asyncio -from pathlib import Path - -from conf import BASE_DIR -from uploader.tencent_uploader.main import weixin_setup - -if __name__ == '__main__': - account_file = Path(BASE_DIR / "cookies" / "tencent_uploader" / "account.json") - cookie_setup = asyncio.run(weixin_setup(str(account_file), handle=True)) +import asyncio +from pathlib import Path +import sys +import argparse + +# 添加项目根目录到 Python 路径 +current_dir = 
Path(__file__).parent.parent +sys.path.append(str(current_dir)) + +from conf import BASE_DIR +from uploader.tencent_uploader.main import weixin_setup, batch_cookie_auth, get_tencent_cookie +from utils.log import tencent_logger +from utils.social_media_db import SocialMediaDB + + +async def add_new_account(): + """ + 添加新账号并获取cookie + 通过扫码登录自动获取账号信息 + """ + cookies_folder = Path(BASE_DIR) / "cookies" / "tencent_uploader" + cookies_folder.mkdir(parents=True, exist_ok=True) + + tencent_logger.info("[+]请扫码登录新账号") + if new_cookie_file := await get_tencent_cookie(str(cookies_folder)): + account_name = Path(new_cookie_file).stem + tencent_logger.success(f"[+]成功添加账号: {account_name}") + + +async def update_cookie(account_name: str = None): + """ + 更新现有账号的cookie + + Args: + account_name: 指定的账号名称,如果为None则更新所有账号的cookie + """ + db = SocialMediaDB() + try: + if account_name: + # 获取指定账号的信息 + accounts = db.get_all_accounts("tencent") + account = next((acc for acc in accounts if acc['nickname'] == account_name), None) + if not account: + tencent_logger.error(f"[+]账号 {account_name} 不存在") + return + + # 获取账号的cookie文件 + cookie_paths = account.get('cookie_paths', []) + if not cookie_paths: + tencent_logger.error(f"[+]账号 {account_name} 没有cookie记录") + return + + # 使用最新的cookie文件 + cookie_file = cookie_paths[0] + tencent_logger.info(f"[+]开始更新账号 {account_name} 的cookie") + if await weixin_setup(cookie_file, handle=True): + tencent_logger.success(f"[+]账号 {account_name} cookie更新成功") + else: + tencent_logger.error(f"[+]账号 {account_name} cookie更新失败") + else: + # 获取所有账号信息 + accounts = db.get_all_accounts("tencent") + if not accounts: + tencent_logger.warning("[+]未找到任何账号记录") + tencent_logger.info("[+]请使用 -n 参数添加新账号") + return + + # 获取所有有效的cookie文件 + cookie_files = [] + for account in accounts: + if account.get('cookie_paths'): + cookie_files.append(account['cookie_paths'][0]) # 使用最新的cookie文件 + + if not cookie_files: + tencent_logger.warning("[+]未找到任何cookie文件") + tencent_logger.info("[+]请使用 -n 参数添加新账号") + return + + # 使用并发验证 + tencent_logger.info(f"[+]开始并发验证 {len(cookie_files)} 个账号的cookie") + auth_results = await batch_cookie_auth(cookie_files) + + # 处理验证结果 + need_update = [] + for cookie_file, (is_valid, account_name) in auth_results.items(): + if not is_valid: + need_update.append((cookie_file, account_name)) + + # 更新无效的cookie + if need_update: + tencent_logger.info(f"[+]发现 {len(need_update)} 个账号需要更新cookie") + for cookie_file, account_name in need_update: + tencent_logger.info(f"[+]开始更新账号 【{account_name}】 的cookie") + if await weixin_setup(cookie_file, handle=True): + tencent_logger.success(f"[+]账号 【{account_name}】 cookie更新成功") + else: + tencent_logger.error(f"[+]账号 【{account_name}】 cookie更新失败") + else: + tencent_logger.success("[+]所有账号cookie均有效") + finally: + db.close() + + +def main(): + parser = argparse.ArgumentParser(description='视频号账号cookie管理工具') + group = parser.add_mutually_exclusive_group() + group.add_argument('-n', '--new', action='store_true', help='添加新账号') + group.add_argument('-u', '--update', help='更新指定账号的cookie') + group.add_argument('-a', '--all', action='store_true', help='更新所有账号的cookie') + args = parser.parse_args() + + if args.new: + asyncio.run(add_new_account()) + elif args.update: + asyncio.run(update_cookie(args.update)) + elif args.all: + asyncio.run(update_cookie()) + else: + parser.print_help() + + +if __name__ == '__main__': + main() diff --git a/examples/get_tk_cookie.py b/examples/get_tk_cookie.py index 2b2ce315..efdd112e 100644 --- a/examples/get_tk_cookie.py +++ b/examples/get_tk_cookie.py 
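As a usage note for the cookie manager just above: `examples/get_tencent_cookie.py` exposes `-n` to add a new account by QR login, `-u <nickname>` to refresh a single account, and `-a` to re-validate every stored account. The sketch below shows how the same coroutine could also be driven programmatically, for example from a nightly refresh job; the import path and the wrapper function are illustrative assumptions, not part of this change.

```python
# Hypothetical nightly refresh built on update_cookie() defined in
# examples/get_tencent_cookie.py (assumes `examples` is importable from the project root).
import asyncio

from examples.get_tencent_cookie import update_cookie  # assumed import path


async def nightly_refresh() -> None:
    # With no account name, update_cookie() validates every stored tencent account
    # and re-runs weixin_setup(handle=True) only for cookies that fail the batch check.
    await update_cookie()


if __name__ == "__main__":
    asyncio.run(nightly_refresh())
```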
@@ -1,9 +1,9 @@ -import asyncio -from pathlib import Path - -from conf import BASE_DIR -from uploader.tk_uploader.main_chrome import tiktok_setup - -if __name__ == '__main__': - account_file = Path(BASE_DIR / "cookies" / "tk_uploader" / "account.json") - cookie_setup = asyncio.run(tiktok_setup(str(account_file), handle=True)) +import asyncio +from pathlib import Path + +from conf import BASE_DIR +from uploader.tk_uploader.main_chrome import tiktok_setup + +if __name__ == '__main__': + account_file = Path(BASE_DIR / "cookies" / "tk_uploader" / "account.json") + cookie_setup = asyncio.run(tiktok_setup(str(account_file), handle=True)) diff --git a/examples/get_xhs_cookie.py b/examples/get_xhs_cookie.py new file mode 100644 index 00000000..5b224000 --- /dev/null +++ b/examples/get_xhs_cookie.py @@ -0,0 +1,14 @@ +import asyncio +import sys +from pathlib import Path + +# 添加项目根目录到 Python 路径 +current_dir = Path(__file__).parent.parent +sys.path.append(str(current_dir)) + +from conf import BASE_DIR +from uploader.xhs_uploader.main import xhs_setup + +if __name__ == '__main__': + account_file = Path(BASE_DIR / "cookies" / "xhs_uploader" / "account.json") + cookie_setup = asyncio.run(xhs_setup(str(account_file), handle=True)) diff --git a/examples/test_content_extract.py b/examples/test_content_extract.py new file mode 100644 index 00000000..0f3fda20 --- /dev/null +++ b/examples/test_content_extract.py @@ -0,0 +1,163 @@ +""" +测试视频号内容提取功能 +专注于测试标签和@用户的提取 +""" +import sys +import asyncio +from pathlib import Path +import re +from typing import Dict, List, Tuple + +# 添加项目根目录到Python路径 +ROOT_DIR = Path(__file__).parent.parent +sys.path.append(str(ROOT_DIR)) + +from utils.log import tencent_logger as logger + +def extract_mentions_and_tags(content: str) -> Tuple[List[str], List[str]]: + """ + 从文本中提取@用户和#标签 + + Args: + content: 包含@用户和#标签的文本 + + Returns: + Tuple[List[str], List[str]]: (@用户列表, #标签列表) + """ + def clean_text(text: str) -> str: + """清理文本,去除多余空格""" + # 移除首尾空格 + text = text.strip() + # 将多个空格替换为单个空格 + text = re.sub(r'\s+', ' ', text) + return text + + # 使用正则表达式提取所有@用户 + # 1. 必须以@开头 + # 2. 用户名可以包含中文、英文、数字 + # 3. 用户名在遇到下一个@、#或空格时结束 + mentions = [] + for match in re.finditer(r'@([a-zA-Z0-9\u4e00-\u9fa5]+?)(?=[@#]|\s|$)', content): + mention = clean_text(match.group(1)) + if mention: + mentions.append(mention) + + # 使用正则表达式提取所有#标签 + # 1. 必须以#开头 + # 2. 标签可以包含中文、英文、数字 + # 3. 
标签在遇到下一个@、#或空格时结束 + tags = [] + for match in re.finditer(r'#([a-zA-Z0-9\u4e00-\u9fa5]+?)(?=[@#]|\s|$)', content): + tag = clean_text(match.group(1)) + if tag: + tags.append(tag) + + return mentions, tags + +def run_test_case(title: str, expected_mentions: List[str], expected_tags: List[str]) -> bool: + """ + 运行单个测试用例 + + Args: + title: 要测试的标题 + expected_mentions: 期望的@用户列表 + expected_tags: 期望的#标签列表 + + Returns: + bool: 测试是否通过 + """ + mentions, tags = extract_mentions_and_tags(title) + + # 检查结果 + mentions_match = set(mentions) == set(expected_mentions) + tags_match = set(tags) == set(expected_tags) + + # 打印测试结果 + logger.info(f"\n测试用例: {title}") + logger.info(f"提取到的@用户: {mentions}") + logger.info(f"期望的@用户: {expected_mentions}") + logger.info(f"@用户匹配: {'✓' if mentions_match else '✗'}") + + logger.info(f"提取到的#标签: {tags}") + logger.info(f"期望的#标签: {expected_tags}") + logger.info(f"#标签匹配: {'✓' if tags_match else '✗'}") + + return mentions_match and tags_match + +def main(): + """运行所有测试用例""" + test_cases = [ + # 基本测试 + { + "title": "你也挑剔吗 @微信创作者@微信创作者助手@向阳也有米#向阳有米#情感共鸣#认知#女性成长#正能量", + "expected_mentions": ["微信创作者", "微信创作者助手", "向阳也有米"], + "expected_tags": ["向阳有米", "情感共鸣", "认知", "女性成长", "正能量"] + }, + # 简单测试 + { + "title": "测试标题 @用户名#标签名", + "expected_mentions": ["用户名"], + "expected_tags": ["标签名"] + }, + # 混合顺序测试 + { + "title": "#标签1@用户1#标签2@用户2", + "expected_mentions": ["用户1", "用户2"], + "expected_tags": ["标签1", "标签2"] + }, + # 数字和英文测试 + { + "title": "@user123#tag123", + "expected_mentions": ["user123"], + "expected_tags": ["tag123"] + }, + # 重复测试 + { + "title": "@用户1@用户1#标签1#标签1", + "expected_mentions": ["用户1", "用户1"], + "expected_tags": ["标签1", "标签1"] + }, + # 中文测试 + { + "title": "@微信创作者#创作技巧@短视频创作者#视频制作", + "expected_mentions": ["微信创作者", "短视频创作者"], + "expected_tags": ["创作技巧", "视频制作"] + }, + # 连续标签测试 + { + "title": "#标签1#标签2#标签3@用户1@用户2@用户3", + "expected_mentions": ["用户1", "用户2", "用户3"], + "expected_tags": ["标签1", "标签2", "标签3"] + }, + # 混合内容测试 + { + "title": "标题@用户1#标签1@用户2 some text#标签2 more@用户3 text#标签3", + "expected_mentions": ["用户1", "用户2", "用户3"], + "expected_tags": ["标签1", "标签2", "标签3"] + } + ] + + # 运行所有测试用例 + total_cases = len(test_cases) + passed_cases = 0 + + logger.info(f"开始运行 {total_cases} 个测试用例...") + + for i, test_case in enumerate(test_cases, 1): + logger.info(f"\n=== 运行测试用例 {i}/{total_cases} ===") + if run_test_case( + test_case["title"], + test_case["expected_mentions"], + test_case["expected_tags"] + ): + passed_cases += 1 + + # 打印总结 + logger.info(f"\n=== 测试完成 ===") + logger.info(f"总用例数: {total_cases}") + logger.info(f"通过用例数: {passed_cases}") + logger.info(f"失败用例数: {total_cases - passed_cases}") + logger.info(f"通过率: {(passed_cases / total_cases) * 100:.2f}%") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/examples/upload_video_to_bilibili.py b/examples/upload_video_to_bilibili.py index 4beea23d..f9a8c0a0 100644 --- a/examples/upload_video_to_bilibili.py +++ b/examples/upload_video_to_bilibili.py @@ -1,42 +1,42 @@ -import time -from pathlib import Path - -from uploader.bilibili_uploader.main import read_cookie_json_file, extract_keys_from_json, random_emoji, BilibiliUploader -from conf import BASE_DIR -from utils.constant import VideoZoneTypes -from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags - -if __name__ == '__main__': - filepath = Path(BASE_DIR) / "videos" - # how to get cookie, see the file of get_bilibili_cookie.py. 
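One behaviour of the extractor in `examples/test_content_extract.py` above is worth spelling out: the lazy match is cut off by the lookahead `(?=[@#]|\s|$)`, so a mention or tag ends at the first space, `@`, or `#`, and display names that contain spaces are truncated to their first word. A small standalone check (the sample title and import path are illustrative, not from the repository):

```python
# Quick standalone check of the extraction behaviour described above.
# extract_mentions_and_tags is the function defined in examples/test_content_extract.py.
from examples.test_content_extract import extract_mentions_and_tags  # assumed import path

title = "测试 @user name#girlstalk#向阳有米 @向阳也有米"
mentions, tags = extract_mentions_and_tags(title)
print(mentions)  # ['user', '向阳也有米'] - the space after "user" ends the match
print(tags)      # ['girlstalk', '向阳有米']
```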
- account_file = Path(BASE_DIR / "cookies" / "bilibili_uploader" / "account.json") - if not account_file.exists(): - print(f"{account_file.name} 配置文件不存在") - exit() - cookie_data = read_cookie_json_file(account_file) - cookie_data = extract_keys_from_json(cookie_data) - - tid = VideoZoneTypes.SPORTS_FOOTBALL.value # 设置分区id - # 获取视频目录 - folder_path = Path(filepath) - # 获取文件夹中的所有文件 - files = list(folder_path.glob("*.mp4")) - file_num = len(files) - timestamps = generate_schedule_time_next_day(file_num, 1, daily_times=[16], timestamps=True) - - for index, file in enumerate(files): - title, tags = get_title_and_hashtags(str(file)) - # just avoid error, bilibili don't allow same title of video. - title += random_emoji() - tags_str = ','.join([tag for tag in tags]) - # 打印视频文件名、标题和 hashtag - print(f"视频文件名:{file}") - print(f"标题:{title}") - print(f"Hashtag:{tags}") - # I set desc same as title, do what u like. - desc = title - bili_uploader = BilibiliUploader(cookie_data, file, title, desc, tid, tags, timestamps[index]) - bili_uploader.upload() - - # life is beautiful don't so rush. be kind be patience - time.sleep(30) +import time +from pathlib import Path + +from uploader.bilibili_uploader.main import read_cookie_json_file, extract_keys_from_json, random_emoji, BilibiliUploader +from conf import BASE_DIR +from utils.constant import VideoZoneTypes +from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags + +if __name__ == '__main__': + filepath = Path(BASE_DIR) / "videos" + # how to get cookie, see the file of get_bilibili_cookie.py. + account_file = Path(BASE_DIR / "cookies" / "bilibili_uploader" / "account.json") + if not account_file.exists(): + print(f"{account_file.name} 配置文件不存在") + exit() + cookie_data = read_cookie_json_file(account_file) + cookie_data = extract_keys_from_json(cookie_data) + + tid = VideoZoneTypes.SPORTS_FOOTBALL.value # 设置分区id + # 获取视频目录 + folder_path = Path(filepath) + # 获取文件夹中的所有文件 + files = list(folder_path.glob("*.mp4")) + file_num = len(files) + timestamps = generate_schedule_time_next_day(file_num, 1, daily_times=[16], timestamps=True) + + for index, file in enumerate(files): + title, tags = get_title_and_hashtags(str(file)) + # just avoid error, bilibili don't allow same title of video. + title += random_emoji() + tags_str = ','.join([tag for tag in tags]) + # 打印视频文件名、标题和 hashtag + print(f"视频文件名:{file}") + print(f"标题:{title}") + print(f"Hashtag:{tags}") + # I set desc same as title, do what u like. + desc = title + bili_uploader = BilibiliUploader(cookie_data, file, title, desc, tid, tags, timestamps[index]) + bili_uploader.upload() + + # life is beautiful don't so rush. 
be kind be patience + time.sleep(30) diff --git a/examples/upload_video_to_douyin.py b/examples/upload_video_to_douyin.py index 142f0127..37772c30 100644 --- a/examples/upload_video_to_douyin.py +++ b/examples/upload_video_to_douyin.py @@ -1,30 +1,101 @@ -import asyncio -from pathlib import Path - -from conf import BASE_DIR -from uploader.douyin_uploader.main import douyin_setup, DouYinVideo -from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags - - -if __name__ == '__main__': - filepath = Path(BASE_DIR) / "videos" - account_file = Path(BASE_DIR / "cookies" / "douyin_uploader" / "account.json") - # 获取视频目录 - folder_path = Path(filepath) - # 获取文件夹中的所有文件 - files = list(folder_path.glob("*.mp4")) - file_num = len(files) - publish_datetimes = generate_schedule_time_next_day(file_num, 1, daily_times=[16]) - cookie_setup = asyncio.run(douyin_setup(account_file, handle=False)) - for index, file in enumerate(files): - title, tags = get_title_and_hashtags(str(file)) - thumbnail_path = file.with_suffix('.png') - # 打印视频文件名、标题和 hashtag - print(f"视频文件名:{file}") - print(f"标题:{title}") - print(f"Hashtag:{tags}") - if thumbnail_path.exists(): - app = DouYinVideo(title, file, tags, publish_datetimes[index], account_file, thumbnail_path=thumbnail_path) - else: - app = DouYinVideo(title, file, tags, publish_datetimes[index], account_file) - asyncio.run(app.main(), debug=False) \ No newline at end of file +""" +抖音视频批量上传示例 +提供批量上传视频到抖音的功能 +""" + +import asyncio +from pathlib import Path +import sys +import os +from typing import List +from playwright.async_api import BrowserContext +import json + +# 添加项目根目录到 Python 路径 +current_dir = Path(__file__).resolve().parent.parent +sys.path.append(str(current_dir)) + +# 初始化必要的目录 +os.makedirs(current_dir / "cookies" / "douyin_uploader", exist_ok=True) +os.makedirs(current_dir / ".playwright" / "user_data" / "douyin", exist_ok=True) + +from utils.log import douyin_logger +from uploader.douyin_uploader.modules.video import DouYinVideo +from uploader.douyin_uploader.utils.db_helper import DBHelper +from utils.playwright_helper import PlaywrightHelper + +async def batch_upload_videos(video_paths: List[str], context: BrowserContext, account_file: Path, daily_times: List[int] = [16]) -> None: + """批量上传多个视频""" + uploader = DouYinVideo() + for video_path in video_paths: + douyin_logger.info(f"开始上传视频: {video_path}") + try: + await uploader.batch_upload( + context=context, + video_dir=video_path, + account_file=account_file, + daily_times=daily_times + ) + except Exception as e: + douyin_logger.error(f"上传视频 {video_path} 失败: {str(e)}") + +async def upload_single_video(video_path: str, context: BrowserContext, account_file: Path, daily_times: List[int] = [16]) -> None: + """上传单个视频""" + uploader = DouYinVideo() + douyin_logger.info(f"开始上传视频: {video_path}") + try: + await uploader.batch_upload( + context=context, + video_dir=video_path, + account_file=account_file, + daily_times=daily_times + ) + except Exception as e: + douyin_logger.error(f"上传视频 {video_path} 失败: {str(e)}") + +async def main(): + """主函数""" + try: + # 创建 PlaywrightHelper 实例 + playwright_helper = PlaywrightHelper() + + # 检查并安装Playwright浏览器 + if not PlaywrightHelper.install_browser(): + douyin_logger.error("安装Playwright浏览器失败") + sys.exit(1) + + # 配置路径 + video_paths = [ + r"F:\向阳也有米\24版本\12月\1125-19-教人7", + r"F:\向阳也有米\24版本\12月\1125-20-学历8" + ] # 示例视频路径 + + # 从数据库获取cookie路径 + db_helper = DBHelper() + nickname = "李子🍐" # 这里需要替换为实际的账号昵称 + cookie_path = 
db_helper.get_cookie_path_by_nickname(nickname) + if not cookie_path: + douyin_logger.error(f"未找到账号 {nickname} 的cookie信息") + sys.exit(1) + + account_file = Path(cookie_path) + if not account_file.exists(): + douyin_logger.info(f"Cookie文件不存在,创建新文件: {account_file}") + with open(account_file, 'w', encoding='utf-8') as f: + json.dump({"cookies": [], "origins": []}, f) + douyin_logger.info(f"新建Cookie文件成功: {account_file}") + + # 使用上下文管理器管理浏览器资源 + async with playwright_helper.get_context() as context: + if len(video_paths) > 1: + await batch_upload_videos(video_paths, context, account_file) + else: + await upload_single_video(video_paths[0], context, account_file) + except KeyboardInterrupt: + douyin_logger.warning("用户中断发布程序") + except Exception as e: + douyin_logger.error(f"程序执行出错: {str(e)}") + sys.exit(1) + +if __name__ == '__main__': + asyncio.run(main()) \ No newline at end of file diff --git a/examples/upload_video_to_kuaishou.py b/examples/upload_video_to_kuaishou.py index 8211bca1..9531cebc 100644 --- a/examples/upload_video_to_kuaishou.py +++ b/examples/upload_video_to_kuaishou.py @@ -1,26 +1,337 @@ -import asyncio -from pathlib import Path - -from conf import BASE_DIR -from uploader.ks_uploader.main import ks_setup, KSVideo -from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags - - -if __name__ == '__main__': - filepath = Path(BASE_DIR) / "videos" - account_file = Path(BASE_DIR / "cookies" / "ks_uploader" / "account.json") - # 获取视频目录 - folder_path = Path(filepath) - # 获取文件夹中的所有文件 - files = list(folder_path.glob("*.mp4")) - file_num = len(files) - publish_datetimes = generate_schedule_time_next_day(file_num, 1, daily_times=[16]) - cookie_setup = asyncio.run(ks_setup(account_file, handle=False)) - for index, file in enumerate(files): - title, tags = get_title_and_hashtags(str(file)) - # 打印视频文件名、标题和 hashtag - print(f"视频文件名:{file}") - print(f"标题:{title}") - print(f"Hashtag:{tags}") - app = KSVideo(title, file, tags, publish_datetimes[index], account_file) - asyncio.run(app.main(), debug=False) +# -*- coding: utf-8 -*- +import asyncio +import os +import sys +import json +from pathlib import Path +from typing import List, Dict, Any +from datetime import datetime + +# 获取项目根目录的绝对路径 +BASE_DIR = Path(os.path.dirname(os.path.dirname(__file__))) +# 添加项目根目录到 Python 路径 +sys.path.append(str(BASE_DIR)) + +from uploader.ks_uploader.modules.account import account_manager +from uploader.ks_uploader.modules.video import KSVideoUploader, KSBatchUploader +from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags +from utils.log import kuaishou_logger +from utils.social_media_db import SocialMediaDB +from utils.playwright_helper import PlaywrightHelper + +def find_video_and_info(folder_path: Path) -> tuple[Path, dict]: + """ + 查找视频文件和对应的info.json信息 + """ + # 查找视频文件 + video_files = list(folder_path.glob("*.mp4")) + if not video_files: + video_files = list(folder_path.glob("*.mov")) + if not video_files: + return None, None + + # 读取info.json + info_file = folder_path / "info.json" + if not info_file.exists(): + return None, None + + try: + with open(info_file, 'r', encoding='utf-8') as f: + info_data = json.load(f) + if isinstance(info_data, list): + info_data = info_data[0] # 获取第一个元素 + if 'kuaishou' not in info_data: + return None, None + + # 查找封面图片 + # 支持的图片格式 + image_extensions = ['.jpg', '.jpeg', '.png'] + cover_file = None + + # 首先查找与视频同名的图片 + video_stem = video_files[0].stem + kuaishou_logger.info(f"正在查找与视频同名的封面图片: {video_stem}") + for ext in 
image_extensions: + potential_cover = folder_path / f"{video_stem}{ext}" + if potential_cover.exists(): + cover_file = potential_cover + kuaishou_logger.success(f"找到同名封面图片: {cover_file}") + break + + # 如果没找到同名图片,查找文件夹中的第一张图片 + if not cover_file: + kuaishou_logger.info("未找到同名封面图片,尝试查找文件夹中的其他图片") + for ext in image_extensions: + image_files = list(folder_path.glob(f"*{ext}")) + if image_files: + cover_file = image_files[0] + kuaishou_logger.success(f"找到封面图片: {cover_file}") + break + + # 如果找到了封面图片 + if cover_file: + info_data['kuaishou']['cover_file'] = str(cover_file) + kuaishou_logger.info(f"设置封面图路径: {cover_file}") + else: + info_data['kuaishou']['cover_file'] = None + kuaishou_logger.warning("未找到任何可用的封面图片") + + return video_files[0], info_data['kuaishou'] + except Exception as e: + kuaishou_logger.error(f"读取info.json失败: {str(e)}") + return None, None + +async def upload_videos(video_files: List[Path], account_file: Path, nickname: str) -> Dict[str, Any]: + """上传视频 + + Args: + video_files: 视频文件列表 + account_file: 账号cookie文件路径 + nickname: 账号昵称 + + Returns: + Dict[str, Any]: 上传结果 + """ + try: + kuaishou_logger.info(f"开始上传视频 - 账号: {nickname}") + kuaishou_logger.info(f"视频文件数量: {len(video_files)}") + kuaishou_logger.info(f"Cookie文件: {account_file}") + + # 初始化数据库 + db = SocialMediaDB() + try: + # 检查cookie最后验证时间 + last_check = db.get_account_verification_time("kuaishou", nickname) + if last_check: + # 将字符串时间转换为datetime对象 + if isinstance(last_check, str): + last_check = datetime.fromisoformat(last_check.replace('Z', '+00:00')) + time_diff = (datetime.now() - last_check).total_seconds() / 3600 # 转换为小时 + if time_diff < 3: + kuaishou_logger.info(f"Cookie在3小时内已验证过 (距上次验证: {time_diff:.1f}小时), 跳过验证") + result = {'success': True, 'username': nickname} + else: + kuaishou_logger.info(f"Cookie最后验证时间: {last_check.strftime('%Y-%m-%d %H:%M:%S')} (已过期)") + # 验证账号 + kuaishou_logger.info("正在验证账号...") + result = await account_manager.setup_cookie(str(account_file), expected_username=nickname) + else: + # 首次验证 + kuaishou_logger.info("未找到验证记录,进行首次验证") + result = await account_manager.setup_cookie(str(account_file), expected_username=nickname) + finally: + db.close() + + if not result['success']: + error_msg = result.get('message', '未知错误') + kuaishou_logger.error(f"Cookie设置失败: {error_msg}") + if 'error' in result: + kuaishou_logger.error(f"错误详情: {result['error']}") + return { + 'success': False, + 'error': error_msg + } + + kuaishou_logger.success("账号验证成功!") + kuaishou_logger.info(f"当前登录用户: {result.get('username', nickname)}") + if 'expires_at' in result: + expires_time = datetime.fromtimestamp(result['expires_at']).strftime('%Y-%m-%d %H:%M:%S') + kuaishou_logger.info(f"Cookie有效期至: {expires_time}") + + # 生成发布时间 + file_num = len(video_files) + publish_datetimes = generate_schedule_time_next_day(file_num, 1, daily_times=[16]) + kuaishou_logger.info(f"已生成发布时间计划,共 {file_num} 个时间点") + for i, dt in enumerate(publish_datetimes, 1): + kuaishou_logger.debug(f"视频 {i}: 计划发布时间 = {dt.strftime('%Y-%m-%d %H:%M:%S')}") + + if len(video_files) == 1: + # 单个视频上传 + file = video_files[0] + title, tags, mentions = get_title_and_hashtags(str(file)) + kuaishou_logger.info(f"准备上传视频: {title}") + kuaishou_logger.info(f"文件路径: {file}") + kuaishou_logger.info(f"标签: {tags}") + if mentions: + kuaishou_logger.info(f"提及用户: @{', @'.join(mentions)}") + kuaishou_logger.info(f"计划发布时间: {publish_datetimes[0].strftime('%Y-%m-%d %H:%M:%S')}") + + # 获取视频信息 + video_info = None + if file.parent.exists(): + _, video_info = find_video_and_info(file.parent) + + # 
创建上传器 + publish_date = datetime.strptime(video_info['publish_date'], '%Y-%m-%d %H:%M:%S') if 'publish_date' in video_info else None + + # 检查封面文件路径 + cover_file = video_info.get('cover_file') + if cover_file: + kuaishou_logger.info(f"使用封面文件: {cover_file}") + else: + kuaishou_logger.warning("未设置封面文件") + + uploader = KSVideoUploader( + title=title, + file_path=str(file), + tags=tags, + mentions=mentions, + publish_date=publish_date, + account_file=str(account_file), + cover_file=cover_file # 传递封面文件路径 + ) + + success = await uploader.start() + results = { + 'success': success, + 'total': 1, + 'failed': 0 if success else 1, + 'results': { + title: { + 'success': success, + 'timestamp': datetime.now().isoformat(), + 'file_path': str(file) + } + } + } + else: + # 批量上传 + uploaders = [] + for index, file in enumerate(video_files): + title, tags, mentions = get_title_and_hashtags(str(file)) + kuaishou_logger.info(f"准备上传视频 {index + 1}/{file_num}") + kuaishou_logger.info(f"文件路径: {file}") + kuaishou_logger.info(f"标题: {title}") + kuaishou_logger.info(f"标签: {tags}") + if mentions: + kuaishou_logger.info(f"提及用户: @{', @'.join(mentions)}") + kuaishou_logger.info(f"计划发布时间: {publish_datetimes[index].strftime('%Y-%m-%d %H:%M:%S')}") + + # 获取视频信息 + video_info = None + if file.parent.exists(): + _, video_info = find_video_and_info(file.parent) + + uploader = KSVideoUploader( + title=title, + file_path=str(file), + tags=tags, + mentions=mentions, + publish_date=publish_datetimes[index], + account_file=str(account_file), + cover_file=video_info.get('cover_file') if video_info else None # 添加封面图路径 + ) + uploaders.append(uploader) + + kuaishou_logger.info("初始化批量上传器...") + batch_uploader = KSBatchUploader(max_concurrent=2) + kuaishou_logger.info("开始执行批量上传...") + results = await batch_uploader.batch_upload(uploaders) + + # 处理上传结果 + if results.get('success', False): + kuaishou_logger.success("上传完成!") + kuaishou_logger.info(f"总数: {results.get('total', 0)}") + kuaishou_logger.info(f"成功: {results.get('success', 0)}") + kuaishou_logger.info(f"失败: {results.get('failed', 0)}") + + # 记录详细的上传结果 + if 'results' in results: + for title, info in results['results'].items(): + if info.get('success'): + kuaishou_logger.success(f"视频 '{title}' 上传成功") + else: + kuaishou_logger.error(f"视频 '{title}' 上传失败") + if 'error' in info: + kuaishou_logger.error(f"错误信息: {info['error']}") + + return results + + except Exception as e: + kuaishou_logger.error(f"上传视频失败: {str(e)}", exc_info=True) + return { + 'success': False, + 'error': str(e) + } + +async def main(): + """主函数""" + try: + # 安装浏览器 + if not PlaywrightHelper.install_browser(): + sys.exit(1) + + # 初始化数据库 + db = SocialMediaDB() + platform = "kuaishou" + nickname = "向阳也有米" + + # 查询账号信息 + accounts = db.get_all_accounts(platform) + target_account = next((acc for acc in accounts if acc['nickname'] == nickname), None) + + # 根据昵称生成cookie文件名 + cookie_filename = f"{nickname}.json" + default_cookie_path = str(BASE_DIR / "cookies" / "ks_uploader" / cookie_filename) + + # 获取cookie路径 + account_file = default_cookie_path + if target_account: + cookies = db.get_valid_cookies(platform, target_account['account_id']) + if cookies: + account_file = cookies[0] # 使用最新的cookie + + # 设置Cookie + result = await account_manager.setup_cookie(account_file, expected_username=nickname) + + if not result['success']: + kuaishou_logger.error(f"Cookie设置失败: {result['message']}") + if 'error' in result: + kuaishou_logger.error(f"错误详情: {result['error']}") + sys.exit(1) + + # 获取视频文件和信息 + videos_folder = 
Path(r"F:\向阳也有米\24版本\12月\1125-19-教人7") + video_file, video_info = find_video_and_info(videos_folder) + + if not video_file or not video_info: + kuaishou_logger.error("未找到视频文件或info.json信息不完整") + sys.exit(1) + + # 创建上传器 + publish_date = datetime.strptime(video_info['publish_date'], '%Y-%m-%d %H:%M:%S') if 'publish_date' in video_info else None + + # 检查封面文件路径 + cover_file = video_info.get('cover_file') + if cover_file: + kuaishou_logger.info(f"使用封面文件: {cover_file}") + else: + kuaishou_logger.warning("未设置封面文件") + + uploader = KSVideoUploader( + title=video_info['title'], + file_path=str(video_file), + tags=video_info['tags'], + mentions=video_info.get('mentions', []), + publish_date=publish_date, + account_file=account_file, + cover_file=cover_file # 传递封面文件路径 + ) + + # 开始上传 + kuaishou_logger.info(f"开始上传视频: {video_info['title']}") + if await uploader.start(): + kuaishou_logger.success("视频上传成功!") + else: + kuaishou_logger.error("视频上传失败!") + sys.exit(1) + + except Exception as e: + kuaishou_logger.error(f"程序执行出错: {str(e)}") + sys.exit(1) + finally: + if 'db' in locals(): + db.close() + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/examples/upload_video_to_tencent.py b/examples/upload_video_to_tencent.py index 2e47a1ef..59055c00 100644 --- a/examples/upload_video_to_tencent.py +++ b/examples/upload_video_to_tencent.py @@ -1,28 +1,261 @@ -import asyncio -from pathlib import Path - -from conf import BASE_DIR -from uploader.tencent_uploader.main import weixin_setup, TencentVideo -from utils.constant import TencentZoneTypes -from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags - - -if __name__ == '__main__': - filepath = Path(BASE_DIR) / "videos" - account_file = Path(BASE_DIR / "cookies" / "tencent_uploader" / "account.json") - # 获取视频目录 - folder_path = Path(filepath) - # 获取文件夹中的所有文件 - files = list(folder_path.glob("*.mp4")) - file_num = len(files) - publish_datetimes = generate_schedule_time_next_day(file_num, 1, daily_times=[16]) - cookie_setup = asyncio.run(weixin_setup(account_file, handle=True)) - category = TencentZoneTypes.LIFESTYLE.value # 标记原创需要否则不需要传 - for index, file in enumerate(files): - title, tags = get_title_and_hashtags(str(file)) - # 打印视频文件名、标题和 hashtag - print(f"视频文件名:{file}") - print(f"标题:{title}") - print(f"Hashtag:{tags}") - app = TencentVideo(title, file, tags, publish_datetimes[index], account_file, category) - asyncio.run(app.main(), debug=False) +import os +import sys +import json +import asyncio +from pathlib import Path +from typing import Dict, List, Tuple, Optional +from datetime import datetime + +# 添加项目根目录到 Python 路径 +current_dir = Path(__file__).parent.parent +sys.path.append(str(current_dir)) + +from conf import BASE_DIR +from uploader.tencent_uploader import weixin_setup, TencentVideo +from utils.constant import TencentZoneTypes +from utils.files_times import generate_schedule_time_next_day +from utils.log import tencent_logger + + +def load_video_info(json_path: Path) -> dict: + """ + 加载视频信息配置文件 + + Args: + json_path: json文件路径 + + Returns: + 包含腾讯视频信息的字典 + """ + try: + with open(json_path, 'r', encoding='utf-8') as f: + data = json.load(f) + # 遍历配置数组 + for platform_config in data: + # 检查是否存在tencent平台的配置 + if isinstance(platform_config, dict) and 'tencent' in platform_config: + config = platform_config['tencent'] + # 处理发布时间 + if 'publish_date' in config: + config['publish_date'] = datetime.strptime( + config['publish_date'], + '%Y-%m-%d %H:%M:%S' + ) + return config + + tencent_logger.warning(f"JSON文件 {json_path} 
中未找到腾讯平台配置") + return {} + except Exception as e: + tencent_logger.error(f"读取配置文件失败: {str(e)}") + return {} + + +def find_video_assets(folder_path: Path) -> List[Tuple[Path, Optional[Path], Optional[Path]]]: + """ + 在指定文件夹中查找视频及其相关资源 + + 支持的文件结构: + 1. 直接在文件夹下: + folder/ + ├── video.mp4 + ├── cover.jpg + └── info.json + + 2. 在子文件夹中: + folder/ + └── video1/ + ├── video.mp4 + ├── cover.jpg + └── info.json + + Args: + folder_path: 视频文件夹路径 + + Returns: + 包含(视频路径, 封面路径, 配置文件路径)的列表 + """ + video_assets = [] + + # 首先检查主文件夹中的文件 + video_files = list(folder_path.glob("*.mp4")) + cover_files = list(folder_path.glob("*.jpg")) + list(folder_path.glob("*.png")) + json_files = list(folder_path.glob("*.json")) + + if video_files: # 如果在主文件夹中找到视频文件 + video_path = video_files[0] + cover_path = cover_files[0] if cover_files else None + json_path = json_files[0] if json_files else None + video_assets.append((video_path, cover_path, json_path)) + return video_assets + + # 如果主文件夹中没有找到视频,则检查子文件夹 + for item in folder_path.iterdir(): + if item.is_dir(): + # 在子文件夹中查找文件 + sub_video_files = list(item.glob("*.mp4")) + sub_cover_files = list(item.glob("*.jpg")) + list(item.glob("*.png")) + sub_json_files = list(item.glob("*.json")) + + if sub_video_files: # 如果找到视频文件 + video_path = sub_video_files[0] + cover_path = sub_cover_files[0] if sub_cover_files else None + json_path = sub_json_files[0] if sub_json_files else None + video_assets.append((video_path, cover_path, json_path)) + + if not video_assets: + tencent_logger.warning(f"在目录 {folder_path} 及其子目录中未找到任何视频文件") + + return video_assets + + +async def process_video(video_path: Path, cover_path: Path, json_path: Path, account_file: Path): + """ + 处理单个视频的上传 + """ + try: + # 加载视频信息 + video_info = load_video_info(json_path) if json_path else {} + + # 获取视频信息,如果json中没有则使用默认值 + title = video_info.get('title', video_path.stem) + tags = video_info.get('tags', []) + friends = video_info.get('friends', []) + publish_date = video_info.get('publish_date', datetime.now()) + category = TencentZoneTypes.EMOTION.value + + # 打印当前处理的视频信息 + tencent_logger.info(f"正在处理视频: {video_path.name}") + tencent_logger.info(f"标题: {title}") + tencent_logger.info(f"标签: {tags}") + tencent_logger.info(f"好友标记: {friends}") + tencent_logger.info(f"发布时间: {publish_date}") + if cover_path: + tencent_logger.info(f"封面: {cover_path.name}") + + # 创建上传实例 + app = TencentVideo( + title=title, + file_path=str(video_path), + tags=tags, + publish_date=publish_date, + account_files=[str(account_file)], # 每个实例只使用一个账号 + category=category, + cover_path=str(cover_path) if cover_path else None, + friends=friends + ) + + # 执行上传 + await app.main() + + except Exception as e: + tencent_logger.error(f"处理视频 {video_path} 时出错: {str(e)}") + + +async def single_thread_upload(account_name: str = None): + """ + 单线程上传视频 + + Args: + account_name: 账号名称,用于定位cookie文件。如果为None,则尝试使用目录下的第一个cookie文件 + """ + # 设置基础路径 + videos_folder = Path(r"F:\向阳也有米\24版本\12月\1125-19-教人7") + cookies_folder = Path(BASE_DIR) / "cookies" / "tencent_uploader" + + # 根据account_name获取cookie文件 + if account_name: + account_file = cookies_folder / f"{account_name}.json" + if not account_file.exists(): + tencent_logger.error(f"未找到账号 {account_name} 的cookie文件") + return + else: + # 如果没有指定账号,尝试使用目录下的第一个cookie文件 + cookie_files = list(cookies_folder.glob("*.json")) + if not cookie_files: + tencent_logger.error("未找到任何cookie文件") + return + account_file = cookie_files[0] + account_name = account_file.stem + tencent_logger.info(f"使用默认账号: {account_name}") + + # 验证cookie + if not await 
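`find_video_assets` accepts either a flat folder or one level of per-video sub-folders and yields `(video, cover, json)` triples. A short usage sketch, assuming it is called from the same module (the paths are placeholders), as a reminder of the two accepted layouts:

```python
from pathlib import Path

# Layouts accepted by find_video_assets():
#
#   flat:                      nested (one level):
#   F:/batch/                  F:/batch/
#   |-- clip.mp4               `-- clip-01/
#   |-- clip.jpg                   |-- clip.mp4
#   `-- info.json                  |-- cover.png
#                                  `-- info.json

for video, cover, info in find_video_assets(Path(r"F:\batch")):
    print(video.name, cover.name if cover else "-", info.name if info else "-")
```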
weixin_setup(account_file, handle=True): + tencent_logger.error("Cookie设置失败") + return + + # 获取所有视频资源 + video_assets = find_video_assets(videos_folder) + if not video_assets: + tencent_logger.warning("未找到任何视频资源") + return + + # 顺序处理每个视频 + for video_path, cover_path, json_path in video_assets: + await process_video(video_path, cover_path, json_path, account_file) + + +async def multi_thread_upload(): + """ + 多线程上传视频 - 每个账号一个浏览器实例并发上传 + """ + # 设置基础路径 + cookies_folder = Path(BASE_DIR) / "cookies" / "tencent_uploader" + # 获取所有账号文件 + account_files = list(cookies_folder.glob("*.json")) + if not account_files: + tencent_logger.error("未找到任何账号文件") + return + videos_folder = Path(r"F:\向阳也有米\24版本\12月\1125-19-教人7") + # 获取所有视频资源 + video_assets = find_video_assets(videos_folder) + if not video_assets: + tencent_logger.warning("未找到任何视频资源") + return + + # 验证所有账号的cookie + valid_accounts = [] + for account_file in account_files: + if await weixin_setup(str(account_file), handle=True): + valid_accounts.append(account_file) + else: + tencent_logger.error(f"账号 {account_file.stem} cookie无效,已跳过") + + if not valid_accounts: + tencent_logger.error("没有可用的账号") + return + + # 创建上传任务 + upload_tasks = [] + for i, video_asset in enumerate(video_assets): + video_path, cover_path, json_path = video_asset + + # 加载视频信息 + video_info = load_video_info(json_path) if json_path else {} + title = video_info.get('title', video_path.stem) + tags = video_info.get('tags', []) + friends = video_info.get('friends', []) + publish_date = video_info.get('publish_date', datetime.now()) + category = TencentZoneTypes.EMOTION.value + + # 创建上传实例 - 传入所有有效账号 + app = TencentVideo( + title=title, + file_path=str(video_path), + tags=tags, + publish_date=publish_date, + account_files=[str(account) for account in valid_accounts], # 传入所有有效账号 + category=category, + cover_path=str(cover_path) if cover_path else None, + friends=friends + ) + + # 添加到任务列表 + upload_tasks.append(app.main()) + + # 并发执行所有上传任务 + tencent_logger.info(f"开始并发上传 {len(upload_tasks)} 个视频,使用 {len(valid_accounts)} 个账号") + await asyncio.gather(*upload_tasks) + + +if __name__ == '__main__': + asyncio.run(single_thread_upload(account_name="向阳很有米")) diff --git a/examples/upload_video_to_tiktok.py b/examples/upload_video_to_tiktok.py index efa94cc0..b7ff0336 100644 --- a/examples/upload_video_to_tiktok.py +++ b/examples/upload_video_to_tiktok.py @@ -1,30 +1,30 @@ -import asyncio -from pathlib import Path - -from conf import BASE_DIR -# from tk_uploader.main import tiktok_setup, TiktokVideo -from uploader.tk_uploader.main_chrome import tiktok_setup, TiktokVideo -from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags - - -if __name__ == '__main__': - filepath = Path(BASE_DIR) / "videos" - account_file = Path(BASE_DIR / "cookies" / "tk_uploader" / "account.json") - folder_path = Path(filepath) - # get video files from folder - files = list(folder_path.glob("*.mp4")) - file_num = len(files) - publish_datetimes = generate_schedule_time_next_day(file_num, 1, daily_times=[16]) - cookie_setup = asyncio.run(tiktok_setup(account_file, handle=True)) - for index, file in enumerate(files): - title, tags = get_title_and_hashtags(str(file)) - thumbnail_path = file.with_suffix('.png') - print(f"video_file_name:{file}") - print(f"video_title:{title}") - print(f"video_hashtag:{tags}") - if thumbnail_path.exists(): - print(f"thumbnail_file_name:{thumbnail_path}") - app = TiktokVideo(title, file, tags, publish_datetimes[index], account_file, thumbnail_path) - else: - app = 
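`multi_thread_upload` below creates one coroutine per video and runs them all with `asyncio.gather`, so the concurrency level equals the number of videos. If the number of simultaneous browser sessions ever needs a cap, wrapping each task in a semaphore is a common pattern; the sketch below is generic and not project code (`upload_one` stands in for `app.main()`):

```python
import asyncio
import random

async def upload_one(name: str) -> str:
    # Stand-in for TencentVideo(...).main(); sleeps instead of driving a browser.
    await asyncio.sleep(random.uniform(0.1, 0.3))
    return f"{name} done"

async def bounded_gather(names, max_concurrent: int = 2):
    semaphore = asyncio.Semaphore(max_concurrent)

    async def guarded(name: str) -> str:
        async with semaphore:  # at most `max_concurrent` uploads run at the same time
            return await upload_one(name)

    return await asyncio.gather(*(guarded(n) for n in names))

print(asyncio.run(bounded_gather([f"video-{i}" for i in range(5)], max_concurrent=2)))
```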
TiktokVideo(title, file, tags, publish_datetimes[index], account_file) - asyncio.run(app.main(), debug=False) +import asyncio +from pathlib import Path + +from conf import BASE_DIR +# from tk_uploader.main import tiktok_setup, TiktokVideo +from uploader.tk_uploader.main_chrome import tiktok_setup, TiktokVideo +from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags + + +if __name__ == '__main__': + filepath = Path(BASE_DIR) / "videos" + account_file = Path(BASE_DIR / "cookies" / "tk_uploader" / "account.json") + folder_path = Path(filepath) + # get video files from folder + files = list(folder_path.glob("*.mp4")) + file_num = len(files) + publish_datetimes = generate_schedule_time_next_day(file_num, 1, daily_times=[16]) + cookie_setup = asyncio.run(tiktok_setup(account_file, handle=True)) + for index, file in enumerate(files): + title, tags = get_title_and_hashtags(str(file)) + thumbnail_path = file.with_suffix('.png') + print(f"video_file_name:{file}") + print(f"video_title:{title}") + print(f"video_hashtag:{tags}") + if thumbnail_path.exists(): + print(f"thumbnail_file_name:{thumbnail_path}") + app = TiktokVideo(title, file, tags, publish_datetimes[index], account_file, thumbnail_path) + else: + app = TiktokVideo(title, file, tags, publish_datetimes[index], account_file) + asyncio.run(app.main(), debug=False) diff --git a/examples/upload_video_to_xhs.py b/examples/upload_video_to_xhs.py index 8acabbca..11940e01 100644 --- a/examples/upload_video_to_xhs.py +++ b/examples/upload_video_to_xhs.py @@ -1,68 +1,73 @@ -import configparser -from pathlib import Path -from time import sleep - -from xhs import XhsClient - -from conf import BASE_DIR -from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags -from uploader.xhs_uploader.main import sign_local, beauty_print - -config = configparser.RawConfigParser() -config.read(Path(BASE_DIR / "uploader" / "xhs_uploader" / "accounts.ini")) - - -if __name__ == '__main__': - filepath = Path(BASE_DIR) / "videos" - # 获取视频目录 - folder_path = Path(filepath) - # 获取文件夹中的所有文件 - files = list(folder_path.glob("*.mp4")) - file_num = len(files) - - cookies = config['account1']['cookies'] - xhs_client = XhsClient(cookies, sign=sign_local, timeout=60) - # auth cookie - # 注意:该校验cookie方式可能并没那么准确 - try: - xhs_client.get_video_first_frame_image_id("3214") - except: - print("cookie 失效") - exit() - - publish_datetimes = generate_schedule_time_next_day(file_num, 1, daily_times=[16]) - - for index, file in enumerate(files): - title, tags = get_title_and_hashtags(str(file)) - # 加入到标题 补充标题(xhs 可以填1000字不写白不写) - tags_str = ' '.join(['#' + tag for tag in tags]) - hash_tags_str = '' - hash_tags = [] - - # 打印视频文件名、标题和 hashtag - print(f"视频文件名:{file}") - print(f"标题:{title}") - print(f"Hashtag:{tags}") - - topics = [] - # 获取hashtag - for i in tags[:3]: - topic_official = xhs_client.get_suggest_topic(i) - if topic_official: - topic_official[0]['type'] = 'topic' - topic_one = topic_official[0] - hash_tag_name = topic_one['name'] - hash_tags.append(hash_tag_name) - topics.append(topic_one) - - hash_tags_str = ' ' + ' '.join(['#' + tag + '[话题]#' for tag in hash_tags]) - - note = xhs_client.create_video_note(title=title[:20], video_path=str(file), - desc=title + tags_str + hash_tags_str, - topics=topics, - is_private=False, - post_time=publish_datetimes[index].strftime("%Y-%m-%d %H:%M:%S")) - - beauty_print(note) - # 强制休眠30s,避免风控(必要) - sleep(30) +import configparser +from pathlib import Path +from time import sleep +import sys + +# 
添加项目根目录到 Python 路径 +current_dir = Path(__file__).parent.parent +sys.path.append(str(current_dir)) + +from xhs import XhsClient + +from conf import BASE_DIR +from utils.files_times import generate_schedule_time_next_day, get_title_and_hashtags +from uploader.xhs_uploader.main import sign_local, beauty_print + +config = configparser.RawConfigParser() +config.read(Path(BASE_DIR / "uploader" / "xhs_uploader" / "accounts.ini")) + + +if __name__ == '__main__': + filepath = Path(BASE_DIR) / "videos" + # 获取视频目录 + folder_path = Path(filepath) + # 获取文件夹中的所有文件 + files = list(folder_path.glob("*.mp4")) + file_num = len(files) + + cookies = config['account1']['cookies'] + xhs_client = XhsClient(cookies, sign=sign_local, timeout=60) + # auth cookie + # 注意:该校验cookie方式可能并没那么准确 + try: + xhs_client.get_video_first_frame_image_id("3214") + except: + print("cookie 失效") + exit() + + publish_datetimes = generate_schedule_time_next_day(file_num, 1, daily_times=[16]) + + for index, file in enumerate(files): + title, tags = get_title_and_hashtags(str(file)) + # 加入到标题 补充标题(xhs 可以填1000字不写白不写) + tags_str = ' '.join(['#' + tag for tag in tags]) + hash_tags_str = '' + hash_tags = [] + + # 打印视频文件名、标题和 hashtag + print(f"视频文件名:{file}") + print(f"标题:{title}") + print(f"Hashtag:{tags}") + + topics = [] + # 获取hashtag + for i in tags[:3]: + topic_official = xhs_client.get_suggest_topic(i) + if topic_official: + topic_official[0]['type'] = 'topic' + topic_one = topic_official[0] + hash_tag_name = topic_one['name'] + hash_tags.append(hash_tag_name) + topics.append(topic_one) + + hash_tags_str = ' ' + ' '.join(['#' + tag + '[话题]#' for tag in hash_tags]) + + note = xhs_client.create_video_note(title=title[:20], video_path=str(file), + desc=title + tags_str + hash_tags_str, + topics=topics, + is_private=False, + post_time=publish_datetimes[index].strftime("%Y-%m-%d %H:%M:%S")) + + beauty_print(note) + # 强制休眠30s,避免风控(必要) + sleep(30) diff --git a/logs/1.html b/logs/1.html new file mode 100644 index 00000000..e69de29b diff --git a/requirements.txt b/requirements.txt index 4c400cf3..8ac6a338 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,20 @@ -requests -playwright -eventlet -schedule -cf_clearance -biliup -xhs -qrcode -loguru \ No newline at end of file +requests>=2.31.0 +playwright>=1.42.0 +eventlet>=0.35.0 +schedule>=1.2.0 +cf_clearance>=0.3.0 +biliup>=0.1.0 +xhs>=0.2.13 +qrcode>=7.4.0 +loguru>=0.7.2 +flask>=3.0.0 +gevent>=24.2.1 +pydantic>=2.6.1 +pydantic-settings>=2.2.1 +python-dotenv>=1.0.0 +watchdog>=3.0.0 +pytest>=8.0.0 +pytest-asyncio>=0.23.5 +pytest-cov>=4.1.0 +gradio>=5.15.0 +aiosqlite>=0.19.0 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 00000000..3ba753df --- /dev/null +++ b/setup.py @@ -0,0 +1,12 @@ +from setuptools import setup, find_packages + +setup( + name="social-auto-upload", + version="0.1", + packages=find_packages(), + install_requires=[ + "playwright", + "openai", + # 其他依赖... 
+ ], +) \ No newline at end of file diff --git a/uploader/__init__.py b/uploader/__init__.py index c8cfe0b7..c1b49455 100644 --- a/uploader/__init__.py +++ b/uploader/__init__.py @@ -1,5 +1,5 @@ -from pathlib import Path - -from conf import BASE_DIR - +from pathlib import Path + +from conf import BASE_DIR + Path(BASE_DIR / "cookies").mkdir(exist_ok=True) \ No newline at end of file diff --git a/uploader/bilibili_uploader/__init__.py b/uploader/bilibili_uploader/__init__.py index ce5d45e5..fe21ec68 100644 --- a/uploader/bilibili_uploader/__init__.py +++ b/uploader/bilibili_uploader/__init__.py @@ -1,5 +1,5 @@ -from pathlib import Path - -from conf import BASE_DIR - +from pathlib import Path + +from conf import BASE_DIR + Path(BASE_DIR / "cookies" / "bilibili_uploader").mkdir(exist_ok=True) \ No newline at end of file diff --git a/uploader/bilibili_uploader/main.py b/uploader/bilibili_uploader/main.py index ee49bcd9..506e14f0 100644 --- a/uploader/bilibili_uploader/main.py +++ b/uploader/bilibili_uploader/main.py @@ -1,79 +1,79 @@ -import json -import pathlib -import random -from biliup.plugins.bili_webup import BiliBili, Data - -from utils.log import bilibili_logger - - -def extract_keys_from_json(data): - """Extract specified keys from the provided JSON data.""" - keys_to_extract = ["SESSDATA", "bili_jct", "DedeUserID__ckMd5", "DedeUserID", "access_token"] - extracted_data = {} - - # Extracting cookie data - for cookie in data['cookie_info']['cookies']: - if cookie['name'] in keys_to_extract: - extracted_data[cookie['name']] = cookie['value'] - - # Extracting access_token - if "access_token" in data['token_info']: - extracted_data['access_token'] = data['token_info']['access_token'] - - return extracted_data - - -def read_cookie_json_file(filepath: pathlib.Path): - with open(filepath, 'r', encoding='utf-8') as file: - content = json.load(file) - return content - - -def random_emoji(): - emoji_list = ["🍏", "🍎", "🍊", "🍋", "🍌", "🍉", "🍇", "🍓", "🍈", "🍒", "🍑", "🍍", "🥭", "🥥", "🥝", - "🍅", "🍆", "🥑", "🥦", "🥒", "🥬", "🌶", "🌽", "🥕", "🥔", "🍠", "🥐", "🍞", "🥖", "🥨", "🥯", "🧀", "🥚", "🍳", "🥞", - "🥓", "🥩", "🍗", "🍖", "🌭", "🍔", "🍟", "🍕", "🥪", "🥙", "🌮", "🌯", "🥗", "🥘", "🥫", "🍝", "🍜", "🍲", "🍛", "🍣", - "🍱", "🥟", "🍤", "🍙", "🍚", "🍘", "🍥", "🥮", "🥠", "🍢", "🍡", "🍧", "🍨", "🍦", "🥧", "🍰", "🎂", "🍮", "🍭", "🍬", - "🍫", "🍿", "🧂", "🍩", "🍪", "🌰", "🥜", "🍯", "🥛", "🍼", "☕️", "🍵", "🥤", "🍶", "🍻", "🥂", "🍷", "🥃", "🍸", "🍹", - "🍾", "🥄", "🍴", "🍽", "🥣", "🥡", "🥢"] - return random.choice(emoji_list) - - -class BilibiliUploader(object): - def __init__(self, cookie_data, file: pathlib.Path, title, desc, tid, tags, dtime): - self.upload_thread_num = 3 - self.copyright = 1 - self.lines = 'AUTO' - self.cookie_data = cookie_data - self.file = file - self.title = title - self.desc = desc - self.tid = tid - self.tags = tags - self.dtime = dtime - self._init_data() - - def _init_data(self): - self.data = Data() - self.data.copyright = self.copyright - self.data.title = self.title - self.data.desc = self.desc - self.data.tid = self.tid - self.data.set_tag(self.tags) - self.data.dtime = self.dtime - - def upload(self): - with BiliBili(self.data) as bili: - bili.login_by_cookies(self.cookie_data) - bili.access_token = self.cookie_data.get('access_token') - video_part = bili.upload_file(str(self.file), lines=self.lines, - tasks=self.upload_thread_num) # 上传视频,默认线路AUTO自动选择,线程数量3。 - video_part['title'] = self.title - self.data.append(video_part) - ret = bili.submit() # 提交视频 - if ret.get('code') == 0: - bilibili_logger.success(f'[+] {self.file.name}上传 成功') - 
return True - else: - bilibili_logger.error(f'[-] {self.file.name}上传 失败, error messge: {ret.get("message")}') - return False +import json +import pathlib +import random +from biliup.plugins.bili_webup import BiliBili, Data + +from utils.log import bilibili_logger + + +def extract_keys_from_json(data): + """Extract specified keys from the provided JSON data.""" + keys_to_extract = ["SESSDATA", "bili_jct", "DedeUserID__ckMd5", "DedeUserID", "access_token"] + extracted_data = {} + + # Extracting cookie data + for cookie in data['cookie_info']['cookies']: + if cookie['name'] in keys_to_extract: + extracted_data[cookie['name']] = cookie['value'] + + # Extracting access_token + if "access_token" in data['token_info']: + extracted_data['access_token'] = data['token_info']['access_token'] + + return extracted_data + + +def read_cookie_json_file(filepath: pathlib.Path): + with open(filepath, 'r', encoding='utf-8') as file: + content = json.load(file) + return content + + +def random_emoji(): + emoji_list = ["🍏", "🍎", "🍊", "🍋", "🍌", "🍉", "🍇", "🍓", "🍈", "🍒", "🍑", "🍍", "🥭", "🥥", "🥝", + "🍅", "🍆", "🥑", "🥦", "🥒", "🥬", "🌶", "🌽", "🥕", "🥔", "🍠", "🥐", "🍞", "🥖", "🥨", "🥯", "🧀", "🥚", "🍳", "🥞", + "🥓", "🥩", "🍗", "🍖", "🌭", "🍔", "🍟", "🍕", "🥪", "🥙", "🌮", "🌯", "🥗", "🥘", "🥫", "🍝", "🍜", "🍲", "🍛", "🍣", + "🍱", "🥟", "🍤", "🍙", "🍚", "🍘", "🍥", "🥮", "🥠", "🍢", "🍡", "🍧", "🍨", "🍦", "🥧", "🍰", "🎂", "🍮", "🍭", "🍬", + "🍫", "🍿", "🧂", "🍩", "🍪", "🌰", "🥜", "🍯", "🥛", "🍼", "☕️", "🍵", "🥤", "🍶", "🍻", "🥂", "🍷", "🥃", "🍸", "🍹", + "🍾", "🥄", "🍴", "🍽", "🥣", "🥡", "🥢"] + return random.choice(emoji_list) + + +class BilibiliUploader(object): + def __init__(self, cookie_data, file: pathlib.Path, title, desc, tid, tags, dtime): + self.upload_thread_num = 3 + self.copyright = 1 + self.lines = 'AUTO' + self.cookie_data = cookie_data + self.file = file + self.title = title + self.desc = desc + self.tid = tid + self.tags = tags + self.dtime = dtime + self._init_data() + + def _init_data(self): + self.data = Data() + self.data.copyright = self.copyright + self.data.title = self.title + self.data.desc = self.desc + self.data.tid = self.tid + self.data.set_tag(self.tags) + self.data.dtime = self.dtime + + def upload(self): + with BiliBili(self.data) as bili: + bili.login_by_cookies(self.cookie_data) + bili.access_token = self.cookie_data.get('access_token') + video_part = bili.upload_file(str(self.file), lines=self.lines, + tasks=self.upload_thread_num) # 上传视频,默认线路AUTO自动选择,线程数量3。 + video_part['title'] = self.title + self.data.append(video_part) + ret = bili.submit() # 提交视频 + if ret.get('code') == 0: + bilibili_logger.success(f'[+] {self.file.name}上传 成功') + return True + else: + bilibili_logger.error(f'[-] {self.file.name}上传 失败, error messge: {ret.get("message")}') + return False diff --git a/uploader/douyin_uploader/README.md b/uploader/douyin_uploader/README.md new file mode 100644 index 00000000..a3ac3dc0 --- /dev/null +++ b/uploader/douyin_uploader/README.md @@ -0,0 +1,54 @@ +# 抖音视频上传功能使用说明 + +## 项目概述 +本项目提供了一个抖音视频批量上传的功能,用户可以通过该工具将视频上传到抖音平台。 + +## 主要模块 +- **DouYinVideo**: 负责视频上传的核心功能。 +- **AccountManager**: 管理抖音账号的登录和cookie信息。 +- **UserInfoHelper**: 获取用户信息。 +- **DBHelper**: 处理与数据库的交互。 +- **VideoValidator**: 验证视频文件、标题、标签等。 + +## 使用流程 +1. **环境准备**: 确保已安装Playwright和相关依赖。 +2. **账号设置**: 使用`AccountManager`类设置抖音账号,确保cookie文件有效。 +3. **视频准备**: 将待上传的视频放置在指定目录,并确保视频格式符合要求(如.mp4, .mov等)。 +4. **调用上传功能**: 使用`DouYinVideo`类的`batch_upload`方法进行批量上传。 + +## 方法说明 +### 1. 
`batch_upload` +```python +async def batch_upload(self, context: BrowserContext, video_dir: str, account_file: Path, daily_times: List[int] = [16]) -> None: + """批量上传视频""" + # 该方法接受浏览器上下文、视频目录和账号文件路径作为参数。 + # 它会验证视频目录,查找视频文件,并进行上传。 +``` + +### 2. `setup_account` +```python +async def setup_account(self, account_file: str, handle: bool = False, context: Optional[BrowserContext] = None) -> Dict[str, Any]: + """设置抖音账号,优先使用已存在的浏览器会话""" +``` + +### 3. `get_user_info` +```python +async def get_user_info(page: Page) -> Optional[Dict[str, Any]]: + """获取用户信息""" +``` + +## 注意事项 +- 确保视频文件和cookie文件的路径正确。 +- 处理上传过程中可能出现的错误,查看日志以获取详细信息。 + +## 流程图 +```mermaid +flowchart TD + A[环境准备] --> B[账号设置] + B --> C[视频准备] + C --> D[发布功能] + D --> E{发布结果} + E -->|成功| F[完成] + E -->|失败| G[查看日志] + C -->|继续上传| C +``` \ No newline at end of file diff --git a/uploader/douyin_uploader/__init__.py b/uploader/douyin_uploader/__init__.py index 4a213b89..106d6862 100644 --- a/uploader/douyin_uploader/__init__.py +++ b/uploader/douyin_uploader/__init__.py @@ -1,5 +1,26 @@ -from pathlib import Path - -from conf import BASE_DIR - -Path(BASE_DIR / "cookies" / "douyin_uploader").mkdir(exist_ok=True) \ No newline at end of file +""" +抖音上传器模块 +提供抖音视频上传、账号管理等功能 +""" + +__version__ = "1.0.0" +__author__ = "向阳很有米" + +from pathlib import Path +from conf import BASE_DIR + +# 创建必要的目录 +Path(BASE_DIR / "cookies" / "douyin_uploader").mkdir(exist_ok=True) + +# 导入主要组件 +from .modules.video import DouYinVideo +from .modules.account import account_manager +from .utils.user_info import UserInfoHelper + +__all__ = [ + "DouYinVideo", + "account_manager", + "UserInfoHelper", + "__version__", + "__author__" +] \ No newline at end of file diff --git a/uploader/douyin_uploader/main.py b/uploader/douyin_uploader/main.py deleted file mode 100644 index 9ed0d972..00000000 --- a/uploader/douyin_uploader/main.py +++ /dev/null @@ -1,240 +0,0 @@ -# -*- coding: utf-8 -*- -from datetime import datetime - -from playwright.async_api import Playwright, async_playwright, Page -import os -import asyncio - -from conf import LOCAL_CHROME_PATH -from utils.base_social_media import set_init_script -from utils.log import douyin_logger - - -async def cookie_auth(account_file): - async with async_playwright() as playwright: - browser = await playwright.chromium.launch(headless=True) - context = await browser.new_context(storage_state=account_file) - context = await set_init_script(context) - # 创建一个新的页面 - page = await context.new_page() - # 访问指定的 URL - await page.goto("https://creator.douyin.com/creator-micro/content/upload") - try: - await page.wait_for_url("https://creator.douyin.com/creator-micro/content/upload", timeout=5000) - except: - print("[+] 等待5秒 cookie 失效") - await context.close() - await browser.close() - return False - # 2024.06.17 抖音创作者中心改版 - if await page.get_by_text('手机号登录').count(): - print("[+] 等待5秒 cookie 失效") - return False - else: - print("[+] cookie 有效") - return True - - -async def douyin_setup(account_file, handle=False): - if not os.path.exists(account_file) or not await cookie_auth(account_file): - if not handle: - # Todo alert message - return False - douyin_logger.info('[+] cookie文件不存在或已失效,即将自动打开浏览器,请扫码登录,登陆后会自动生成cookie文件') - await douyin_cookie_gen(account_file) - return True - - -async def douyin_cookie_gen(account_file): - async with async_playwright() as playwright: - options = { - 'headless': False - } - # Make sure to run headed. - browser = await playwright.chromium.launch(**options) - # Setup context however you like. 
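The README above documents `batch_upload(context, video_dir, account_file, daily_times)` as the entry point, and the package `__init__` re-exports `DouYinVideo`. A hedged end-to-end sketch of wiring those pieces to a Playwright context; the import path and call signature follow the README, while the cookie path, launch options, and overall flow are assumptions:

```python
import asyncio
from pathlib import Path
from playwright.async_api import async_playwright

from uploader.douyin_uploader import DouYinVideo  # exported via the package __init__ shown above

async def run_batch(video_dir: str, cookie_file: Path) -> None:
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        context = await browser.new_context(storage_state=str(cookie_file))
        try:
            uploader = DouYinVideo()
            # Signature as documented in the README: context, video dir, cookie file, daily schedule hours.
            await uploader.batch_upload(context, video_dir, cookie_file, daily_times=[16])
        finally:
            await context.close()
            await browser.close()

if __name__ == "__main__":
    asyncio.run(run_batch("videos", Path("cookies/douyin_uploader/示例账号.json")))
```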
- context = await browser.new_context() # Pass any options - context = await set_init_script(context) - # Pause the page, and start recording manually. - page = await context.new_page() - await page.goto("https://creator.douyin.com/") - await page.pause() - # 点击调试器的继续,保存cookie - await context.storage_state(path=account_file) - - -class DouYinVideo(object): - def __init__(self, title, file_path, tags, publish_date: datetime, account_file, thumbnail_path=None): - self.title = title # 视频标题 - self.file_path = file_path - self.tags = tags - self.publish_date = publish_date - self.account_file = account_file - self.date_format = '%Y年%m月%d日 %H:%M' - self.local_executable_path = LOCAL_CHROME_PATH - self.thumbnail_path = thumbnail_path - - async def set_schedule_time_douyin(self, page, publish_date): - # 选择包含特定文本内容的 label 元素 - label_element = page.locator("[class^='radio']:has-text('定时发布')") - # 在选中的 label 元素下点击 checkbox - await label_element.click() - await asyncio.sleep(1) - publish_date_hour = publish_date.strftime("%Y-%m-%d %H:%M") - - await asyncio.sleep(1) - await page.locator('.semi-input[placeholder="日期和时间"]').click() - await page.keyboard.press("Control+KeyA") - await page.keyboard.type(str(publish_date_hour)) - await page.keyboard.press("Enter") - - await asyncio.sleep(1) - - async def handle_upload_error(self, page): - douyin_logger.info('视频出错了,重新上传中') - await page.locator('div.progress-div [class^="upload-btn-input"]').set_input_files(self.file_path) - - async def upload(self, playwright: Playwright) -> None: - # 使用 Chromium 浏览器启动一个浏览器实例 - if self.local_executable_path: - browser = await playwright.chromium.launch(headless=False, executable_path=self.local_executable_path) - else: - browser = await playwright.chromium.launch(headless=False) - # 创建一个浏览器上下文,使用指定的 cookie 文件 - context = await browser.new_context(storage_state=f"{self.account_file}") - context = await set_init_script(context) - - # 创建一个新的页面 - page = await context.new_page() - # 访问指定的 URL - await page.goto("https://creator.douyin.com/creator-micro/content/upload") - douyin_logger.info(f'[+]正在上传-------{self.title}.mp4') - # 等待页面跳转到指定的 URL,没进入,则自动等待到超时 - douyin_logger.info(f'[-] 正在打开主页...') - await page.wait_for_url("https://creator.douyin.com/creator-micro/content/upload") - # 点击 "上传视频" 按钮 - await page.locator("div[class^='container'] input").set_input_files(self.file_path) - - # 等待页面跳转到指定的 URL - while True: - # 判断是是否进入视频发布页面,没进入,则自动等待到超时 - try: - await page.wait_for_url( - "https://creator.douyin.com/creator-micro/content/publish?enter_from=publish_page") - break - except: - douyin_logger.info(f' [-] 正在等待进入视频发布页面...') - await asyncio.sleep(0.1) - - # 填充标题和话题 - # 检查是否存在包含输入框的元素 - # 这里为了避免页面变化,故使用相对位置定位:作品标题父级右侧第一个元素的input子元素 - await asyncio.sleep(1) - douyin_logger.info(f' [-] 正在填充标题和话题...') - title_container = page.get_by_text('作品标题').locator("..").locator("xpath=following-sibling::div[1]").locator("input") - if await title_container.count(): - await title_container.fill(self.title[:30]) - else: - titlecontainer = page.locator(".notranslate") - await titlecontainer.click() - await page.keyboard.press("Backspace") - await page.keyboard.press("Control+KeyA") - await page.keyboard.press("Delete") - await page.keyboard.type(self.title) - await page.keyboard.press("Enter") - css_selector = ".zone-container" - for index, tag in enumerate(self.tags, start=1): - await page.type(css_selector, "#" + tag) - await page.press(css_selector, "Space") - douyin_logger.info(f'总共添加{len(self.tags)}个话题') - - while True: - # 
判断重新上传按钮是否存在,如果不存在,代表视频正在上传,则等待 - try: - # 新版:定位重新上传 - number = await page.locator('[class^="long-card"] div:has-text("重新上传")').count() - if number > 0: - douyin_logger.success(" [-]视频上传完毕") - break - else: - douyin_logger.info(" [-] 正在上传视频中...") - await asyncio.sleep(2) - - if await page.locator('div.progress-div > div:has-text("上传失败")').count(): - douyin_logger.error(" [-] 发现上传出错了... 准备重试") - await self.handle_upload_error(page) - except: - douyin_logger.info(" [-] 正在上传视频中...") - await asyncio.sleep(2) - - #上传视频封面 - await self.set_thumbnail(page, self.thumbnail_path) - - # 更换可见元素 - await self.set_location(page, "杭州市") - - # 頭條/西瓜 - third_part_element = '[class^="info"] > [class^="first-part"] div div.semi-switch' - # 定位是否有第三方平台 - if await page.locator(third_part_element).count(): - # 检测是否是已选中状态 - if 'semi-switch-checked' not in await page.eval_on_selector(third_part_element, 'div => div.className'): - await page.locator(third_part_element).locator('input.semi-switch-native-control').click() - - if self.publish_date != 0: - await self.set_schedule_time_douyin(page, self.publish_date) - - # 判断视频是否发布成功 - while True: - # 判断视频是否发布成功 - try: - publish_button = page.get_by_role('button', name="发布", exact=True) - if await publish_button.count(): - await publish_button.click() - await page.wait_for_url("https://creator.douyin.com/creator-micro/content/manage**", - timeout=3000) # 如果自动跳转到作品页面,则代表发布成功 - douyin_logger.success(" [-]视频发布成功") - break - except: - douyin_logger.info(" [-] 视频正在发布中...") - await page.screenshot(full_page=True) - await asyncio.sleep(0.5) - - await context.storage_state(path=self.account_file) # 保存cookie - douyin_logger.success(' [-]cookie更新完毕!') - await asyncio.sleep(2) # 这里延迟是为了方便眼睛直观的观看 - # 关闭浏览器上下文和浏览器实例 - await context.close() - await browser.close() - - async def set_thumbnail(self, page: Page, thumbnail_path: str): - if thumbnail_path: - await page.click('text="选择封面"') - await page.wait_for_selector("div.semi-modal-content:visible") - await page.click('text="设置竖封面"') - await page.wait_for_timeout(2000) # 等待2秒 - # 定位到上传区域并点击 - await page.locator("div[class^='semi-upload upload'] >> input.semi-upload-hidden-input").set_input_files(thumbnail_path) - await page.wait_for_timeout(2000) # 等待2秒 - await page.locator("div[class^='extractFooter'] button:visible:has-text('完成')").click() - # finish_confirm_element = page.locator("div[class^='confirmBtn'] >> div:has-text('完成')") - # if await finish_confirm_element.count(): - # await finish_confirm_element.click() - # await page.locator("div[class^='footer'] button:has-text('完成')").click() - - async def set_location(self, page: Page, location: str = "杭州市"): - # todo supoort location later - # await page.get_by_text('添加标签').locator("..").locator("..").locator("xpath=following-sibling::div").locator( - # "div.semi-select-single").nth(0).click() - await page.locator('div.semi-select span:has-text("输入地理位置")').click() - await page.keyboard.press("Backspace") - await page.wait_for_timeout(2000) - await page.keyboard.type(location) - await page.wait_for_selector('div[role="listbox"] [role="option"]', timeout=5000) - await page.locator('div[role="listbox"] [role="option"]').first.click() - - async def main(self): - async with async_playwright() as playwright: - await self.upload(playwright) - - diff --git a/uploader/douyin_uploader/modules/account.py b/uploader/douyin_uploader/modules/account.py new file mode 100644 index 00000000..d0a98dde --- /dev/null +++ b/uploader/douyin_uploader/modules/account.py @@ -0,0 +1,458 @@ +""" +抖音账号管理模块 
+提供cookie验证、生成等功能 +""" + +from pathlib import Path +from typing import Dict, Any, Optional, Tuple +from playwright.async_api import Page, BrowserContext +import os +import asyncio +import json + +# 使用绝对导入 +from utils.log import douyin_logger +from conf import BASE_DIR # 导入项目根目录 +from utils.playwright_helper import PlaywrightHelper + +# 使用相对导入访问douyin_uploader包内的模块 +from ..utils.cookie_helper import CookieHelper +from ..utils.login_helper import LoginHelper +from ..utils.error_helper import ErrorHelper +from ..utils.db_helper import DBHelper +from ..utils.cookie_sync_manager import CookieSyncManager + +class AccountManager: + """抖音账号管理类""" + + def __init__(self): + # 初始化各个助手类 + self.playwright_helper = PlaywrightHelper() + self.cookie_helper = CookieHelper() + self.login_helper = LoginHelper() + self.error_helper = ErrorHelper() + self.db_helper = DBHelper(platform="douyin") + self.cookie_sync_manager = CookieSyncManager() + + # 获取当前工作目录 + current_dir = Path.cwd() + douyin_logger.info(f"当前工作目录: {current_dir}") + + # 设置浏览器用户数据目录(使用绝对路径) + self.base_user_data_dir = current_dir / ".playwright" / "user_data" / "douyin" + douyin_logger.info(f"浏览器用户数据目录: {self.base_user_data_dir}") + self.base_user_data_dir.mkdir(parents=True, exist_ok=True) + + # 设置cookie文件基础目录(使用绝对路径) + self.base_cookie_dir = current_dir / "cookies" / "douyin_uploader" + douyin_logger.info(f"Cookie文件目录: {self.base_cookie_dir}") + self.base_cookie_dir.mkdir(parents=True, exist_ok=True) + + def _get_account_dirs(self, account_id: str) -> tuple[Path, Path]: + """获取账号相关目录 + Args: + account_id: 账号ID + Returns: + tuple[Path, Path]: (user_data_dir, cookie_file) + """ + # 为每个账号创建独立的用户数据目录 + user_data_dir = self.base_user_data_dir / account_id + douyin_logger.info(f"账号 {account_id} 的用户数据目录: {user_data_dir}") + + # 使用绝对路径的cookie文件 + cookie_file = self.base_cookie_dir / f"{account_id}.json" + douyin_logger.info(f"账号 {account_id} 的cookie文件: {cookie_file}") + + # 检查目录和文件状态 + if user_data_dir.exists(): + douyin_logger.info(f"用户数据目录已存在,内容: {[f.name for f in user_data_dir.glob('*')]}") + else: + douyin_logger.info("用户数据目录不存在") + + if cookie_file.exists(): + douyin_logger.info(f"Cookie文件存在,大小: {cookie_file.stat().st_size} 字节") + try: + with open(cookie_file, 'r', encoding='utf-8') as f: + state = json.load(f) + cookies_count = len(state.get('cookies', [])) + origins_count = len(state.get('origins', [])) + douyin_logger.info(f"Cookie文件内容: {cookies_count} cookies, {origins_count} origins") + except Exception as e: + douyin_logger.error(f"读取Cookie文件失败: {str(e)}") + else: + douyin_logger.info("Cookie文件不存在") + douyin_logger.info(f"Cookie文件不存在,创建新文件: {cookie_file}") + with open(cookie_file, 'w', encoding='utf-8') as f: + json.dump({"cookies": [], "origins": []}, f) + douyin_logger.info(f"新建Cookie文件成功: {cookie_file}") + + # 确保cookie目录存在 + cookie_file.parent.mkdir(parents=True, exist_ok=True) + + return user_data_dir, cookie_file + + async def _handle_sync_after_operation( + self, + account_id: str, + operation_name: str + ) -> None: + """处理操作后的同步 + Args: + account_id: 账号ID + operation_name: 操作名称 + """ + try: + user_data_dir, cookie_file = self._get_account_dirs(account_id) + await self.cookie_sync_manager.sync_from_profile_to_file( + user_data_dir, + cookie_file, + account_id + ) + except Exception as e: + douyin_logger.error(f"操作后同步失败 [{operation_name}]: {str(e)}") + + async def _verify_cookie_and_get_user_info(self, account_file: str, headless: bool = True) -> tuple[bool, Optional[Dict[str, Any]]]: + """ + 验证cookie并获取用户信息的通用方法 + Args: + 
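`_get_account_dirs` gives every account its own persistent browser profile under `.playwright/user_data/douyin/<account_id>` and a storage-state snapshot under `cookies/douyin_uploader/<account_id>.json`, seeding an empty skeleton on first use. A condensed sketch of that layout (the helper name is illustrative; paths follow the code above):

```python
import json
from pathlib import Path
from typing import Tuple

def account_dirs(base: Path, account_id: str) -> Tuple[Path, Path]:
    """Per-account layout mirroring AccountManager._get_account_dirs."""
    user_data_dir = base / ".playwright" / "user_data" / "douyin" / account_id   # persistent browser profile
    cookie_file = base / "cookies" / "douyin_uploader" / f"{account_id}.json"    # storage_state snapshot
    user_data_dir.mkdir(parents=True, exist_ok=True)
    cookie_file.parent.mkdir(parents=True, exist_ok=True)
    if not cookie_file.exists():
        # Empty Playwright storage_state skeleton, the same shape the class writes on first use.
        cookie_file.write_text(json.dumps({"cookies": [], "origins": []}), encoding="utf-8")
    return user_data_dir, cookie_file

print(account_dirs(Path.cwd(), "demo_account"))
```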
account_file: cookie文件路径 + headless: 是否使用无头模式,默认为True + Returns: + tuple[bool, Optional[Dict[str, Any]]]: (是否成功, 用户信息) + """ + if not self.cookie_helper.verify_cookie_file(account_file): + return False, None + + # 从cookie文件路径中提取account_id + account_id = Path(account_file).stem + user_data_dir, cookie_file = self._get_account_dirs(account_id) + + try: + # 准备浏览器配置 + browser_config = {} + + # 优先使用cookie文件 + if Path(cookie_file).exists(): + try: + with open(cookie_file, 'r', encoding='utf-8') as f: + storage = json.load(f) + if storage.get('cookies') or storage.get('origins'): + douyin_logger.info(f"使用cookie文件: {cookie_file}") + browser_config['storage_state'] = storage + except Exception as e: + douyin_logger.warning(f"读取cookie文件失败: {str(e)}") + + # 如果没有cookie文件,尝试使用user_data_dir + if not browser_config.get('storage_state') and Path(user_data_dir).exists(): + douyin_logger.info(f"使用持久化上下文: {user_data_dir}") + browser_config['user_data_dir'] = str(user_data_dir) + + async with self.playwright_helper.get_context(**browser_config) as context: + # 创建新页面 + page = await context.new_page() + + # 导航到创作者中心 + if not await self.login_helper.navigate_to_creator_center(page): + return False, None + + # 检查登录状态并获取用户信息 + is_logged_in, user_info = await self.login_helper.check_login_status(page) + + # 如果登录成功,更新cookie + if is_logged_in: + douyin_logger.info(f"已登录用户: {user_info.get('nickname', 'Unknown')}") + new_state = await context.storage_state() + if new_state.get('cookies') or new_state.get('origins'): + douyin_logger.info(f"更新cookie状态: {cookie_file}") + with open(cookie_file, 'w', encoding='utf-8') as f: + json.dump(new_state, f, ensure_ascii=False, indent=2) + + # 同步到user_data_dir(如果使用) + if browser_config.get('user_data_dir'): + await self._handle_sync_after_operation(account_id, "verify_cookie") + + return is_logged_in, user_info + + except Exception as e: + douyin_logger.error(f"验证cookie时发生错误: {str(e)}") + if page: + await self.error_helper.save_error_context(page, e, "verify_cookie") + return False, None + + async def setup_account( + self, + account_file: str, + handle: bool = False, + context: Optional[BrowserContext] = None + ) -> Dict[str, Any]: + """ + 设置抖音账号,优先使用已存在的浏览器会话 + Args: + account_file: cookie文件路径 + handle: 是否自动处理无效cookie + context: 可选的现有浏览器上下文 + Returns: + Dict[str, Any]: 设置结果,包含浏览器上下文 + """ + try: + account_id = Path(account_file).stem + user_data_dir, cookie_file = self._get_account_dirs(account_id) + + # 扩展浏览器启动参数 + browser_args = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-infobars', + '--window-position=0,0', + '--ignore-certifcate-errors', + '--ignore-certifcate-errors-spki-list', + '--disable-background-networking', + '--disable-background-timer-throttling', + '--disable-backgrounding-occluded-windows', + '--disable-breakpad', + '--disable-client-side-phishing-detection', + '--disable-component-update', + '--disable-default-apps', + '--disable-dev-shm-usage', + '--disable-domain-reliability', + '--disable-features=AudioServiceOutOfProcess', + '--disable-hang-monitor', + '--disable-ipc-flooding-protection', + '--disable-notifications', + '--disable-offer-store-unmasked-wallet-cards', + '--disable-popup-blocking', + '--disable-print-preview', + '--disable-prompt-on-repost', + '--disable-renderer-backgrounding', + '--disable-speech-api', + '--disable-sync', + '--disable-web-security', + '--disk-cache-size=33554432', + '--hide-scrollbars', + '--ignore-gpu-blacklist', + '--metrics-recording-only', + '--mute-audio', + '--no-default-browser-check', + 
'--no-first-run', + '--no-pings', + '--no-zygote', + '--password-store=basic', + '--use-gl=swiftshader', + '--use-mock-keychain', + '--window-size=1920,1080', + # 添加媒体相关参数 + '--autoplay-policy=no-user-gesture-required', + '--disable-features=MediaEngagement', + '--enable-automation', + '--enable-features=NetworkService,NetworkServiceInProcess', + '--force-color-profile=srgb', + '--force-device-scale-factor=1', + ] + + # 准备浏览器配置 + browser_config = { + 'headless': False, # 确保非无头模式 + 'args': browser_args, + 'ignore_default_args': ['--enable-automation'], # 禁用自动化标记 + 'viewport': {'width': 1920, 'height': 1080}, + 'screen': {'width': 1920, 'height': 1080}, + 'bypass_csp': True, # 绕过内容安全策略 + 'accept_downloads': True, + 'locale': 'zh-CN', + 'timezone_id': 'Asia/Shanghai', + 'geolocation': {'latitude': 39.9042, 'longitude': 116.4074}, # 北京坐标 + 'permissions': ['geolocation'], + 'color_scheme': 'light', + 'device_scale_factor': 1, + 'is_mobile': False, + 'has_touch': False, + 'java_script_enabled': True + } + + # 如果存在user_data_dir,使用持久化上下文 + if Path(user_data_dir).exists(): + douyin_logger.info(f"使用持久化上下文: {user_data_dir}") + browser_config['user_data_dir'] = str(user_data_dir) + + # 如果没有提供现有的上下文,则创建新的 + should_close_context = False + if not context: + douyin_logger.info(f"使用配置创建浏览器上下文: {browser_config}") + context = await self.playwright_helper.get_context(**browser_config) + await asyncio.sleep(2) # 等待上下文初始化完成 + should_close_context = True + + try: + # 创建新页面 + page = await context.new_page() + + # 设置用户代理 + await page.set_extra_http_headers({ + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'Accept-Language': 'zh-CN,zh;q=0.9', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', + 'Accept-Encoding': 'gzip, deflate, br', + 'Connection': 'keep-alive' + }) + + # 如果存在cookie文件,加载cookie + if Path(cookie_file).exists(): + try: + with open(cookie_file, 'r', encoding='utf-8') as f: + storage = json.load(f) + if storage.get('cookies'): + douyin_logger.info(f"加载cookie: {len(storage['cookies'])} cookies") + await context.add_cookies(storage['cookies']) + except Exception as e: + douyin_logger.error(f"加载cookie失败: {str(e)}") + + # 访问创作者中心,使用更稳定的导航策略 + douyin_logger.info("访问创作者中心检查登录状态...") + try: + # 然后导航到创作者中心,使用更长的超时时间 + response = await page.goto( + "https://creator.douyin.com/creator-micro/home", + wait_until="domcontentloaded", # 改用domcontentloaded而不是networkidle + timeout=60000 # 增加超时时间到60秒 + ) + + if not response: + raise Exception("导航到创作者中心失败:无响应") + + if response.status >= 400: + raise Exception(f"导航到创作者中心失败:HTTP {response.status}") + + # 使用多个备选选择器等待页面加载 + selectors = [ + "div[class*='layout-content']", # 更通用的布局选择器 + "div[class*='header']", # 页面头部 + "div[class*='creator']", # 创作者相关元素 + "div[class*='menu']" # 菜单元素 + ] + + # 尝试等待任一选择器出现 + for selector in selectors: + try: + await page.wait_for_selector(selector, timeout=5000) + douyin_logger.info(f"页面加载完成,匹配选择器: {selector}") + break + except Exception: + continue + + await asyncio.sleep(2) + + except Exception as e: + douyin_logger.error(f"导航失败: {str(e)}") + # 如果导航失败,尝试刷新页面 + await page.reload(wait_until="domcontentloaded", timeout=30000) + await asyncio.sleep(2) + + # 检查登录状态 + is_logged_in, user_info = await self.login_helper.check_login_status(page) + + if not is_logged_in: + if handle: + douyin_logger.info("未登录,等待用户扫码登录...") + if not await self.login_helper.wait_for_login(page): + raise Exception("等待登录超时") + + is_logged_in, 
user_info = await self.login_helper.check_login_status(page) + if not is_logged_in or not user_info: + raise Exception("登录后状态检查失败") + + # 保存新的登录状态 + new_state = await context.storage_state() + if new_state.get('cookies') or new_state.get('origins'): + douyin_logger.info(f"保存新的登录状态到: {cookie_file}") + with open(cookie_file, 'w', encoding='utf-8') as f: + json.dump(new_state, f, ensure_ascii=False, indent=2) + + # 同步到user_data_dir(如果使用) + if browser_config.get('user_data_dir'): + await self._handle_sync_after_operation(account_id, "setup_account") + else: + raise Exception("未登录且未启用自动处理") + else: + douyin_logger.info(f"已登录用户: {user_info.get('nickname', 'Unknown')}") + # 更新cookie状态 + new_state = await context.storage_state() + if new_state.get('cookies') or new_state.get('origins'): + douyin_logger.info(f"更新cookie状态: {cookie_file}") + with open(cookie_file, 'w', encoding='utf-8') as f: + json.dump(new_state, f, ensure_ascii=False, indent=2) + + # 导航到上传页面,使用更稳定的导航策略 + douyin_logger.info("导航到上传页面...") + try: + await page.goto( + "https://creator.douyin.com/creator-micro/content/upload", + wait_until="domcontentloaded", + timeout=30000 + ) + # 等待上传按钮出现 + await page.wait_for_selector(".container-drag-AOMYqU", timeout=10000) + await asyncio.sleep(2) + except Exception as e: + douyin_logger.error(f"导航到上传页面失败: {str(e)}") + # 如果导航失败,尝试刷新页面 + await page.reload(wait_until="domcontentloaded", timeout=30000) + await asyncio.sleep(2) + + return { + 'success': True, + 'message': '账号设置成功', + 'user_info': user_info, + 'page': page, + 'context': context + } + + except Exception as e: + if page: + await page.close() + if should_close_context and context: + await context.close() + raise + + except Exception as e: + douyin_logger.error(f"设置账号时发生错误: {str(e)}") + return { + 'success': False, + 'message': f'设置账号失败: {str(e)}' + } + + async def update_account_info(self, account_file: str) -> Optional[Dict[str, Any]]: + """ + 更新或添加账号信息 + Args: + account_file: cookie文件路径 + Returns: + Optional[Dict[str, Any]]: 更新/添加后的账号信息,失败返回None + """ + try: + # 从cookie文件路径中提取account_id + account_id = Path(account_file).stem + user_data_dir, cookie_file = self._get_account_dirs(account_id) + + # 验证cookie并获取用户信息 + is_valid, user_info = await self._verify_cookie_and_get_user_info( + str(cookie_file), + headless=False + ) + if not is_valid or not user_info: + return None + + # 更新或添加到数据库 + if self.db_helper.update_account(user_info): + # 信息更新成功后同步 + await self._handle_sync_after_operation( + account_id, + "update_account_info" + ) + return user_info + return None + + except Exception as e: + douyin_logger.error(f"更新/添加账号信息失败: {str(e)}") + return None + +account_manager = AccountManager() \ No newline at end of file diff --git a/uploader/douyin_uploader/modules/validator.py b/uploader/douyin_uploader/modules/validator.py new file mode 100644 index 00000000..e144e6cf --- /dev/null +++ b/uploader/douyin_uploader/modules/validator.py @@ -0,0 +1,128 @@ +""" +抖音上传参数验证模块 +提供视频信息和上传参数的验证功能 +""" + +import os +from datetime import datetime +from typing import List, Optional + +class VideoValidator: + """视频参数验证器""" + + @staticmethod + def validate_video_file(file_path: str) -> bool: + """ + 验证视频文件是否存在且格式正确 + Args: + file_path: 视频文件路径 + Returns: + bool: 验证是否通过 + """ + if not os.path.exists(file_path): + return False + + valid_extensions = ['.mp4', '.mov', '.avi'] + file_ext = os.path.splitext(file_path)[1].lower() + return file_ext in valid_extensions + + @staticmethod + def validate_title(title: str) -> bool: + """ + 验证视频标题是否符合要求 + Args: + title: 
视频标题
+        Returns:
+            bool: 验证是否通过
+        """
+        if not title or len(title) > 30:
+            return False
+        return True
+
+    @staticmethod
+    def validate_tags(tags: List[str]) -> bool:
+        """
+        验证标签是否符合要求
+        Args:
+            tags: 标签列表
+        Returns:
+            bool: 验证是否通过
+        """
+        if not tags:
+            return False
+
+        for tag in tags:
+            if len(tag) > 20 or not tag.strip():
+                return False
+        return True
+
+    @staticmethod
+    def validate_publish_date(publish_date: datetime) -> bool:
+        """
+        验证发布时间是否符合要求
+        Args:
+            publish_date: 发布时间
+        Returns:
+            bool: 验证是否通过
+        """
+        if not publish_date:
+            return False
+
+        now = datetime.now()
+        if publish_date < now:
+            return False
+        return True
+
+    @staticmethod
+    def validate_thumbnail(thumbnail_path: Optional[str]) -> bool:
+        """
+        验证封面图片是否符合要求
+        Args:
+            thumbnail_path: 封面图片路径
+        Returns:
+            bool: 验证是否通过
+        """
+        if not thumbnail_path:
+            return True
+
+        if not os.path.exists(thumbnail_path):
+            return False
+
+        valid_extensions = ['.jpg', '.jpeg', '.png']
+        file_ext = os.path.splitext(thumbnail_path)[1].lower()
+        return file_ext in valid_extensions
+
+    @classmethod
+    def validate_all(cls,
+                     title: str,
+                     file_path: str,
+                     tags: List[str],
+                     publish_date: datetime,
+                     thumbnail_path: Optional[str] = None) -> tuple[bool, str]:
+        """
+        验证所有上传参数
+        Args:
+            title: 视频标题
+            file_path: 视频文件路径
+            tags: 标签列表
+            publish_date: 发布时间
+            thumbnail_path: 封面图片路径
+        Returns:
+            tuple[bool, str]: (验证是否通过, 错误信息)
+        """
+        if not cls.validate_video_file(file_path):
+            return False, "视频文件不存在或格式不正确"
+
+        if not cls.validate_title(title):
+            return False, "视频标题不符合要求"
+
+        if not cls.validate_tags(tags):
+            return False, "视频标签不符合要求"
+
+        if not cls.validate_publish_date(publish_date):
+            return False, "发布时间不符合要求"
+
+        if not cls.validate_thumbnail(thumbnail_path):
+            return False, "封面图片不存在或格式不正确"
+
+        return True, ""
\ No newline at end of file
diff --git a/uploader/douyin_uploader/modules/video.py b/uploader/douyin_uploader/modules/video.py
new file mode 100644
index 00000000..2bdef1a9
--- /dev/null
+++ b/uploader/douyin_uploader/modules/video.py
@@ -0,0 +1,725 @@
+"""
+抖音视频上传模块
+提供视频上传、封面设置等功能
+"""
+
+import asyncio
+import functools
+from datetime import datetime
+from pathlib import Path
+from typing import List, Optional, Dict, Any, Tuple, Callable
+import os
+import json
+
+from playwright.async_api import Playwright, Page, Browser, BrowserContext
+from utils.log import douyin_logger
+from conf import LOCAL_CHROME_PATH, BASE_DIR
+from utils.base_social_media import set_init_script
+from utils.files_times import generate_schedule_time_next_day
+from ..utils.playwright_helper import PlaywrightHelper
+from .account import AccountManager
+from ..utils.validator import VideoValidator
+
+def handle_douyin_errors(func: Callable):
+    """错误处理装饰器"""
+    @functools.wraps(func)
+    async def wrapper(*args, **kwargs):
+        try:
+            return await func(*args, **kwargs)
+        except VideoValidationError as e:
+            douyin_logger.error(f"视频验证失败: {str(e)}")
+            raise
+        except UploadError as e:
+            douyin_logger.error(f"上传失败: {str(e)}")
+            raise
+        except PublishError as e:
+            douyin_logger.error(f"发布失败: {str(e)}")
+            raise
+        except BrowserOperationError as e:
+            douyin_logger.error(f"浏览器操作失败: {str(e)}")
+            raise
+        except Exception as e:
+            douyin_logger.error(f"未预期的错误: {str(e)}")
+            raise DouYinVideoError(f"操作失败: {str(e)}")
+    return wrapper
+
+# 自定义异常类
+class DouYinVideoError(Exception):
+    """抖音视频上传基础异常类"""
+    pass
+
+class VideoValidationError(DouYinVideoError):
+    """视频验证失败异常"""
+    pass
+
+class UploadError(DouYinVideoError):
+    """上传失败异常"""
+    pass
+
+class PublishError(DouYinVideoError):
"""发布失败异常""" + pass + +class BrowserOperationError(DouYinVideoError): + """浏览器操作异常""" + pass + +# 页面选择器常量 +SELECTORS = { + 'UPLOAD_BUTTONS': [ + '#douyin-creator-master-side-upload-wrap button', + 'button:has-text("发布视频")', + '[class*="header-button-wrap"] button', + '#douyin-creator-master-side-upload' + ], + 'TITLE_INPUT': '.notranslate', + 'TAGS_INPUT': '.zone-container', + 'PUBLISH_BUTTON': 'button.button-dhlUZE.primary-cECiOJ.fixed-J9O8Yw', + 'LOCATION_INPUT': 'div.semi-select span:has-text("输入地理位置")', + 'THIRD_PART_SWITCH': '[class^="info"] > [class^="first-part"] div div.semi-switch', + 'SCHEDULE_RADIO': "[class^='radio']:has-text('定时发布')", + 'SCHEDULE_INPUT': '.semi-input[placeholder="日期和时间"]', + 'MENTION_INPUT': '.zone-container', + 'THUMBNAIL_INPUT': '[type="file"]', + 'VIDEO_INPUT': '[type="file"]' +} + +class DouYinVideo: + """抖音视频上传类""" + + # 支持的文件格式 + SUPPORTED_VIDEO_EXTENSIONS = [".mp4", ".mov", ".avi"] + SUPPORTED_IMAGE_EXTENSIONS = ['.png', '.jpg', '.jpeg', '.webp'] + + def __init__(self): + """初始化抖音视频上传器""" + self.account_manager = AccountManager() + + @staticmethod + def _validate_file_accessibility(file_path: Path) -> None: + """验证文件可访问性""" + if not file_path.exists(): + raise VideoValidationError(f"文件不存在: {file_path}") + if not os.access(file_path, os.R_OK): + raise VideoValidationError(f"文件无法读取: {file_path}") + if file_path.stat().st_size == 0: + raise VideoValidationError(f"文件大小为0: {file_path}") + + @classmethod + def _validate_file_format(cls, file_path: Path, supported_extensions: List[str]) -> None: + """验证文件格式""" + if file_path.suffix.lower() not in supported_extensions: + raise VideoValidationError( + f"不支持的文件格式 {file_path.suffix},支持的格式: {supported_extensions}" + ) + + @classmethod + def _validate_video_file(cls, file_path: Path) -> None: + """验证视频文件""" + cls._validate_file_accessibility(file_path) + cls._validate_file_format(file_path, cls.SUPPORTED_VIDEO_EXTENSIONS) + if not VideoValidator.validate_video_file(str(file_path)): + raise VideoValidationError(f"视频文件验证失败: {file_path}") + + @classmethod + def _validate_thumbnail_file(cls, file_path: Path) -> None: + """验证封面图片文件""" + cls._validate_file_accessibility(file_path) + cls._validate_file_format(file_path, cls.SUPPORTED_IMAGE_EXTENSIONS) + if not VideoValidator.validate_thumbnail(str(file_path)): + raise VideoValidationError(f"封面图片验证失败: {file_path}") + + @classmethod + def _validate_mentions(cls, mentions: List[str]) -> None: + """验证@提及""" + if not mentions or not VideoValidator.validate_mentions(mentions): + raise VideoValidationError(f"提及无效: {mentions}") + + @staticmethod + def _validate_title(title: str) -> None: + """验证标题""" + if not title or not VideoValidator.validate_title(title): + raise VideoValidationError(f"标题无效: {title}") + + @staticmethod + def _validate_tags(tags: List[str]) -> None: + """验证标签""" + if not tags or not VideoValidator.validate_tags(tags): + raise VideoValidationError(f"标签无效: {tags}") + + @staticmethod + def _validate_publish_date(publish_date: datetime) -> None: + """验证发布时间""" + if not VideoValidator.validate_publish_date(publish_date): + raise VideoValidationError(f"发布时间无效: {publish_date}") + + @classmethod + def find_thumbnail(cls, video_path: Path) -> Optional[Path]: + """查找视频对应的封面图片""" + try: + for ext in cls.SUPPORTED_IMAGE_EXTENSIONS: + thumbnail = video_path.with_suffix(ext) + if thumbnail.exists(): + cls._validate_thumbnail_file(thumbnail) + douyin_logger.info(f"找到封面图片: {thumbnail.name}") + return thumbnail + douyin_logger.warning(f"未找到视频 {video_path.name} 的封面图片") + return 
None + except VideoValidationError: + return None + except Exception as e: + douyin_logger.error(f"查找封面图片失败: {str(e)}") + return None + + @classmethod + def find_video_files(cls, base_dir: Path) -> List[Path]: + """在指定目录下查找视频文件""" + try: + video_files = set() + douyin_logger.info(f"查找视频文件的目录: {base_dir}") + # 打印出base_dir的类型 + douyin_logger.info(f"base_dir的类型: {type(base_dir)}") + for ext in cls.SUPPORTED_VIDEO_EXTENSIONS: + found_files = list(base_dir.glob(f"*{ext}")) + list(base_dir.glob(f"*{ext.upper()}")) + for file in found_files: + try: + cls._validate_video_file(file) + video_files.add(file) + except VideoValidationError as e: + douyin_logger.warning(f"跳过无效视频 {file}: {str(e)}") + sorted_files = sorted(list(video_files), key=lambda x: x.stat().st_mtime) + if not sorted_files: + douyin_logger.warning(f"在目录 {base_dir} 中未找到有效的视频文件") + return sorted_files + except Exception as e: + douyin_logger.error(f"查找视频文件失败: {str(e)}") + return [] + + @staticmethod + def load_video_info(video_path: Path) -> Optional[Dict[str, Any]]: + """加载视频的配置信息""" + try: + info_file = video_path.parent / "info.json" + if not info_file.exists(): + douyin_logger.warning(f"未找到配置文件: {info_file}") + return None + + with open(info_file, 'r', encoding='utf-8') as f: + try: + info_list = json.load(f) + except json.JSONDecodeError as e: + douyin_logger.error(f"配置文件格式错误: {str(e)}") + return None + + for info in info_list: + if isinstance(info, dict) and "douyin" in info: + douyin_logger.info(f"成功加载视频配置: {video_path.name}") + return info["douyin"] + + douyin_logger.warning("配置文件中未找到抖音平台信息") + return None + except Exception as e: + douyin_logger.error(f"读取配置文件失败: {str(e)}") + return None + + @handle_douyin_errors + async def batch_upload( + self, + context: BrowserContext, + video_dir: str, + account_file: Path, + daily_times: List[int] = [16] + ) -> None: + """批量上传视频""" + try: + # 验证目录 + video_dir_path = Path(video_dir) + if not video_dir_path.exists() or not video_dir_path.is_dir(): + raise VideoValidationError(f"视频目录不存在: {video_dir_path}") + + # 获取视频文件 + files = self.find_video_files(video_dir_path) + if not files: + douyin_logger.warning("未找到有效的视频文件") + return + + # 生成默认发布时间 + file_num = len(files) + default_publish_times = generate_schedule_time_next_day( + file_num, + 1, + daily_times=daily_times + ) + + # 上传视频 + for index, file in enumerate(files): + douyin_logger.info(f"正在处理第 {index + 1}/{file_num} 个视频: {file.name}") + + try: + # 加载视频配置 + video_info = self.load_video_info(file) + + # 验证视频参数 + is_valid, title, tags, mentions, thumbnail_path, publish_date = await self.validate_video( + file, + video_info + ) + + if not is_valid: + douyin_logger.error(f"跳过无效视频: {file.name}") + continue + + # 使用配置文件中的发布时间或默认时间 + if not publish_date: + publish_date = default_publish_times[index] + douyin_logger.info(f"使用默认发布时间: {publish_date}") + + # 上传单个视频 + page = await context.new_page() # 在当前上下文中创建新页面 + await self.upload_single_video( + page=page, + title=title, + file_path=str(file), + tags=tags, + mentions=mentions, + publish_date=publish_date, + account_file=str(account_file), + thumbnail_path=str(thumbnail_path) if thumbnail_path else None + ) + + # 上传间隔 + if index < file_num - 1: + await asyncio.sleep(1) # 减少间隔时间 + + except Exception as e: + douyin_logger.error(f"处理视频 {file.name} 失败: {str(e)}") + continue + + except Exception as e: + douyin_logger.error(f"批量上传失败: {str(e)}") + raise + + @handle_douyin_errors + async def publish_video(self, context: BrowserContext, video_path: Path, title: str, tags: List[str], mentions: List[str], 
thumbnail_path: Path, publish_time: datetime) -> None: + """发布视频""" + # 验证标题 + self._validate_title(title) + # 验证标签 + self._validate_tags(tags) + # 验证提及 + self._validate_mentions(mentions) + # 验证封面 + self._validate_thumbnail_file(thumbnail_path) + # 验证视频 + self._validate_video_file(video_path) + # 设置定时发布时间 + self._validate_publish_date(publish_time) + + # 执行发布逻辑 + page = await context.new_page() + await page.goto("https://creator.douyin.com/creator-micro/content/upload") # 正确的上传页面 + await page.fill(SELECTORS['TITLE_INPUT'], title) + await page.fill(SELECTORS['TAGS_INPUT'], ','.join(tags)) + for mention in mentions: + await page.fill(SELECTORS['MENTION_INPUT'], mention) + await page.set_input_files(SELECTORS['THUMBNAIL_INPUT'], thumbnail_path) + await page.set_input_files(SELECTORS['VIDEO_INPUT'], video_path) + + # 使用重试机制点击发布按钮 + await self.click_publish_button(page) + douyin_logger.info(f"视频发布成功: {title}") + + @handle_douyin_errors + async def upload_single_video( + self, + context: BrowserContext, + title: str, + file_path: str, + tags: List[str], + mentions: List[str], + publish_date: datetime, + account_file: str, + thumbnail_path: Optional[str] = None + ) -> None: + """上传单个视频""" + # 将字符串路径转换为Path对象 + file_path_obj = Path(file_path) + thumbnail_path_obj = Path(thumbnail_path) if thumbnail_path else None + + # 验证标题 + self._validate_title(title) + # 验证标签 + self._validate_tags(tags) + # 验证提及 + self._validate_mentions(mentions) + # 验证封面 + if thumbnail_path_obj: + self._validate_thumbnail_file(thumbnail_path_obj) + # 验证视频 + self._validate_video_file(file_path_obj) + # 设置定时发布时间 + self._validate_publish_date(publish_date) + + # 设置账号并获取浏览器上下文 + result = await self.account_manager.setup_account(account_file, handle=True, context=context) + if not result['success']: + raise UploadError(f"账号设置失败: {result['message']}") + + page = result['page'] + await self._perform_upload( + page=page, + title=title, + file_path=file_path, # 保持原始字符串路径用于上传 + tags=tags, + mentions=mentions, + publish_date=publish_date, + thumbnail_path=thumbnail_path # 保持原始字符串路径用于上传 + ) + + async def set_schedule_time(self, page: Page, publish_date: datetime) -> None: + """ + 设置定时发布时间 + Args: + page: Playwright页面对象 + publish_date: 发布时间 + """ + label_element = page.locator(SELECTORS['SCHEDULE_RADIO']) + await label_element.click() + await asyncio.sleep(1) + + publish_date_hour = publish_date.strftime("%Y-%m-%d %H:%M") + await page.locator(SELECTORS['SCHEDULE_INPUT']).click() + await page.keyboard.press("Control+KeyA") + await page.keyboard.type(str(publish_date_hour)) + await page.keyboard.press("Enter") + await asyncio.sleep(1) + + async def handle_upload_error(self, page: Page, file_path: str) -> None: + """ + 处理上传错误 + Args: + page: Playwright页面对象 + file_path: 视频文件路径 + """ + douyin_logger.info('视频出错了,重新上传中') + await page.locator('div.progress-div [class^="upload-btn-input"]').set_input_files(file_path) + + async def set_thumbnail(self, page: Page, thumbnail_path: str) -> None: + """ + 设置视频封面 + Args: + page: Playwright页面对象 + thumbnail_path: 封面图片路径 + """ + try: + douyin_logger.info("开始设置视频封面...") + + # 1. 点击选择封面按钮 + # 使用更精确的选择器定位整个封面控制区域 + cover_area = page.locator("div.coverControl-CjlzqC") + await cover_area.wait_for(state="visible", timeout=10000) + douyin_logger.info("找到封面控制区域") + + # 点击封面选择区域 + cover_btn = cover_area.locator("div.cover-Jg3T4p") + await cover_btn.wait_for(state="visible", timeout=5000) + douyin_logger.info("找到封面选择按钮") + await cover_btn.click() + + # 2. 
等待模态框出现 + modal = page.locator("div.semi-modal-content.semi-modal-content-animate-show") + await modal.wait_for(state="visible", timeout=10000) + douyin_logger.info("模态框已显示") + + # 3. 在模态框内查找并点击上传区域 + upload_area = modal.locator("div.semi-upload-drag-area") + await upload_area.wait_for(state="visible", timeout=10000) + douyin_logger.info("上传区域已显示") + + # 4. 准备文件选择器并上传 + async with page.expect_file_chooser() as fc_info: + await upload_area.click() + file_chooser = await fc_info.value + + # 上传文件 + douyin_logger.info(f"上传封面图片: {thumbnail_path}") + await file_chooser.set_files(thumbnail_path) + + + await asyncio.sleep(2) + # 6. 点击完成按钮 + finish_btn = modal.locator("button.semi-button-primary:has-text('完成')") + await finish_btn.wait_for(state="visible", timeout=5000) + await finish_btn.click() + + # 7. 等待一小段时间确保处理完成 + await asyncio.sleep(2) + + douyin_logger.success("视频封面设置成功") + + except Exception as e: + douyin_logger.error(f"设置视频封面失败: {str(e)}") + raise UploadError(f"设置封面失败: {str(e)}") + + async def set_location(self, page: Page, location: str = "杭州市") -> None: + """ + 设置视频位置信息 + Args: + page: Playwright页面对象 + location: 位置信息 + """ + await page.locator(SELECTORS['LOCATION_INPUT']).click() + await page.keyboard.press("Backspace") + await page.wait_for_timeout(2000) + await page.keyboard.type(location) + await page.wait_for_selector('div[role="listbox"] [role="option"]', timeout=5000) + await page.locator('div[role="listbox"] [role="option"]').first.click() + + @handle_douyin_errors + async def _perform_upload( + self, + page: Page, + title: str, + file_path: str, + tags: List[str], + mentions: List[str], + publish_date: datetime, + thumbnail_path: Optional[str] = None + ) -> None: + """执行具体的上传操作""" + try: + # 1. 点击上传按钮进入上传页面 + douyin_logger.info(f'[+]正在上传-------{title}') + + # 尝试多个可能的选择器来定位上传按钮 + upload_button = None + for selector in SELECTORS['UPLOAD_BUTTONS']: + try: + douyin_logger.info(f"尝试定位上传按钮: {selector}") + if await page.locator(selector).count() > 0: + upload_button = page.locator(selector) + douyin_logger.info(f"找到上传按钮,使用选择器: {selector}") + break + except Exception as e: + douyin_logger.warning(f"尝试选择器 {selector} 失败: {str(e)}") + continue + + if not upload_button: + raise BrowserOperationError("无法找到上传按钮") + + # 等待按钮可见并点击 + douyin_logger.info("等待上传按钮可见...") + await upload_button.wait_for(state="visible", timeout=5000) + await page.wait_for_load_state("networkidle") + await asyncio.sleep(2) # 额外等待以确保按钮可交互 + + # 尝试点击按钮 + douyin_logger.info("尝试点击上传按钮...") + try: + await upload_button.click(timeout=5000) + except Exception as e: + douyin_logger.warning(f"常规点击失败: {str(e)}") + # 尝试使用JavaScript点击 + douyin_logger.info("尝试使用JavaScript点击...") + await page.evaluate('selector => document.querySelector(selector).click()', SELECTORS['UPLOAD_BUTTONS'][0]) + + # 等待进入上传页面 + douyin_logger.info("等待进入上传页面...") + try: + await page.wait_for_url( + "https://creator.douyin.com/creator-micro/content/upload", + timeout=10000, + wait_until="networkidle" + ) + douyin_logger.success("成功进入上传页面") + except Exception as e: + douyin_logger.error(f"等待上传页面超时: {str(e)}") + raise BrowserOperationError("无法进入上传页面") + + # 2. 
上传视频文件 + douyin_logger.info("准备上传视频文件...") + # 等待上传区域可见 + upload_container = page.locator(".container-drag-AOMYqU") + await upload_container.wait_for(state="visible", timeout=10000) + + # 找到文件输入框 + file_input = page.locator(".container-drag-AOMYqU input[type='file']") + await file_input.wait_for(state="attached", timeout=10000) # 只等待元素存在,不需要可见 + + # 设置文件 + douyin_logger.info(f"设置视频文件: {file_path}") + await file_input.set_input_files(file_path) + + # 3. 等待进入发布页面 + douyin_logger.info(' [-] 等待进入视频发布页面...') + await page.wait_for_url( + "https://creator.douyin.com/creator-micro/content/post/video?enter_from=publish_page", + timeout=60000 # 设置较长的超时时间 + ) + douyin_logger.success("成功进入发布页面") + + # 4. 填写视频信息 + douyin_logger.info(' [-] 正在填充标题和话题...') + await asyncio.sleep(2) # 等待页面加载完成 + + # 设置标题 + title_container = page.get_by_text('作品标题').locator("..").locator("xpath=following-sibling::div[1]").locator("input") + if await title_container.count(): + await title_container.fill(title[:30]) + else: + titlecontainer = page.locator(SELECTORS['TITLE_INPUT']) + await titlecontainer.click() + await page.keyboard.press("Control+KeyA") + await page.keyboard.press("Delete") + await page.keyboard.type(title) + await page.keyboard.press("Enter") + + await asyncio.sleep(1) + # 设置标签 + css_selector = SELECTORS['TAGS_INPUT'] + for tag in tags[:5]: + await page.type(css_selector, "#" + tag) + await page.press(css_selector, "Space") + douyin_logger.info(f'总共添加{len(tags)}个话题') + await page.keyboard.press("Enter") + + await asyncio.sleep(1) + + for mention in mentions[:5]: + await page.type(css_selector, "@" + mention) + await page.press(css_selector, "Space") + douyin_logger.info(f'总共添加{len(mentions)}个@') + await page.keyboard.press("Enter") + + await asyncio.sleep(1) + + # 5. 等待视频上传完成 + douyin_logger.info(" [-] 等待视频上传完成...") + upload_timeout = 300 # 5分钟超时 + start_time = datetime.now() + + while True: + try: + if (datetime.now() - start_time).total_seconds() > upload_timeout: + raise UploadError("视频上传超时") + + if await page.locator('[class^="long-card"] div:has-text("重新上传")').count() > 0: + douyin_logger.success(" [-]视频上传完毕") + break + + if await page.locator('div.progress-div > div:has-text("上传失败")').count() > 0: + raise UploadError("视频上传失败") + + await asyncio.sleep(2) + douyin_logger.info(" [-] 正在上传视频中...") + except Exception as e: + if "上传失败" in str(e): + douyin_logger.error(" [-] 发现上传出错了... 准备重试") + await self.handle_upload_error(page, file_path) + else: + raise + + # 6. 设置封面 + if thumbnail_path: + douyin_logger.info(" [-] 设置视频封面...") + await self.set_thumbnail(page, thumbnail_path) + + await asyncio.sleep(1) + # 9. 设置发布时间 + if publish_date: + douyin_logger.info(" [-] 设置发布时间...") + await self.set_schedule_time(page, publish_date) + + # 10. 
发布视频
+            douyin_logger.info(" [-] 正在发布视频...")
+            # 使用新的选择器定位发布按钮
+            publish_button = page.locator(SELECTORS['PUBLISH_BUTTON'])
+            await publish_button.wait_for(state="visible", timeout=10000)
+            douyin_logger.info(" [-] 发布按钮已就绪")
+
+            # 确保按钮可点击
+            await asyncio.sleep(1)
+            await publish_button.click()
+
+            # 等待页面跳转到管理页面
+            await page.wait_for_url(
+                "https://creator.douyin.com/creator-micro/content/manage**",
+                timeout=30000
+            )
+            douyin_logger.success(" [-]视频发布成功")
+
+        except Exception as e:
+            douyin_logger.error(f"执行上传操作失败: {str(e)}")
+            raise UploadError(f"上传失败: {str(e)}")
+
+    async def validate_video(
+        self,
+        file_path: Path,
+        video_info: Optional[Dict[str, Any]] = None
+    ) -> Tuple[bool, Optional[str], Optional[List[str]], Optional[List[str]], Optional[Path], Optional[datetime]]:
+        """验证视频文件和相关参数"""
+        try:
+            # 检查文件可访问性
+            self._validate_file_accessibility(file_path)
+
+            # 验证视频文件
+            self._validate_video_file(file_path)
+
+            # 获取视频信息
+            if video_info:
+                title = video_info.get("title", "").strip()
+                tags = [tag.strip() for tag in video_info.get("tags", [])]
+                mentions = video_info.get("mentions", [])
+
+                try:
+                    publish_date = datetime.strptime(
+                        video_info.get("publish_date", ""),
+                        "%Y-%m-%d %H:%M:%S"
+                    )
+                    # 检查发布时间是否过期
+                    if publish_date < datetime.now():
+                        douyin_logger.warning("发布时间已过期,将使用默认时间")
+                        publish_date = None
+                except (ValueError, TypeError):
+                    publish_date = None
+            else:
+                douyin_logger.warning("使用文件名作为标题和标签")
+                title = file_path.stem
+                tags = [title]
+                mentions = []
+                publish_date = None
+
+            # 验证标题
+            self._validate_title(title)
+
+            # 验证标签
+            self._validate_tags(tags)
+
+            # 验证发布时间
+            if publish_date:
+                self._validate_publish_date(publish_date)
+
+            # 查找并验证封面图片
+            thumbnail_path = self.find_thumbnail(file_path)
+            if thumbnail_path:
+                self._validate_thumbnail_file(thumbnail_path)
+
+            if mentions:
+                self._validate_mentions(mentions)
+
+            return True, title, tags, mentions, thumbnail_path, publish_date
+        except Exception as e:
+            douyin_logger.error(f"验证视频失败: {str(e)}")
+            return False, None, None, None, None, None
+
+    async def click_publish_button(self, page: Page, retries: int = 3) -> None:
+        """点击发布按钮,带重试机制"""
+        for attempt in range(retries):
+            try:
+                await page.wait_for_selector(SELECTORS['PUBLISH_BUTTON'], timeout=10000)
+                await page.click(SELECTORS['PUBLISH_BUTTON'])
+                douyin_logger.info("成功点击发布按钮")
+                return  # 成功点击后退出
+            except Exception as e:
+                douyin_logger.warning(f"点击发布按钮失败,尝试第 {attempt + 1} 次: {str(e)}")
+                await asyncio.sleep(2)  # 等待一段时间再重试
+        raise Exception("点击发布按钮失败,已达到最大重试次数")
\ No newline at end of file
diff --git a/uploader/douyin_uploader/utils/__init__.py b/uploader/douyin_uploader/utils/__init__.py
new file mode 100644
index 00000000..7d79768d
--- /dev/null
+++ b/uploader/douyin_uploader/utils/__init__.py
@@ -0,0 +1,22 @@
+"""
+工具模块包
+提供各种辅助功能
+"""
+
+from .user_info import UserInfoHelper
+from .browser_helper import BrowserHelper
+from .cookie_helper import CookieHelper
+from .login_helper import LoginHelper
+from .error_helper import ErrorHelper
+from .db_helper import DBHelper
+from .playwright_helper import PlaywrightHelper
+
+__all__ = [
+    "UserInfoHelper",
+    "BrowserHelper",
+    "CookieHelper",
+    "LoginHelper",
+    "ErrorHelper",
+    "DBHelper",
+    "PlaywrightHelper"
+]
\ No newline at end of file
diff --git a/uploader/douyin_uploader/utils/browser_helper.py b/uploader/douyin_uploader/utils/browser_helper.py
new file mode 100644
index 00000000..459c7816
--- /dev/null
+++ b/uploader/douyin_uploader/utils/browser_helper.py
@@ -0,0 +1,149 @@
+"""
+浏览器资源管理助手
+提供浏览器资源的创建、关闭和清理功能
+"""
+
+from typing import Optional, Tuple
+from pathlib import 
Path +from playwright.async_api import Page, Browser, BrowserContext, async_playwright +from utils.log import douyin_logger + +class BrowserHelper: + """浏览器资源管理助手类""" + + @staticmethod + async def create_browser_context( + headless: bool = True, + user_data_dir: Optional[str] = None + ) -> Tuple[Browser, BrowserContext]: + """ + 创建浏览器和上下文 + Args: + headless: 是否使用无头模式 + user_data_dir: 用户数据目录路径 + Returns: + Tuple[Browser, BrowserContext]: 浏览器和上下文对象 + """ + try: + playwright = await async_playwright().start() + browser_args = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-infobars', + '--window-position=0,0', + '--ignore-certifcate-errors', + '--ignore-certifcate-errors-spki-list', + '--disable-background-networking', + '--disable-background-timer-throttling', + '--disable-backgrounding-occluded-windows', + '--disable-breakpad', + '--disable-client-side-phishing-detection', + '--disable-component-update', + '--disable-default-apps', + '--disable-dev-shm-usage', + '--disable-domain-reliability', + '--disable-extensions', + '--disable-features=AudioServiceOutOfProcess', + '--disable-hang-monitor', + '--disable-ipc-flooding-protection', + '--disable-notifications', + '--disable-offer-store-unmasked-wallet-cards', + '--disable-popup-blocking', + '--disable-print-preview', + '--disable-prompt-on-repost', + '--disable-renderer-backgrounding', + '--disable-speech-api', + '--disable-sync', + '--disable-web-security', + '--disk-cache-size=33554432', + '--hide-scrollbars', + '--ignore-gpu-blacklist', + '--metrics-recording-only', + '--mute-audio', + '--no-default-browser-check', + '--no-first-run', + '--no-pings', + '--no-zygote', + '--password-store=basic', + '--use-gl=swiftshader', + '--use-mock-keychain', + '--window-size=1920,1080', + ] + + context_params = { + 'viewport': {'width': 1920, 'height': 1080}, + 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'locale': 'zh-CN', + 'timezone_id': 'Asia/Shanghai', + 'geolocation': {'latitude': 39.9042, 'longitude': 116.4074}, # 北京坐标 + 'permissions': ['geolocation'], + 'color_scheme': 'light', + 'device_scale_factor': 1, + 'is_mobile': False, + 'has_touch': False, + 'java_script_enabled': True, + 'bypass_csp': True, + 'proxy': None # 如果需要代理可以在这里设置 + } + + if user_data_dir: + # 使用持久化上下文 + context = await playwright.chromium.launch_persistent_context( + user_data_dir=user_data_dir, + headless=headless, + args=browser_args, + ignore_default_args=['--enable-automation'], # 禁用自动化标记 + **context_params + ) + browser = context.browser + else: + # 创建普通浏览器实例 + browser = await playwright.chromium.launch( + headless=headless, + args=browser_args, + ignore_default_args=['--enable-automation'] # 禁用自动化标记 + ) + context = await browser.new_context(**context_params) + + return browser, context + + except Exception as e: + douyin_logger.error(f"创建浏览器上下文失败: {str(e)}") + raise + + @staticmethod + async def close_resources( + page: Optional[Page] = None, + context: Optional[BrowserContext] = None, + browser: Optional[Browser] = None + ) -> None: + """ + 按顺序关闭Playwright资源 + Args: + page: Playwright页面对象 + context: 浏览器上下文 + browser: 浏览器实例 + """ + if page: + try: + douyin_logger.info("正在关闭页面...") + await page.close() + douyin_logger.info("页面已关闭") + except Exception as e: + douyin_logger.warning(f"关闭页面时发生错误: {str(e)}") + + if context: + try: + douyin_logger.info("正在关闭浏览器上下文...") + await context.close() + douyin_logger.info("浏览器上下文已关闭") + except Exception as e: + 
douyin_logger.warning(f"关闭浏览器上下文时发生错误: {str(e)}") + + if browser: + try: + douyin_logger.info("正在关闭浏览器...") + await browser.close() + douyin_logger.info("浏览器已关闭") + except Exception as e: + douyin_logger.warning(f"关闭浏览器时发生错误: {str(e)}") \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/cookie_helper.py b/uploader/douyin_uploader/utils/cookie_helper.py new file mode 100644 index 00000000..f6cef7e6 --- /dev/null +++ b/uploader/douyin_uploader/utils/cookie_helper.py @@ -0,0 +1,154 @@ +""" +Cookie管理助手 +提供cookie的保存、验证和状态管理功能 +""" + +import os +from typing import Optional, List, Dict, Any +from playwright.async_api import BrowserContext, Page +from utils.log import douyin_logger +import json +from pathlib import Path +from loguru import logger + +class CookieHelper: + """Cookie管理助手类""" + + def __init__(self, cookie_dir: str = "cookies"): + """ + 初始化Cookie助手 + Args: + cookie_dir: cookie存储目录 + """ + self.cookie_dir = Path(cookie_dir) + self.cookie_dir.mkdir(exist_ok=True) + + async def save_cookie_state( + self, + context: BrowserContext, + cookie_file: str + ) -> bool: + """ + 保存浏览器上下文的cookie状态 + Args: + context: 浏览器上下文 + cookie_file: cookie文件路径 + Returns: + bool: 保存是否成功 + """ + try: + # 确保目录存在 + os.makedirs(os.path.dirname(cookie_file), exist_ok=True) + + # 保存cookie状态到文件 + douyin_logger.info(f"保存cookie状态到文件: {cookie_file}") + await context.storage_state(path=cookie_file) + + # 验证文件大小 + if os.path.getsize(cookie_file) > 100 * 1024: # 100KB + douyin_logger.warning(f"Cookie文件大小超过限制: {cookie_file}") + return False + + douyin_logger.success(f"Cookie保存成功: {cookie_file}") + return True + + except Exception as e: + douyin_logger.error(f"保存cookie状态失败: {str(e)}") + return False + + @staticmethod + def verify_cookie_file(cookie_file: str) -> bool: + """ + 验证cookie文件是否存在且有效 + Args: + cookie_file: cookie文件路径 + Returns: + bool: 文件是否有效 + """ + try: + if not os.path.exists(cookie_file): + douyin_logger.warning(f"Cookie文件不存在: {cookie_file}") + return False + + if os.path.getsize(cookie_file) == 0: + douyin_logger.warning(f"Cookie文件为空: {cookie_file}") + return False + + return True + + except Exception as e: + douyin_logger.error(f"验证cookie文件失败: {str(e)}") + return False + + async def save_cookies(self, page: Page, cookie_file: str) -> bool: + """ + 保存cookies到文件 + Args: + page: Playwright页面对象 + cookie_file: cookie文件名 + Returns: + bool: 是否保存成功 + """ + try: + cookies = await page.context.cookies() + cookie_path = self.cookie_dir / cookie_file + cookie_path.write_text(json.dumps(cookies, ensure_ascii=False, indent=2)) + logger.info(f"Cookies已保存到: {cookie_path}") + return True + except Exception as e: + logger.error(f"保存cookies失败: {str(e)}") + return False + + async def load_cookies(self, page: Page, cookie_file: str) -> bool: + """ + 从文件加载cookies + Args: + page: Playwright页面对象 + cookie_file: cookie文件名 + Returns: + bool: 是否加载成功 + """ + try: + cookie_path = self.cookie_dir / cookie_file + if not cookie_path.exists(): + logger.warning(f"Cookie文件不存在: {cookie_path}") + return False + + cookies = json.loads(cookie_path.read_text(encoding='utf-8')) + await page.context.add_cookies(cookies) + logger.info(f"已加载cookies: {cookie_path}") + return True + except Exception as e: + logger.error(f"加载cookies失败: {str(e)}") + return False + + def get_cookie_files(self) -> List[str]: + """ + 获取所有cookie文件列表 + Returns: + List[str]: cookie文件名列表 + """ + try: + return [f.name for f in self.cookie_dir.glob("*.json")] + except Exception as e: + logger.error(f"获取cookie文件列表失败: {str(e)}") + return [] + + def 
delete_cookie(self, cookie_file: str) -> bool: + """ + 删除指定的cookie文件 + Args: + cookie_file: cookie文件名 + Returns: + bool: 是否删除成功 + """ + try: + cookie_path = self.cookie_dir / cookie_file + if cookie_path.exists(): + cookie_path.unlink() + logger.info(f"已删除cookie文件: {cookie_path}") + return True + return False + except Exception as e: + logger.error(f"删除cookie文件失败: {str(e)}") + return False \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/cookie_sync_manager.py b/uploader/douyin_uploader/utils/cookie_sync_manager.py new file mode 100644 index 00000000..112e9a38 --- /dev/null +++ b/uploader/douyin_uploader/utils/cookie_sync_manager.py @@ -0,0 +1,283 @@ +""" +Cookie同步管理器 +负责管理user_data_dir和cookie文件之间的同步 +""" + +from typing import Dict, Any, Optional +from pathlib import Path +from datetime import datetime +from playwright.async_api import async_playwright, Browser, BrowserContext +import json + +# 使用绝对导入 +from utils.log import douyin_logger + +class CookieSyncManager: + """Cookie同步管理器类""" + + def __init__(self): + """初始化同步管理器""" + self.last_sync_time: Dict[str, datetime] = {} # 记录每个账号最后同步时间 + self._sync_interval = 300 # 同步间隔(秒) + + async def sync_from_profile_to_file( + self, + user_data_dir: Path, + cookie_file: Path, + account_id: str + ) -> bool: + """ + 从user_data_dir同步到cookie文件 + Args: + user_data_dir: 用户数据目录 + cookie_file: cookie文件路径 + account_id: 账号ID + Returns: + bool: 是否同步成功 + """ + try: + # 获取当前时间 + current_time = datetime.now() + + # 检查是否需要同步(5分钟内不重复同步) + if account_id in self.last_sync_time: + time_diff = (current_time - self.last_sync_time[account_id]).total_seconds() + if time_diff < self._sync_interval: + douyin_logger.debug(f"账号 {account_id} 最近已同步,跳过") + return True + + # 确保目录存在 + cookie_file.parent.mkdir(parents=True, exist_ok=True) + + # 如果cookie文件存在且不为空,跳过同步 + if cookie_file.exists(): + try: + with open(cookie_file, 'r', encoding='utf-8') as f: + state = json.load(f) + if state.get('cookies') or state.get('origins'): + douyin_logger.info(f"账号 {account_id} cookie文件有效,跳过同步") + return True + except Exception as e: + douyin_logger.warning(f"读取cookie文件失败: {str(e)}") + + # 创建临时上下文并同步 + async with async_playwright() as playwright: + # 使用launch_persistent_context读取现有数据 + context = await playwright.chromium.launch_persistent_context( + user_data_dir=str(user_data_dir), + headless=True + ) + try: + # 获取当前状态 + state = await context.storage_state() + if state.get('cookies') or state.get('origins'): + # 只有当有有效的cookie时才保存 + await context.storage_state(path=str(cookie_file)) + self.last_sync_time[account_id] = current_time + douyin_logger.info(f"账号 {account_id} 同步到文件成功") + return True + else: + douyin_logger.warning(f"账号 {account_id} 没有有效的cookie,跳过同步") + return False + finally: + await context.close() + + except Exception as e: + douyin_logger.error(f"同步cookie到文件失败 [{account_id}]: {str(e)}") + return False + + async def restore_from_file_to_profile( + self, + cookie_file: Path, + user_data_dir: Path, + account_id: str + ) -> bool: + """ + 从cookie文件恢复到user_data_dir(仅在必要时使用) + Args: + cookie_file: cookie文件路径 + user_data_dir: 用户数据目录 + account_id: 账号ID + Returns: + bool: 是否恢复成功 + """ + try: + # 检查cookie文件是否存在 + if not cookie_file.exists(): + douyin_logger.error(f"Cookie文件不存在: {cookie_file}") + return False + + # 确保目录存在 + user_data_dir.mkdir(parents=True, exist_ok=True) + + # 创建临时上下文并恢复 + async with async_playwright() as playwright: + # 先创建临时上下文加载cookie文件 + browser = await playwright.chromium.launch(headless=True) + context = await browser.new_context() + try: + # 
加载cookie文件 + await context.storage_state(path=str(cookie_file)) + + # 保存到user_data_dir + persistent_context = await playwright.chromium.launch_persistent_context( + user_data_dir=str(user_data_dir), + headless=True + ) + try: + # 获取cookie并应用到持久化上下文 + state = await context.storage_state() + await persistent_context.add_cookies(state['cookies']) + douyin_logger.info(f"账号 {account_id} 从文件恢复成功") + return True + finally: + await persistent_context.close() + finally: + await context.close() + await browser.close() + + except Exception as e: + douyin_logger.error(f"从文件恢复cookie失败 [{account_id}]: {str(e)}") + return False + + async def ensure_consistency( + self, + user_data_dir: Path, + cookie_file: Path, + account_id: str + ) -> bool: + """ + 确保两种存储方式的数据一致性 + Args: + user_data_dir: 用户数据目录 + cookie_file: cookie文件路径 + account_id: 账号ID + Returns: + bool: 是否一致 + """ + try: + # 如果cookie文件不存在,直接同步 + if not cookie_file.exists(): + return await self.sync_from_profile_to_file( + user_data_dir, + cookie_file, + account_id + ) + + # 获取两边的storage state进行比较 + async with async_playwright() as playwright: + # 获取profile中的状态 + profile_context = await playwright.chromium.launch_persistent_context( + user_data_dir=str(user_data_dir), + headless=True + ) + try: + profile_state = await profile_context.storage_state() + finally: + await profile_context.close() + + # 获取文件中的状态 + browser = await playwright.chromium.launch(headless=True) + context = await browser.new_context() + try: + await context.storage_state(path=str(cookie_file)) + file_state = await context.storage_state() + finally: + await context.close() + await browser.close() + + # 比较cookies + if self._states_are_different(profile_state, file_state): + douyin_logger.info(f"账号 {account_id} 数据不一致,执行同步") + return await self.sync_from_profile_to_file( + user_data_dir, + cookie_file, + account_id + ) + + douyin_logger.debug(f"账号 {account_id} 数据一致") + return True + + except Exception as e: + douyin_logger.error(f"检查一致性失败 [{account_id}]: {str(e)}") + return False + + def _states_are_different(self, state1: Dict, state2: Dict) -> bool: + """ + 比较两个storage state是否有实质性差异 + + 比较规则: + 1. 必需cookie: sessionid, passport_csrf_token 等登录相关cookie + 2. 值比较: 检查cookie的值是否相同 + 3. 过期时间: 检查cookie是否已过期或即将过期 + 4. 域名范围: .douyin.com 等关键域名的cookie + """ + try: + cookies1 = {c['name']: c for c in state1['cookies']} + cookies2 = {c['name']: c for c in state2['cookies']} + + # 关键cookie列表 + critical_cookies = { + 'sessionid', + 'passport_csrf_token', + 'ttwid', + 'passport_auth_status', + 'sid_guard', + 'uid_tt', + 'sid_tt', + 'd_ticket' + } + + # 关键域名列表 + critical_domains = { + '.douyin.com', + 'creator.douyin.com', + '.bytedance.com' + } + + # 1. 检查关键cookie是否都存在 + for name in critical_cookies: + if name not in cookies1 or name not in cookies2: + douyin_logger.debug(f"关键cookie {name} 不一致") + return True + + cookie1 = cookies1[name] + cookie2 = cookies2[name] + + # 2. 检查值是否相同 + if cookie1['value'] != cookie2['value']: + douyin_logger.debug(f"Cookie {name} 值不一致") + return True + + # 3. 检查是否过期 + current_time = datetime.now().timestamp() + # 如果任一cookie已过期或将在1小时内过期 + if ('expires' in cookie1 and cookie1['expires'] < current_time + 3600) or \ + ('expires' in cookie2 and cookie2['expires'] < current_time + 3600): + douyin_logger.debug(f"Cookie {name} 已过期或即将过期") + return True + + # 4. 
检查关键域名的cookie + for domain in critical_domains: + domain_cookies1 = {c['name']: c for c in state1['cookies'] if c['domain'] == domain} + domain_cookies2 = {c['name']: c for c in state2['cookies'] if c['domain'] == domain} + + if len(domain_cookies1) != len(domain_cookies2): + douyin_logger.debug(f"域名 {domain} 的cookie数量不一致") + return True + + # 比较该域名下所有cookie的值 + for name, cookie1 in domain_cookies1.items(): + if name not in domain_cookies2: + douyin_logger.debug(f"域名 {domain} 下的cookie {name} 不存在") + return True + cookie2 = domain_cookies2[name] + if cookie1['value'] != cookie2['value']: + douyin_logger.debug(f"域名 {domain} 下的cookie {name} 值不一致") + return True + + return False + + except Exception as e: + douyin_logger.error(f"比较状态失败: {str(e)}") + return True # 出错时认为不一致,触发同步 \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/db_helper.py b/uploader/douyin_uploader/utils/db_helper.py new file mode 100644 index 00000000..5d2b80fc --- /dev/null +++ b/uploader/douyin_uploader/utils/db_helper.py @@ -0,0 +1,197 @@ +""" +数据库操作助手 +提供统一的数据库操作接口 +""" + +from typing import Optional, Dict, Any, List +from utils.log import douyin_logger +from utils.social_media_db import SocialMediaDB + +class DBHelper: + """数据库操作助手类""" + + def __init__(self, platform: str = "douyin"): + self.db = SocialMediaDB() + self.platform = platform + + def update_account( + self, + user_info: Dict[str, Any], + cookie_file: Optional[str] = None + ) -> bool: + """ + 更新账号信息 + Args: + user_info: 用户信息字典 + cookie_file: cookie文件路径 + Returns: + bool: 更新是否成功 + """ + try: + # 处理抖音号格式 + account_id = user_info.get('douyin_id', '').replace('抖音号:', '').strip() + if not account_id: + douyin_logger.error("账号ID不能为空") + return False + + # 准备账号数据 + account_data = { + 'nickname': user_info.get('nickname', ''), + 'video_count': user_info.get('video_count', 0), + 'follower_count': user_info.get('fans_count', 0), + 'extra': { + 'douyin_id': account_id, + 'signature': user_info.get('signature', '这个人很懒,没有留下任何签名'), + 'following_count': user_info.get('following_count', 0), + 'likes_count': user_info.get('likes_count', 0), + 'avatar_url': user_info.get('avatar_url', ''), + 'updated_at': user_info.get('updated_at', '') + } + } + + if not account_data['nickname']: + douyin_logger.error("账号昵称不能为空") + return False + + # 更新账号信息 + if not self.db.add_or_update_account( + self.platform, + account_id, + account_data + ): + douyin_logger.error("更新账号信息失败") + return False + + # 如果提供了cookie文件,更新cookie + if cookie_file: + if not self.add_cookie(self.platform, account_id, cookie_file): + douyin_logger.error("更新cookie失败") + return False + + return True + + except Exception as e: + douyin_logger.error(f"更新账号信息失败: {str(e)}") + return False + + finally: + self.db.close() + + def get_account_info(self, account_id: str) -> Optional[Dict[str, Any]]: + """ + 获取账号信息 + Args: + account_id: 账号ID + Returns: + Optional[Dict[str, Any]]: 账号信息 + """ + try: + accounts = self.db.get_all_accounts(self.platform) + account = next( + (acc for acc in accounts if acc['account_id'] == account_id), + None + ) + return account + + except Exception as e: + douyin_logger.error(f"获取账号信息失败: {str(e)}") + return None + + finally: + self.db.close() + + def get_all_accounts(self) -> List[Dict[str, Any]]: + """ + 获取所有账号信息 + Returns: + List[Dict[str, Any]]: 账号信息列表 + """ + try: + return self.db.get_all_accounts(self.platform) + except Exception as e: + douyin_logger.error(f"获取所有账号失败: {str(e)}") + return [] + finally: + self.db.close() + + def add_cookie(self, platform: str, account_id: 
str, cookie_file: str) -> bool: + """ + 添加或更新cookie记录 + Args: + platform: 平台名称 + account_id: 账号ID + cookie_file: cookie文件路径 + Returns: + bool: 是否操作成功 + """ + try: + # 处理account_id格式 + clean_account_id = account_id.replace('抖音号:', '').strip() + + # 直接添加/更新cookie记录(使用数据库的UPSERT功能) + if self.db.add_cookie(platform, clean_account_id, cookie_file): + douyin_logger.info(f"添加/更新cookie记录成功: {clean_account_id}") + return True + else: + douyin_logger.error(f"添加/更新cookie记录失败: {clean_account_id}") + return False + + except Exception as e: + douyin_logger.error(f"操作cookie记录失败: {str(e)}") + return False + + def get_account_cookie_path(self, account_id: str) -> Optional[Dict[str, Any]]: + """ + 获取账号的cookie路径信息 + Args: + account_id: 账号ID + Returns: + Optional[Dict[str, Any]]: 账号信息 + """ + try: + cookie_path = self.db.get_valid_cookies(self.platform, account_id) + if cookie_path: + return cookie_path + else: + return None + + except Exception as e: + douyin_logger.error(f"获取账号信息失败: {str(e)}") + return None + + finally: + self.db.close() + + def get_cookie_path_by_nickname(self, nickname: str) -> Optional[str]: + """ + 通过昵称查找账号的cookie路径 + Args: + nickname: 账号昵称 + Returns: + Optional[str]: cookie文件路径,如果未找到返回None + """ + try: + sql = """ + SELECT ac.cookie_path + FROM social_media_accounts sma + JOIN account_cookies ac ON sma.platform = ac.platform + AND sma.account_id = ac.account_id + WHERE sma.platform = ? AND sma.nickname = ? AND ac.is_valid = 1 + ORDER BY ac.last_check DESC + LIMIT 1 + """ + + result = self.db.db.query_one(sql, (self.platform, nickname)) + if result: + douyin_logger.info(f"找到账号 {nickname} 的cookie路径") + return result['cookie_path'] + else: + douyin_logger.warning(f"未找到账号 {nickname} 的cookie路径") + return None + + except Exception as e: + douyin_logger.error(f"查询cookie路径失败: {str(e)}") + return None + + finally: + self.db.close() \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/error_helper.py b/uploader/douyin_uploader/utils/error_helper.py new file mode 100644 index 00000000..a01c9a84 --- /dev/null +++ b/uploader/douyin_uploader/utils/error_helper.py @@ -0,0 +1,96 @@ +""" +错误处理助手 +提供统一的错误处理和异常情况管理功能 +""" + +import os +from datetime import datetime +from typing import Optional +from playwright.async_api import Page +from utils.log import douyin_logger + +class ErrorHelper: + """错误处理助手类""" + + def __init__(self): + self.error_dir = "error_logs" + os.makedirs(self.error_dir, exist_ok=True) + + def _get_error_filename(self, prefix: str) -> str: + """ + 生成错误文件名 + Args: + prefix: 文件名前缀 + Returns: + str: 完整的文件名 + """ + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + return os.path.join(self.error_dir, f"{prefix}_{timestamp}") + + async def save_error_screenshot( + self, + page: Page, + error: Exception, + prefix: str = "error" + ) -> Optional[str]: + """ + 保存错误现场截图 + Args: + page: Playwright页面对象 + error: 异常对象 + prefix: 文件名前缀 + Returns: + Optional[str]: 截图文件路径 + """ + try: + screenshot_path = f"{self._get_error_filename(prefix)}.png" + await page.screenshot(path=screenshot_path, full_page=True) + douyin_logger.info(f"错误截图已保存: {screenshot_path}") + return screenshot_path + except Exception as e: + douyin_logger.error(f"保存错误截图失败: {str(e)}") + return None + + async def save_error_page_source( + self, + page: Page, + error: Exception, + prefix: str = "error" + ) -> Optional[str]: + """ + 保存错误页面源码 + Args: + page: Playwright页面对象 + error: 异常对象 + prefix: 文件名前缀 + Returns: + Optional[str]: 源码文件路径 + """ + try: + source_path = f"{self._get_error_filename(prefix)}.html" + with 
open(source_path, "w", encoding="utf-8") as f: + f.write(await page.content()) + douyin_logger.info(f"页面源码已保存: {source_path}") + return source_path + except Exception as e: + douyin_logger.error(f"保存页面源码失败: {str(e)}") + return None + + async def save_error_context( + self, + page: Page, + error: Exception, + prefix: str = "error" + ) -> tuple[Optional[str], Optional[str]]: + """ + 保存完整的错误现场信息 + Args: + page: Playwright页面对象 + error: 异常对象 + prefix: 文件名前缀 + Returns: + tuple[Optional[str], Optional[str]]: (截图路径, 源码路径) + """ + screenshot_path = await self.save_error_screenshot(page, error, prefix) + source_path = await self.save_error_page_source(page, error, prefix) + return screenshot_path, source_path \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/login_helper.py b/uploader/douyin_uploader/utils/login_helper.py new file mode 100644 index 00000000..cc250090 --- /dev/null +++ b/uploader/douyin_uploader/utils/login_helper.py @@ -0,0 +1,179 @@ +""" +登录状态检查助手 +提供登录状态的检查和验证功能 +""" + +from typing import Optional, Dict, Any, Tuple +from playwright.async_api import Page, Browser, Response +from loguru import logger +from .page_check_helper import PageCheckHelper +from .cookie_helper import CookieHelper + +class LoginHelper: + """登录状态检查助手类""" + + # 页面URL常量 + EXPECTED_URL = "https://creator.douyin.com/creator-micro/home" + LOGIN_URL = "https://creator.douyin.com/" + + def __init__(self, cookie_dir: str = "cookies"): + """ + 初始化登录助手 + Args: + cookie_dir: cookie存储目录 + """ + self.cookie_helper = CookieHelper(cookie_dir) + + async def navigate_to_creator_center(self, page: Page) -> Tuple[bool, str]: + """ + 导航到创作者中心 + Args: + page: Playwright页面对象 + Returns: + Tuple[bool, str]: (是否成功, 最终URL) + """ + try: + logger.info("导航到创作者中心...") + response = await page.goto( + self.EXPECTED_URL, + wait_until="networkidle", + timeout=30000 + ) + + if not response: + logger.error("页面加载失败: 无响应") + return False, "" + + if response.status >= 400: + logger.error(f"页面加载失败: HTTP {response.status}") + return False, "" + + # 获取最终URL(处理重定向) + final_url = page.url + logger.info(f"最终页面URL: {final_url}") + + # 检查是否重定向到登录页 + if self.LOGIN_URL in final_url: + logger.info("页面已重定向到登录页面") + return True, final_url + + # 验证页面加载 + if not await PageCheckHelper.check_page_loaded(page): + logger.warning("页面加载验证失败") + return False, final_url + + logger.info("页面导航成功") + return True, final_url + + except Exception as e: + logger.error(f"导航到创作者中心失败: {str(e)}") + return False, "" + + async def check_login_status(self, page: Page, browser: Optional[Browser] = None, headless: bool = True) -> Tuple[bool, Optional[Dict[str, Any]]]: + """ + 检查登录状态 + Args: + page: Playwright页面对象 + browser: 浏览器实例(可选) + headless: 是否无头模式 + Returns: + Tuple[bool, Optional[Dict[str, Any]]]: (是否已登录, 用户信息) + """ + try: + # 验证登录状态 + is_logged_in = await PageCheckHelper.verify_login_status(page) + if not is_logged_in: + logger.warning("登录状态验证失败") + return False, None + + # 获取用户信息 + try: + user_info = await PageCheckHelper.get_user_info(page) + if user_info and user_info.get('nickname'): # 只检查昵称是否存在 + logger.info(f"当前登录用户: {user_info['nickname']}") + return True, user_info + except Exception as e: + logger.warning(f"获取用户信息时出错: {str(e)}") + + # 即使获取用户信息失败,只要验证通过就返回True + return True, None + + except Exception as e: + logger.error(f"检查登录状态时出错: {str(e)}") + return False, None + + async def verify_cookie_and_get_user_info( + self, + page: Page, + cookie_file: str, + headless: bool = True + ) -> Tuple[bool, Optional[Dict[str, Any]]]: + """ + 验证cookie并获取用户信息 + 
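+        Example (illustrative sketch; "douyin_account_a.json" is a placeholder file name):
+            helper = LoginHelper(cookie_dir="cookies")
+            ok, user_info = await helper.verify_cookie_and_get_user_info(page, "douyin_account_a.json")
+            if not ok:
+                ok, user_info = await helper.wait_for_login(page)
+                if ok:
+                    await helper.save_login_state(page, "douyin_account_a.json")
+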
Args: + page: Playwright页面对象 + cookie_file: cookie文件名 + headless: 是否无头模式 + Returns: + Tuple[bool, Optional[Dict[str, Any]]]: (是否验证成功, 用户信息) + """ + try: + # 加载cookies + if not await self.cookie_helper.load_cookies(page, cookie_file): + return False, None + + # 导航到创作者平台 + success, final_url = await self.navigate_to_creator_center(page) + if not success: + return False, None + + # 验证登录状态 + return await self.check_login_status(page, None, headless) + + except Exception as e: + logger.error(f"验证cookie时出错: {str(e)}") + return False, None + + async def wait_for_login(self, page: Page, timeout: int = 300000) -> Tuple[bool, Optional[Dict[str, Any]]]: + """ + 等待用户完成登录 + Args: + page: Playwright页面对象 + timeout: 超时时间(毫秒) + Returns: + Tuple[bool, Optional[Dict[str, Any]]]: (是否登录成功, 用户信息) + """ + try: + # 等待URL变化 + await page.wait_for_url(self.EXPECTED_URL, timeout=timeout) + logger.info("检测到登录成功") + + # 验证登录状态 + return await self.check_login_status(page) + + except Exception as e: + logger.error(f"等待登录超时: {str(e)}") + return False, None + + async def save_login_state(self, page: Page, cookie_file: str) -> bool: + """ + 保存登录状态 + Args: + page: Playwright页面对象 + cookie_file: cookie文件名 + Returns: + bool: 是否保存成功 + """ + try: + # 验证当前是否已登录 + is_logged_in, _ = await self.check_login_status(page) + if not is_logged_in: + logger.warning("当前未登录,无法保存登录状态") + return False + + # 保存cookies + return await self.cookie_helper.save_cookies(page, cookie_file) + + except Exception as e: + logger.error(f"保存登录状态失败: {str(e)}") + return False \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/page_check_helper.py b/uploader/douyin_uploader/utils/page_check_helper.py new file mode 100644 index 00000000..13361b94 --- /dev/null +++ b/uploader/douyin_uploader/utils/page_check_helper.py @@ -0,0 +1,139 @@ +from typing import Optional, Dict, Any +import asyncio +from datetime import datetime +from playwright.async_api import Page +from loguru import logger + +class PageCheckHelper: + # 配置参数 + PAGE_LOAD_TIMEOUT = 10000 # 总体页面加载超时时间 + ELEMENT_WAIT_TIMEOUT = 5000 # 元素等待超时时间 + CHECK_INTERVAL = 500 # 检查间隔时间 + MAX_RETRIES = 3 # 最大重试次数 + + @staticmethod + async def check_element_exists(page: Page, selector: str, timeout: int = ELEMENT_WAIT_TIMEOUT) -> bool: + """ + 检查元素是否存在 + """ + try: + element = await page.wait_for_selector(selector, timeout=timeout) + return element is not None + except Exception as e: + logger.debug(f"元素 {selector} 检查失败: {str(e)}") + return False + + @staticmethod + async def check_multiple_elements(page: Page, selectors: list, timeout: int = ELEMENT_WAIT_TIMEOUT) -> bool: + """ + 并行检查多个元素是否存在 + """ + tasks = [PageCheckHelper.check_element_exists(page, selector, timeout) for selector in selectors] + results = await asyncio.gather(*tasks) + return all(results) + + @staticmethod + async def check_page_loaded(page: Page) -> bool: + """ + 检查页面是否完全加载 + """ + try: + # 检查当前URL + current_url = page.url + logger.info(f"当前页面URL: {current_url}") + + # 如果URL是登录页,说明未登录 + if current_url == "https://creator.douyin.com/": + logger.info("已跳转到登录页面,未登录状态") + return False + + # 如果URL包含creator-micro,说明已登录 + if "creator-micro" in current_url: + logger.info("在创作者中心页面,已登录状态") + return True + + # 其他情况,检查页面元素 + containers = [ + ".semi-layout-content", # 新版容器 + "#douyin-creator-master-side-upload", # 上传按钮容器 + ".container-vEyGlK" # 用户信息容器 + ] + + for container in containers: + if await PageCheckHelper.check_element_exists(page, container): + logger.info(f"找到创作者中心元素: {container}") + return True + + 
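+            # Any single container above counts as "loaded" (first match wins).
+            # Callers normally go through verify_login_status(), e.g. (illustrative):
+            #     if await PageCheckHelper.verify_login_status(page):
+            #         user_info = await PageCheckHelper.get_user_info(page)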
logger.warning("未找到创作者中心元素,可能未登录") + return False + + except Exception as e: + logger.error(f"页面加载检查失败: {str(e)}") + return False + + @staticmethod + async def verify_login_status(page: Page, max_retries: int = MAX_RETRIES) -> bool: + """ + 验证登录状态,支持重试机制 + """ + for i in range(max_retries): + try: + if await PageCheckHelper.check_page_loaded(page): + return True + + logger.warning(f"登录验证第 {i+1} 次失败,准备重试...") + await asyncio.sleep(PageCheckHelper.CHECK_INTERVAL / 1000) # 转换为秒 + + except Exception as e: + logger.error(f"登录验证出错: {str(e)}") + + return False + + @staticmethod + async def get_user_info(page: Page) -> Optional[Dict[str, Any]]: + """ + 获取用户信息 + """ + try: + # 等待用户信息加载 + if not await PageCheckHelper.check_page_loaded(page): + return None + + # 获取用户信息 + user_info = {} + + # 获取基本信息 + user_info['nickname'] = (await page.text_content('.name-_lSSDc')) or '' + douyin_id_text = (await page.text_content('.unique_id-EuH8eA')) or '' + user_info['douyin_id'] = douyin_id_text.replace('抖音号:', '') # 移除前缀 + user_info['avatar'] = (await page.get_attribute('.avatar-XoPjK6 img', 'src')) or '' + + # 获取签名 + user_info['signature'] = (await page.text_content('.signature-HLGxt7')) or '这个人很懒,没有留下任何签名' + + # 获取统计数据 + stats_elements = await page.query_selector_all('.statics-item-MDWoNA') + for i, element in enumerate(stats_elements[:3]): # 只取前三个元素 + number = await element.query_selector('.number-No6ev9') + if number: + value = await number.text_content() or '0' + if i == 0: + user_info['following_count'] = int(value) + elif i == 1: + user_info['fans_count'] = int(value) + elif i == 2: + user_info['likes_count'] = int(value) + + # 设置默认值 + user_info.setdefault('following_count', 0) + user_info.setdefault('fans_count', 0) + user_info.setdefault('likes_count', 0) + + # 添加更新时间 + user_info['updated_at'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + return user_info + + except Exception as e: + logger.error(f"获取用户信息失败: {str(e)}") + return None \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/playwright_helper.py b/uploader/douyin_uploader/utils/playwright_helper.py new file mode 100644 index 00000000..27a4784e --- /dev/null +++ b/uploader/douyin_uploader/utils/playwright_helper.py @@ -0,0 +1,81 @@ +""" +Playwright辅助工具 +提供Playwright相关的辅助功能 +""" + +import os +import sys +import subprocess +from typing import Optional +from utils.log import douyin_logger +from playwright.async_api import async_playwright + +class PlaywrightHelper: + """Playwright辅助工具类""" + + @staticmethod + def install_browser() -> bool: + """ + 安装Playwright浏览器 + Returns: + bool: 安装是否成功 + """ + try: + # 检查是否已安装浏览器 + browser_path = os.environ.get('PLAYWRIGHT_BROWSERS_PATH', None) + if browser_path and os.path.exists(os.path.join(browser_path, 'chromium')): + douyin_logger.info("浏览器已安装") + return True + + douyin_logger.info("开始安装浏览器...") + # 使用subprocess运行playwright install命令 + result = subprocess.run( + [sys.executable, "-m", "playwright", "install", "chromium"], + capture_output=True, + text=True + ) + + if result.returncode == 0: + douyin_logger.success("浏览器安装成功") + return True + else: + douyin_logger.error(f"浏览器安装失败: {result.stderr}") + return False + + except Exception as e: + douyin_logger.error(f"安装浏览器时发生错误: {str(e)}") + return False + + @staticmethod + def get_browser_path() -> Optional[str]: + """ + 获取浏览器安装路径 + Returns: + Optional[str]: 浏览器路径,如果未找到则返回None + """ + try: + browser_path = os.environ.get('PLAYWRIGHT_BROWSERS_PATH', None) + if browser_path and os.path.exists(os.path.join(browser_path, 'chromium')): + 
return browser_path + return None + except Exception as e: + douyin_logger.error(f"获取浏览器路径失败: {str(e)}") + return None + + @staticmethod + async def cleanup_resources() -> None: + """ + 清理Playwright资源 + """ + try: + # 获取当前的playwright实例 + playwright = async_playwright()._impl_obj + if playwright: + # 停止所有浏览器实例 + for browser in playwright.chromium.browsers: + await browser.close() + # 停止playwright服务 + await playwright.stop() + douyin_logger.info("Playwright资源已清理") + except Exception as e: + douyin_logger.error(f"清理Playwright资源失败: {str(e)}") \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/stealth.min.js b/uploader/douyin_uploader/utils/stealth.min.js new file mode 100644 index 00000000..0519ecba --- /dev/null +++ b/uploader/douyin_uploader/utils/stealth.min.js @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/user_info.py b/uploader/douyin_uploader/utils/user_info.py new file mode 100644 index 00000000..ba44dbe7 --- /dev/null +++ b/uploader/douyin_uploader/utils/user_info.py @@ -0,0 +1,146 @@ +""" +用户信息助手 +提供用户信息的获取和处理功能 +""" + +from typing import Optional, Dict, Any +from datetime import datetime +from playwright.async_api import Page +from utils.log import douyin_logger + +class UserInfoHelper: + """用户信息助手类""" + + @staticmethod + async def get_user_info(page: Page) -> Optional[Dict[str, Any]]: + """ + 获取用户信息 + Args: + page: Playwright页面对象 + Returns: + Optional[Dict[str, Any]]: 用户信息字典 + """ + try: + # 开始获取用户信息 + douyin_logger.info("开始获取用户信息...") + + # 获取昵称 + nickname = "" + nickname_selectors = ['.name-_lSSDc'] + for selector in nickname_selectors: + try: + element = await page.wait_for_selector(selector, timeout=5000) + if element: + nickname = await element.text_content() or "" + douyin_logger.info(f"找到昵称元素: {selector}") + break + except Exception: + continue + + # 获取抖音号 + douyin_id = "" + douyin_id_selectors = ['.unique_id-EuH8eA'] + for selector in douyin_id_selectors: + try: + element = await page.wait_for_selector(selector, timeout=5000) + if element: + douyin_id = await element.text_content() or "" + douyin_logger.info(f"找到抖音号元素: {selector}") + break + except Exception: + continue + + # 获取签名 + signature = "" + signature_selectors = ['.signature-HLGxt7'] + for selector in signature_selectors: + try: + element = await page.wait_for_selector(selector, timeout=5000) + if element: + signature = await element.text_content() or "" + douyin_logger.info(f"找到签名元素: {selector}") + break + except Exception: + continue + + # 获取关注数 + following_count = 0 + following_selectors = ['#guide_home_following .number-No6ev9'] + for selector in following_selectors: + try: + element = await page.wait_for_selector(selector, timeout=5000) + if element: + following_text = await element.text_content() or "0" + following_count = int(following_text) + douyin_logger.info(f"找到关注数元素: {selector}") + break + except Exception: + continue + + # 获取粉丝数 + fans_count = 0 + fans_selectors = ['#guide_home_fans .number-No6ev9'] + for selector in fans_selectors: + try: + element = await page.wait_for_selector(selector, timeout=5000) + if element: + fans_text = await element.text_content() or "0" + fans_count = int(fans_text) + douyin_logger.info(f"找到粉丝数元素: {selector}") + break + except Exception: + continue + + # 获取获赞数 + likes_count = 0 + likes_selectors = ['.statics-item-MDWoNA:not([id]) .number-No6ev9'] + for selector in likes_selectors: + try: + element = await page.wait_for_selector(selector, timeout=5000) + if element: + likes_text = await element.text_content() or 
"0" + likes_count = int(likes_text) + douyin_logger.info(f"找到获赞数元素: {selector}") + break + except Exception: + continue + + # 获取头像URL + avatar_url = "" + avatar_selectors = [ + '.img-PeynF_', + '.avatar-XoPjK6 img', + 'div[class*="avatar"] img' + ] + for selector in avatar_selectors: + try: + element = await page.wait_for_selector(selector, timeout=5000) + if element: + avatar_url = await element.get_attribute('src') or "" + douyin_logger.info(f"找到头像元素: {selector}") + break + except Exception: + continue + + # 组装用户信息 + user_info = { + 'nickname': nickname, + 'douyin_id': douyin_id, + 'signature': signature, + 'following_count': following_count, + 'fans_count': fans_count, + 'likes_count': likes_count, + 'avatar_url': avatar_url, + 'updated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S') + } + + # 验证必要字段 + if not nickname or not douyin_id: + douyin_logger.warning("缺少必要的用户信息字段") + return None + + return user_info + + except Exception as e: + douyin_logger.error(f"获取用户信息失败: {str(e)}") + return None \ No newline at end of file diff --git a/uploader/douyin_uploader/utils/validator.py b/uploader/douyin_uploader/utils/validator.py new file mode 100644 index 00000000..88519b71 --- /dev/null +++ b/uploader/douyin_uploader/utils/validator.py @@ -0,0 +1,212 @@ +""" +视频验证模块 +提供视频文件、标题、标签等验证功能 +""" + +import os +from datetime import datetime, timedelta +from typing import List +import re + +class VideoValidator: + """视频验证类""" + + @staticmethod + def validate_video_file(file_path: str) -> bool: + """ + 验证视频文件 + Args: + file_path: 视频文件路径 + Returns: + bool: 是否验证通过 + """ + try: + # 检查文件是否存在且可读 + if not os.path.exists(file_path): + return False + if not os.access(file_path, os.R_OK): + return False + + # 检查文件大小 + file_size = os.path.getsize(file_path) + if file_size == 0: + return False + + # 视频文件大小限制:10MB - 4GB + min_size = 10 * 1024 * 1024 # 10MB + max_size = 4 * 1024 * 1024 * 1024 # 4GB + if not (min_size <= file_size <= max_size): + return False + + return True + + except Exception: + return False + + @staticmethod + def validate_thumbnail(file_path: str) -> bool: + """ + 验证封面图片 + Args: + file_path: 图片文件路径 + Returns: + bool: 是否验证通过 + """ + try: + # 检查文件是否存在且可读 + if not os.path.exists(file_path): + return False + if not os.access(file_path, os.R_OK): + return False + + # 检查文件大小 + file_size = os.path.getsize(file_path) + if file_size == 0: + return False + + # 图片文件大小限制:1KB - 5MB + min_size = 1 * 1024 # 1KB + max_size = 5 * 1024 * 1024 # 5MB + if not (min_size <= file_size <= max_size): + return False + + return True + + except Exception: + return False + + @staticmethod + def validate_title(title: str) -> bool: + """ + 验证视频标题 + Args: + title: 视频标题 + Returns: + bool: 是否验证通过 + """ + try: + # 标题不能为空 + if not title or not title.strip(): + return False + + # 标题长度限制:2-100个字符 + title_length = len(title.strip()) + if not (2 <= title_length <= 100): + return False + + # 标题不能包含特殊字符 + pattern = r'^[a-zA-Z0-9\u4e00-\u9fa5\s,.!?,。!?、::()()【】\[\]]+$' + if not re.match(pattern, title): + return False + + return True + + except Exception: + return False + + @staticmethod + def validate_tags(tags: List[str]) -> bool: + """ + 验证视频标签 + Args: + tags: 标签列表 + Returns: + bool: 是否验证通过 + """ + try: + # 标签列表不能为空 + if not tags: + return False + + # 标签数量限制:1-20个 + if not (1 <= len(tags) <= 20): + return False + + # 验证每个标签 + for tag in tags: + # 标签不能为空 + if not tag or not tag.strip(): + return False + + # 标签长度限制:1-20个字符 + tag_length = len(tag.strip()) + if not (1 <= tag_length <= 20): + return False + + # 标签只能包含中文、英文、数字 + pattern = 
r'^[a-zA-Z0-9\u4e00-\u9fa5]+$' + if not re.match(pattern, tag): + return False + + return True + + except Exception: + return False + + @staticmethod + def validate_publish_date(publish_date: datetime) -> bool: + """ + 验证发布时间 + Args: + publish_date: 发布时间 + Returns: + bool: 是否验证通过 + """ + try: + # 发布时间不能为空 + if not publish_date: + return False + + # 发布时间不能早于当前时间 + if publish_date < datetime.now(): + return False + + # 发布时间不能超过30天 + max_days = 30 + max_date = datetime.now().replace( + hour=23, + minute=59, + second=59, + microsecond=999999 + ) + timedelta(days=max_days) + + if publish_date > max_date: + return False + + return True + + except Exception: + return False + + @staticmethod + def validate_mentions(mentions: List[str]) -> bool: + """ + 验证@提及 + Args: + mentions: @提及列表 + """ + try: + # 提及列表不能为空 + if not mentions: + return False + + # 提及数量限制:1-5个 + if not (1 <= len(mentions) <= 5): + return False + + # 验证每个提及 + for mention in mentions: + # 提及不能为空 + if not mention or not mention.strip(): + return False + + # 提及长度限制:1-10个字符 + mention_length = len(mention.strip()) + if not (1 <= mention_length <= 10): + return False + + return True + + except Exception: + return False + diff --git a/uploader/ks_uploader/__init__.py b/uploader/ks_uploader/__init__.py index c6176570..3bf992f8 100644 --- a/uploader/ks_uploader/__init__.py +++ b/uploader/ks_uploader/__init__.py @@ -1,5 +1,18 @@ -from pathlib import Path - -from conf import BASE_DIR - -Path(BASE_DIR / "cookies" / "ks_uploader").mkdir(exist_ok=True) \ No newline at end of file +# -*- coding: utf-8 -*- +from pathlib import Path + +from conf import BASE_DIR + +from .modules.account import account_manager +from .modules.video import KSVideoUploader, KSBatchUploader +from .modules.validator import validator + +# 确保cookie目录存在 +Path(BASE_DIR / "cookies" / "ks_uploader").mkdir(exist_ok=True) + +__all__ = [ + 'account_manager', + 'KSVideoUploader', + 'KSBatchUploader', + 'validator' +] \ No newline at end of file diff --git a/uploader/ks_uploader/main.py b/uploader/ks_uploader/main.py deleted file mode 100644 index 87850284..00000000 --- a/uploader/ks_uploader/main.py +++ /dev/null @@ -1,214 +0,0 @@ -# -*- coding: utf-8 -*- -from datetime import datetime - -from playwright.async_api import Playwright, async_playwright -import os -import asyncio - -from conf import LOCAL_CHROME_PATH -from utils.base_social_media import set_init_script -from utils.files_times import get_absolute_path -from utils.log import kuaishou_logger - - -async def cookie_auth(account_file): - async with async_playwright() as playwright: - browser = await playwright.chromium.launch(headless=True) - context = await browser.new_context(storage_state=account_file) - context = await set_init_script(context) - # 创建一个新的页面 - page = await context.new_page() - # 访问指定的 URL - await page.goto("https://cp.kuaishou.com/article/publish/video") - try: - await page.wait_for_selector("div.names div.container div.name:text('机构服务')", timeout=5000) # 等待5秒 - - kuaishou_logger.info("[+] 等待5秒 cookie 失效") - return False - except: - kuaishou_logger.success("[+] cookie 有效") - return True - - -async def ks_setup(account_file, handle=False): - account_file = get_absolute_path(account_file, "ks_uploader") - if not os.path.exists(account_file) or not await cookie_auth(account_file): - if not handle: - return False - kuaishou_logger.info('[+] cookie文件不存在或已失效,即将自动打开浏览器,请扫码登录,登陆后会自动生成cookie文件') - await get_ks_cookie(account_file) - return True - - -async def get_ks_cookie(account_file): - async with 
async_playwright() as playwright: - options = { - 'args': [ - '--lang en-GB' - ], - 'headless': False, # Set headless option here - } - # Make sure to run headed. - browser = await playwright.chromium.launch(**options) - # Setup context however you like. - context = await browser.new_context() # Pass any options - context = await set_init_script(context) - # Pause the page, and start recording manually. - page = await context.new_page() - await page.goto("https://cp.kuaishou.com") - await page.pause() - # 点击调试器的继续,保存cookie - await context.storage_state(path=account_file) - - -class KSVideo(object): - def __init__(self, title, file_path, tags, publish_date: datetime, account_file): - self.title = title # 视频标题 - self.file_path = file_path - self.tags = tags - self.publish_date = publish_date - self.account_file = account_file - self.date_format = '%Y-%m-%d %H:%M' - self.local_executable_path = LOCAL_CHROME_PATH - - async def handle_upload_error(self, page): - kuaishou_logger.error("视频出错了,重新上传中") - await page.locator('div.progress-div [class^="upload-btn-input"]').set_input_files(self.file_path) - - async def upload(self, playwright: Playwright) -> None: - # 使用 Chromium 浏览器启动一个浏览器实例 - print(self.local_executable_path) - if self.local_executable_path: - browser = await playwright.chromium.launch( - headless=False, - executable_path=self.local_executable_path, - ) - else: - browser = await playwright.chromium.launch( - headless=False - ) # 创建一个浏览器上下文,使用指定的 cookie 文件 - context = await browser.new_context(storage_state=f"{self.account_file}") - context = await set_init_script(context) - context.on("close", lambda: context.storage_state(path=self.account_file)) - - # 创建一个新的页面 - page = await context.new_page() - # 访问指定的 URL - await page.goto("https://cp.kuaishou.com/article/publish/video") - kuaishou_logger.info('正在上传-------{}.mp4'.format(self.title)) - # 等待页面跳转到指定的 URL,没进入,则自动等待到超时 - kuaishou_logger.info('正在打开主页...') - await page.wait_for_url("https://cp.kuaishou.com/article/publish/video") - # 点击 "上传视频" 按钮 - upload_button = page.locator("button[class^='_upload-btn']") - await upload_button.wait_for(state='visible') # 确保按钮可见 - - async with page.expect_file_chooser() as fc_info: - await upload_button.click() - file_chooser = await fc_info.value - await file_chooser.set_files(self.file_path) - - await asyncio.sleep(2) - - # if not await page.get_by_text("封面编辑").count(): - # raise Exception("似乎没有跳转到到编辑页面") - - await asyncio.sleep(1) - - # 等待按钮可交互 - new_feature_button = page.locator('button[type="button"] span:text("我知道了")') - if await new_feature_button.count() > 0: - await new_feature_button.click() - - kuaishou_logger.info("正在填充标题和话题...") - await page.get_by_text("描述").locator("xpath=following-sibling::div").click() - kuaishou_logger.info("clear existing title") - await page.keyboard.press("Backspace") - await page.keyboard.press("Control+KeyA") - await page.keyboard.press("Delete") - kuaishou_logger.info("filling new title") - await page.keyboard.type(self.title) - await page.keyboard.press("Enter") - - # 快手只能添加3个话题 - for index, tag in enumerate(self.tags[:3], start=1): - kuaishou_logger.info("正在添加第%s个话题" % index) - await page.keyboard.type(f"#{tag} ") - await asyncio.sleep(2) - - max_retries = 60 # 设置最大重试次数,最大等待时间为 2 分钟 - retry_count = 0 - - while retry_count < max_retries: - try: - # 获取包含 '上传中' 文本的元素数量 - number = await page.locator("text=上传中").count() - - if number == 0: - kuaishou_logger.success("视频上传完毕") - break - else: - if retry_count % 5 == 0: - kuaishou_logger.info("正在上传视频中...") - await 
asyncio.sleep(2) - except Exception as e: - kuaishou_logger.error(f"检查上传状态时发生错误: {e}") - await asyncio.sleep(2) # 等待 2 秒后重试 - retry_count += 1 - - if retry_count == max_retries: - kuaishou_logger.warning("超过最大重试次数,视频上传可能未完成。") - - # 定时任务 - if self.publish_date != 0: - await self.set_schedule_time(page, self.publish_date) - - # 判断视频是否发布成功 - while True: - try: - publish_button = page.get_by_text("发布", exact=True) - if await publish_button.count() > 0: - await publish_button.click() - - await asyncio.sleep(1) - confirm_button = page.get_by_text("确认发布") - if await confirm_button.count() > 0: - await confirm_button.click() - - # 等待页面跳转,确认发布成功 - await page.wait_for_url( - "https://cp.kuaishou.com/article/manage/video?status=2&from=publish", - timeout=5000, - ) - kuaishou_logger.success("视频发布成功") - break - except Exception as e: - kuaishou_logger.info(f"视频正在发布中... 错误: {e}") - await page.screenshot(full_page=True) - await asyncio.sleep(1) - - await context.storage_state(path=self.account_file) # 保存cookie - kuaishou_logger.info('cookie更新完毕!') - await asyncio.sleep(2) # 这里延迟是为了方便眼睛直观的观看 - # 关闭浏览器上下文和浏览器实例 - await context.close() - await browser.close() - - async def main(self): - async with async_playwright() as playwright: - await self.upload(playwright) - - async def set_schedule_time(self, page, publish_date): - kuaishou_logger.info("click schedule") - publish_date_hour = publish_date.strftime("%Y-%m-%d %H:%M:%S") - await page.locator("label:text('发布时间')").locator('xpath=following-sibling::div').locator( - '.ant-radio-input').nth(1).click() - await asyncio.sleep(1) - - await page.locator('div.ant-picker-input input[placeholder="选择日期时间"]').click() - await asyncio.sleep(1) - - await page.keyboard.press("Control+KeyA") - await page.keyboard.type(str(publish_date_hour)) - await page.keyboard.press("Enter") - await asyncio.sleep(1) diff --git a/uploader/ks_uploader/modules/account.py b/uploader/ks_uploader/modules/account.py new file mode 100644 index 00000000..dd7159fa --- /dev/null +++ b/uploader/ks_uploader/modules/account.py @@ -0,0 +1,567 @@ +# -*- coding: utf-8 -*- +from datetime import datetime +import os +import asyncio +from typing import Dict, Any, Optional +from playwright.async_api import async_playwright, Page +from pathlib import Path + +from utils.base_social_media import set_init_script +from utils.files_times import get_absolute_path +from utils.log import kuaishou_logger +from ..utils.constants import ( + BROWSER_ARGS, BROWSER_VIEWPORT, USER_AGENT, + LOGIN_URL, COOKIE_VALID_TIME, COOKIE_CHECK_INTERVAL, + PROFILE_URL +) +from utils.social_media_db import SocialMediaDB +from utils.cookie_helper import CookieHelper + +class KSAccountInfo: + """快手账号信息类""" + def __init__(self, data: Dict[str, Any]): + self.avatar = data.get('avatar', '') # 头像 + self.username = data.get('username', '') # 用户名 + self.kwai_id = data.get('kwai_id', '') # 快手ID + self.followers = data.get('followers', 0) # 粉丝数 + self.following = data.get('following', 0) # 关注数 + self.likes = data.get('likes', 0) # 获赞数 + self.description = data.get('description', '') # 个人简介 + self.updated_at = datetime.now().isoformat() + + def to_dict(self) -> Dict[str, Any]: + """转换为字典""" + return { + 'avatar': self.avatar, + 'username': self.username, + 'kwai_id': self.kwai_id, + 'followers': self.followers, + 'following': self.following, + 'likes': self.likes, + 'description': self.description, + 'updated_at': self.updated_at + } + + @staticmethod + def parse_number(text: str) -> float: + """解析数字(支持w/万等单位)""" + try: + if not text: + return 0 + 
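`parse_number` above normalizes the follower/like counts that Kuaishou renders with `w`/`万` suffixes or thousands separators. A self-contained sketch of the same conversion, included only to illustrate the expected behaviour (not part of this patch):

```python
def parse_count(text: str) -> float:
    """Convert display counts such as '1.5w', '3万' or '1,024' to plain numbers."""
    if not text:
        return 0.0
    text = text.strip().lower()
    for suffix in ("w", "万"):
        if suffix in text:
            return float(text.replace(suffix, "")) * 10_000
    try:
        return float(text.replace(",", ""))
    except ValueError:
        return 0.0

print(parse_count("1.5w"))   # 15000.0
print(parse_count("3万"))    # 30000.0
print(parse_count("1,024"))  # 1024.0
print(parse_count(""))       # 0.0
```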
text = text.strip().lower() + if 'w' in text: + return float(text.replace('w', '')) * 10000 + if '万' in text: + return float(text.replace('万', '')) * 10000 + return float(text.replace(',', '')) + except: + return 0 + +class KSAccountManager: + def __init__(self): + self.cookie_pool = {} + self.valid_time = COOKIE_VALID_TIME + self._account_info = None + self._info_cache = {} # 账号信息缓存 + self._last_update_time = {} # 上次更新时间记录 + self._pending_updates = [] # 待更新的账号信息 + self._db = SocialMediaDB() # SQLite是轻量级的,可以保持单个连接 + + def __del__(self): + """确保数据库连接正确关闭""" + if hasattr(self, '_db'): + self._db.close() + + def _add_pending_update(self, platform: str, username: str, info: Dict[str, Any]): + """添加待更新的账号信息""" + self._pending_updates.append({ + 'platform': platform, + 'username': username, + 'info': info, + 'timestamp': datetime.now() + }) + + def _process_pending_updates(self): + """处理待更新的账号信息""" + if not self._pending_updates: + return + + try: + # 批量更新数据库 + for item in self._pending_updates: + try: + self._db.add_or_update_account( + item['platform'], + item['username'], + item['info'] + ) + except Exception as e: + kuaishou_logger.error(f"更新账号信息失败: {str(e)}") + + self._pending_updates.clear() + + except Exception as e: + kuaishou_logger.error(f"处理更新队列时发生错误: {str(e)}") + + def _get_cached_info(self, account_file: str) -> Optional[Dict[str, Any]]: + """从缓存获取账号信息""" + if account_file not in self._info_cache: + return None + + last_update = self._last_update_time.get(account_file) + if not last_update or (datetime.now() - last_update).total_seconds() > COOKIE_CHECK_INTERVAL: + return None + + return self._info_cache[account_file] + + def _update_cache(self, account_file: str, info: Dict[str, Any]): + """更新缓存信息""" + self._info_cache[account_file] = info + self._last_update_time[account_file] = datetime.now() + + async def _extract_profile_info(self, page: Page) -> Optional[KSAccountInfo]: + """提取个人资料信息""" + try: + # 等待页面加载完成 + await page.wait_for_load_state("networkidle") + await asyncio.sleep(2) # 等待React渲染完成 + + # 直接通过evaluate获取所有需要的信息 + info = await page.evaluate("""() => { + const info = {}; + + // 获取用户名 - 从header-info-card中获取 + const usernameEl = document.querySelector('div.header-info-card .user-name'); + info.username = usernameEl ? usernameEl.textContent.trim() : ''; + + // 获取快手ID + const userIdEl = document.querySelector('div.header-info-card .user-kwai-id'); + info.kwai_id = userIdEl ? userIdEl.textContent.trim() : ''; + + // 获取统计数据 + const statsElements = document.querySelectorAll('div.user-cnt__item'); + info.followers = 0; + info.following = 0; + info.likes = 0; + + statsElements.forEach(el => { + const text = el.textContent.toLowerCase(); + const spanText = el.querySelector('span').textContent; + const numText = text.replace(spanText, '').trim(); + + if (spanText === '粉丝') { + info.followers = numText.includes('w') || numText.includes('万') ? + parseFloat(numText.replace('w', '').replace('万', '')) * 10000 : + parseFloat(numText.replace(',', '')); + } else if (spanText === '关注') { + info.following = numText.includes('w') || numText.includes('万') ? + parseFloat(numText.replace('w', '').replace('万', '')) * 10000 : + parseFloat(numText.replace(',', '')); + } else if (spanText === '获赞') { + info.likes = numText.includes('w') || numText.includes('万') ? + parseFloat(numText.replace('w', '').replace('万', '')) * 10000 : + parseFloat(numText.replace(',', '')); + } + }); + + // 获取个人简介 + const descEl = document.querySelector('div.header-info-card .user-desc'); + info.description = descEl ? 
descEl.textContent.trim() : ''; + + // 获取头像 + const avatarEl = document.querySelector('div.header-info-card .user-image'); + info.avatar = avatarEl ? avatarEl.getAttribute('src') : ''; + + return info; + }""") + + # 记录调试信息 + kuaishou_logger.debug("成功提取个人资料信息") + kuaishou_logger.debug(f"用户名: {info.get('username', '')}") + kuaishou_logger.debug(f"快手ID: {info.get('kwai_id', '')}") + kuaishou_logger.debug(f"粉丝数: {info.get('followers', 0)}") + kuaishou_logger.debug(f"关注数: {info.get('following', 0)}") + kuaishou_logger.debug(f"获赞数: {info.get('likes', 0)}") + kuaishou_logger.debug(f"头像URL: {info.get('avatar', '')}") + kuaishou_logger.debug(f"个人简介: {info.get('description', '')}") + + # 如果至少有用户名,就返回结果 + if info.get('username'): + return KSAccountInfo(info) + + kuaishou_logger.error("无法获取完整的账号信息") + return None + + except Exception as e: + kuaishou_logger.error(f"提取个人资料信息失败: {str(e)}") + try: + # 保存页面内容和截图以便调试 + page_content = await page.content() + kuaishou_logger.debug(f"页面内容: {page_content[:1000]}...") + await page.screenshot(path="error_extract_profile.png") + kuaishou_logger.info("已保存错误页面截图: error_extract_profile.png") + except Exception as debug_error: + kuaishou_logger.error(f"保存调试信息失败: {str(debug_error)}") + return None + + async def check_profile_page(self, page) -> tuple[bool, Optional[str]]: + """检查是否成功加载个人资料页面并获取用户名 + Args: + page: Playwright页面对象 + Returns: + (是否成功, 用户名) + """ + try: + # 直接通过evaluate获取用户名 + result = await page.evaluate("""() => { + const elements = document.querySelectorAll('div[class*="user-name"]'); + for (const el of elements) { + const text = el.textContent.trim(); + if (text) { + return { + text: text, + found: true + }; + } + } + return { + text: '', + found: false + }; + }""") + + if result['found']: + return True, result['text'] + + except Exception as e: + kuaishou_logger.debug(f"检查个人资料页面失败: {str(e)}") + + return False, None + + async def validate_cookie( + self, + account_file: str, + expected_username: Optional[str] = None, + quick_check: bool = True + ) -> bool: + """验证Cookie是否有效 + Args: + account_file: Cookie文件路径 + expected_username: 期望的用户名,用于验证登录是否正确 + quick_check: 是否只进行快速验证,不更新用户信息 + Returns: + bool: cookie是否有效 + """ + # 检查缓存 + if not quick_check: + cached_info = self._get_cached_info(account_file) + if cached_info: + self._account_info = KSAccountInfo(cached_info) + return True + + playwright = None + browser = None + context = None + page = None + + try: + playwright = await async_playwright().start() + browser = await playwright.chromium.launch(headless=True, args=BROWSER_ARGS) + context = await browser.new_context( + storage_state=account_file, + viewport=BROWSER_VIEWPORT, + user_agent=USER_AGENT + ) + page = await context.new_page() + + # 访问个人资料页面 + await page.goto(PROFILE_URL, wait_until="networkidle") + + # 检查是否需要登录 + if "passport.kuaishou.com" in page.url: + return False + + # 获取用户名 + username_result = await page.evaluate("""() => { + const el = document.querySelector('div.header-info-card .user-name'); + return el ? 
el.textContent.trim() : ''; + }""") + + if not username_result: + return False + + if expected_username and username_result != expected_username: + return False + + # 如果不是快速检查,则更新用户信息 + if not quick_check: + self._account_info = await self._extract_profile_info(page) + if not self._account_info: + return False + + # 更新缓存 + self._update_cache(account_file, self._account_info.to_dict()) + + # 添加到待更新列表 + self._add_pending_update( + "kuaishou", + username_result, + self._account_info.to_dict() + ) + + # 处理待更新 + self._process_pending_updates() + + return True + + except Exception as e: + kuaishou_logger.error(f"验证cookie时发生错误: {str(e)}") + return False + + finally: + # 按顺序清理资源 + if page: + try: + await page.close() + except: + pass + if context: + try: + await context.close() + except: + pass + if browser: + try: + await browser.close() + except: + pass + if playwright: + try: + await playwright.stop() + except: + pass + + async def update_account_info(self, account_file: str) -> Optional[Dict[str, Any]]: + """更新账号信息""" + if await self.validate_cookie(account_file, quick_check=False): + return self._account_info.to_dict() if self._account_info else None + return None + + async def setup_account(self, account_file: str, handle: bool = False) -> bool: + """设置账号""" + account_file = get_absolute_path(account_file, "ks_uploader") + + try: + cookie_path = self._db.get_cookie_path("kuaishou", account_file) + if cookie_path: + account_file = cookie_path + except Exception as e: + kuaishou_logger.error(f"从数据库获取cookie失败: {str(e)}") + + if not os.path.exists(account_file) or not await self.validate_cookie(account_file): + if not handle: + return False + kuaishou_logger.info('[+] cookie文件不存在或已失效,即将自动打开浏览器,请扫码登录,登陆后会自动生成cookie文件') + await self.refresh_cookie(account_file) + return True + + async def batch_validate_cookies(self, cookie_files: list) -> dict: + """批量验证Cookie""" + results = {} + for cookie_file in cookie_files: + is_valid = await self.validate_cookie(cookie_file) + results[cookie_file] = { + 'valid': is_valid, + 'checked_at': datetime.now().isoformat(), + 'account_info': self._account_info.to_dict() if self._account_info else None + } + + # 处理所有待更新的信息 + self._process_pending_updates() + return results + + async def refresh_cookie(self, account_file: str, expected_username: Optional[str] = None) -> bool: + """刷新Cookie + Args: + account_file: Cookie文件路径 + expected_username: 期望的用户名,用于验证登录是否正确 + """ + browser = None + context = None + page = None + + try: + kuaishou_logger.info(f"开始刷新Cookie,目标文件: {account_file}") + async with async_playwright() as playwright: + options = { + 'args': BROWSER_ARGS, + 'headless': False, + } + kuaishou_logger.info("正在启动浏览器...") + browser = await playwright.chromium.launch(**options) + + kuaishou_logger.info("正在创建浏览器上下文...") + context = await browser.new_context( + viewport=BROWSER_VIEWPORT, + user_agent=USER_AGENT + ) + context = await set_init_script(context) + + kuaishou_logger.info("正在创建新页面...") + page = await context.new_page() + + # 直接访问登录页 + kuaishou_logger.info(f"正在访问登录页面: {LOGIN_URL}") + await page.goto(LOGIN_URL, wait_until="networkidle") + kuaishou_logger.info("等待用户登录...") + + # 等待登录成功并跳转到个人资料页面 + max_wait_time = 300 # 5分钟超时 + check_interval = 3 # 每3秒检查一次 + + for _ in range(max_wait_time // check_interval): + await asyncio.sleep(check_interval) + current_url = page.url + + # 如果还在登录页面,继续等待 + if "passport.kuaishou.com" in current_url: + kuaishou_logger.info("等待用户完成登录...") + continue + + # 如果已经跳转到个人资料页面 + if PROFILE_URL in current_url: + 
kuaishou_logger.info("检测到已跳转到个人资料页面") + + # 等待页面加载完成 + await page.wait_for_load_state("networkidle") + + # 检查是否成功加载个人资料页面 + success, username = await self.check_profile_page(page) + if not success: + kuaishou_logger.info("等待页面加载...") + continue + + if expected_username and username != expected_username: + kuaishou_logger.error(f"用户名不匹配,期望: {expected_username}, 实际: {username}") + return False + + kuaishou_logger.success(f"登录成功!当前用户: {username}") + await context.storage_state(path=account_file) + kuaishou_logger.success(f"登录状态已保存到: {account_file}") + + # 提取账号信息 + self._account_info = await self._extract_profile_info(page) + return True + else: + # 如果在其他页面,尝试跳转到个人资料页面 + kuaishou_logger.info("尝试跳转到个人资料页面...") + await page.goto(PROFILE_URL, wait_until="networkidle") + + kuaishou_logger.error("登录等待超时") + return False + + except Exception as e: + kuaishou_logger.error(f"刷新Cookie失败: {str(e)}") + return False + + finally: + try: + kuaishou_logger.info("正在清理资源...") + if page: + await page.close() + if context: + await context.close() + if browser: + await browser.close() + kuaishou_logger.info("资源清理完成") + except Exception as e: + kuaishou_logger.error(f"清理资源时发生错误: {str(e)}") + + def get_account_info(self) -> Optional[Dict[str, Any]]: + """获取当前账号信息""" + return self._account_info.to_dict() if self._account_info else None + + async def setup_cookie(self, account_file: str, expected_username: Optional[str] = None, force_refresh: bool = False) -> Dict[str, Any]: + """设置和验证Cookie + + 完整的Cookie设置流程,包括: + 1. 验证现有Cookie + 2. 必要时刷新Cookie + 3. 更新账号信息 + 4. 保存到数据库 + + Args: + account_file: Cookie文件路径 + expected_username: 期望的用户名 + force_refresh: 是否强制刷新 + + Returns: + Dict[str, Any]: 设置结果 + """ + try: + result = { + 'success': False, + 'message': '', + 'cookie_file': str(account_file), + 'timestamp': datetime.now().isoformat() + } + + # 确保目录存在 + Path(account_file).parent.mkdir(parents=True, exist_ok=True) + + # 检查现有Cookie + if not force_refresh and os.path.exists(account_file): + # 验证文件 + if not CookieHelper.validate_cookie_file(account_file): + kuaishou_logger.warning("现有Cookie文件无效") + else: + # 验证Cookie + is_valid = await self.validate_cookie(account_file, expected_username, quick_check=False) + if is_valid: + result.update({ + 'success': True, + 'message': 'Cookie有效', + 'username': self._account_info.username if self._account_info else None, + 'expires_at': datetime.now().timestamp() + COOKIE_VALID_TIME + }) + return result + + # 需要刷新Cookie + kuaishou_logger.info('开始获取新的Cookie...') + + # 备份现有Cookie + if os.path.exists(account_file): + CookieHelper.backup_cookie_file(account_file) + + # 刷新Cookie + if await self.refresh_cookie(account_file, expected_username): + # 验证新Cookie + is_valid = await self.validate_cookie(account_file, expected_username, quick_check=False) + if is_valid: + result.update({ + 'success': True, + 'message': 'Cookie已更新并验证成功', + 'username': self._account_info.username if self._account_info else None, + 'expires_at': datetime.now().timestamp() + COOKIE_VALID_TIME + }) + else: + result.update({ + 'message': 'Cookie更新成功但验证失败' + }) + else: + result.update({ + 'message': 'Cookie获取失败' + }) + + return result + + except Exception as e: + kuaishou_logger.error(f"Cookie设置过程发生异常: {str(e)}") + return { + 'success': False, + 'message': str(e), + 'cookie_file': str(account_file), + 'timestamp': datetime.now().isoformat() + } + +account_manager = KSAccountManager() \ No newline at end of file diff --git a/uploader/ks_uploader/modules/validator.py b/uploader/ks_uploader/modules/validator.py new file mode 
100644 index 00000000..5841cb13 --- /dev/null +++ b/uploader/ks_uploader/modules/validator.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +from typing import List, Dict, Any +import os +from datetime import datetime + +class KSDataValidator: + def __init__(self): + self.title_max_length = 100 + self.max_tags = 3 + self.allowed_video_extensions = ['.mp4', '.mov', '.avi'] + self.max_video_size = 4 * 1024 * 1024 * 1024 # 4GB + + def validate_video_params(self, title: str, tags: List[str], + file_path: str, publish_date: datetime = None) -> Dict[str, Any]: + """ + 验证视频参数 + Returns: + Dict[str, Any]: {'valid': bool, 'errors': List[str]} + """ + errors = [] + + # 验证标题 + if not title: + errors.append("标题不能为空") + elif len(title) > self.title_max_length: + errors.append(f"标题长度不能超过{self.title_max_length}个字符") + + # 验证标签 + if not tags: + errors.append("至少需要一个标签") + elif len(tags) > self.max_tags: + errors.append(f"标签数量不能超过{self.max_tags}个") + + # 验证文件 + if not os.path.exists(file_path): + errors.append("视频文件不存在") + else: + # 验证文件扩展名 + _, ext = os.path.splitext(file_path) + if ext.lower() not in self.allowed_video_extensions: + errors.append(f"不支持的视频格式: {ext}") + + # 验证文件大小 + file_size = os.path.getsize(file_path) + if file_size > self.max_video_size: + errors.append(f"视频文件大小不能超过4GB") + + # 验证发布时间 + if publish_date and publish_date < datetime.now(): + errors.append("定时发布时间不能早于当前时间") + + return { + 'valid': len(errors) == 0, + 'errors': errors + } + + def validate_upload_result(self, response: Dict[str, Any]) -> Dict[str, Any]: + """ + 验证上传结果 + Returns: + Dict[str, Any]: {'valid': bool, 'errors': List[str]} + """ + errors = [] + + if not response.get('success'): + errors.append("上传失败") + if 'error' in response: + errors.append(f"错误信息: {response['error']}") + + return { + 'valid': len(errors) == 0, + 'errors': errors + } + +validator = KSDataValidator() \ No newline at end of file diff --git a/uploader/ks_uploader/modules/video.py b/uploader/ks_uploader/modules/video.py new file mode 100644 index 00000000..116d5cee --- /dev/null +++ b/uploader/ks_uploader/modules/video.py @@ -0,0 +1,671 @@ +# -*- coding: utf-8 -*- +from datetime import datetime +import asyncio +import os +from typing import List, Dict, Optional, Any +from playwright.async_api import Playwright, async_playwright, Page, Browser, BrowserContext, TimeoutError + +from conf import LOCAL_CHROME_PATH +from utils.base_social_media import set_init_script +from utils.log import kuaishou_logger +from ..utils.constants import ( + UPLOAD_TIMEOUT, MAX_RETRIES, + BASE_URL, UPLOAD_URL, MANAGE_URL +) + +class UploadError(Exception): + """上传错误基类""" + pass + +class VideoUploadError(UploadError): + """视频上传错误""" + pass + +class VideoPublishError(UploadError): + """视频发布错误""" + pass + +class KSVideoUploader: + def __init__(self, title: str, file_path: str, tags: List[str], + publish_date: Optional[datetime], account_file: str, mentions: List[str] = None, + cover_file: Optional[str] = None): + self.title = title + self.file_path = file_path + self.tags = tags + self.mentions = mentions or [] + self.publish_date = publish_date + self.account_file = account_file + self.cover_file = cover_file + self.date_format = '%Y-%m-%d %H:%M' + self.local_executable_path = LOCAL_CHROME_PATH + self.max_retries = MAX_RETRIES + self.upload_timeout = UPLOAD_TIMEOUT + self._upload_start_time = None + self._last_progress = 0 + self._retry_delays = [2, 5, 10] + + async def _retry_with_backoff(self, func: callable, *args, **kwargs) -> Any: + """实现指数退避的重试机制""" + last_error = None + 
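`_retry_with_backoff` above walks a fixed delay schedule (`self._retry_delays = [2, 5, 10]`, mirrored by `RETRY_DELAYS` in `constants.py`) and re-raises once the schedule is exhausted. A condensed standalone sketch of the same pattern, for illustration only:

```python
import asyncio
from typing import Any, Awaitable, Callable

async def retry_with_backoff(
    func: Callable[..., Awaitable[Any]],
    *args: Any,
    delays: tuple[float, ...] = (2, 5, 10),
    **kwargs: Any,
) -> Any:
    """Await `func`, retrying on any exception and sleeping `delays[i]` between attempts."""
    if not delays:
        raise ValueError("delays must not be empty")
    for attempt, delay in enumerate(delays, start=1):
        try:
            return await func(*args, **kwargs)
        except Exception:
            if attempt == len(delays):
                raise  # schedule exhausted: surface the last error
            await asyncio.sleep(delay)

# Example: a flaky coroutine that succeeds on the third call.
calls = {"n": 0}

async def flaky() -> str:
    calls["n"] += 1
    if calls["n"] < 3:
        raise RuntimeError("transient failure")
    return "ok"

print(asyncio.run(retry_with_backoff(flaky, delays=(0.1, 0.1, 0.1))))  # ok
```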
kuaishou_logger.debug(f"开始重试操作: func={func.__name__}, args={args}, kwargs={kwargs}") + for i, delay in enumerate(self._retry_delays): + try: + result = await func(*args, **kwargs) + kuaishou_logger.debug(f"重试操作成功: result={result}") + return result + except Exception as e: + last_error = e + if i == len(self._retry_delays) - 1: # 最后一次重试 + kuaishou_logger.error(f"重试次数已用完,最后错误: {str(last_error)}") + raise last_error + kuaishou_logger.warning(f"第{i+1}次重试失败,{delay}秒后重试: {str(e)}") + await asyncio.sleep(delay) + raise last_error + + async def _check_network_status(self, page: Page) -> bool: + """检查网络状态""" + try: + await page.wait_for_load_state("networkidle", timeout=5000) + return True + except TimeoutError: + return False + + async def _monitor_upload_progress(self, page: Page) -> None: + """监控上传进度""" + try: + # 检查进度条元素 + progress_div = page.locator("div.progress-div") + if await progress_div.count() > 0: + # 尝试获取进度文本 + progress_text = await progress_div.text_content() + if progress_text and "%" in progress_text: + current_progress = int(''.join(filter(str.isdigit, progress_text))) + if current_progress > self._last_progress: + self._last_progress = current_progress + kuaishou_logger.info(f"上传进度: {current_progress}%") + except Exception as e: + # 如果获取进度失败,不要报错,因为可能已经上传完成 + kuaishou_logger.debug(f"获取上传进度: {str(e)}") + + async def handle_upload_error(self, page: Page) -> None: + """处理上传错误""" + try: + # 检查是否有错误提示 + error_text = await page.locator("div.error-message").text_content() + if error_text: + raise VideoUploadError(f"上传出错: {error_text}") + + # 尝试重新上传 + kuaishou_logger.error("视频出错了,重新上传中") + await self._retry_with_backoff( + page.locator('div.progress-div [class^="upload-btn-input"]').set_input_files, + self.file_path + ) + except Exception as e: + raise VideoUploadError(f"处理上传错误失败: {str(e)}") + + async def fill_video_info(self, page: Page) -> None: + """填充视频信息""" + try: + kuaishou_logger.info("正在填充标题和话题...") + + # 等待页面完全加载 + await page.wait_for_load_state("networkidle") + await asyncio.sleep(2) # 等待页面渲染完成 + + # 等待描述输入区域加载 + description_editor = await page.wait_for_selector("#work-description-edit", timeout=15000) + if not description_editor: + raise VideoUploadError("无法找到描述输入区域") + + # 确保编辑区域可见和可交互 + await description_editor.wait_for_element_state("visible") + await description_editor.wait_for_element_state("enabled") + + # 点击编辑区域激活 + await description_editor.click() + await asyncio.sleep(1) + + # 清除并输入标题 + await page.keyboard.press("Control+A") + await page.keyboard.press("Delete") + await asyncio.sleep(0.5) + + # 输入标题和描述 + await page.keyboard.type(self.title) + await page.keyboard.press("Enter") + await asyncio.sleep(1) + + # 添加话题(最多3个) + for index, tag in enumerate(self.tags[:4], start=1): + kuaishou_logger.info(f"正在添加第{index}个话题") + await page.keyboard.type(f"#{tag}") + await asyncio.sleep(0.25) # 等待话题建议出现 + await page.keyboard.press("Space") + # # 检查是否有话题建议出现 + # tag_suggestions = page.locator("._tag_oei9t_283") + # if await tag_suggestions.count() > 0: + # # 点击第一个建议的话题 + # await tag_suggestions.first.click() + # else: + # # 如果没有建议,就直接用空格分隔 + # await page.keyboard.press("Space") + + await asyncio.sleep(1) # 确保话题添加完成 + + # 验证话题是否添加成功 + content = await description_editor.text_content() + if f"#{tag}" not in content: + kuaishou_logger.warning(f"话题 #{tag} 可能未成功添加") + + await page.keyboard.press("Enter") + await asyncio.sleep(1) + + # 输入提及用户 + for mention in self.mentions: + await page.keyboard.type(f"@{mention}") + await asyncio.sleep(0.2) + await page.keyboard.press("Space") + # 
_at-user-container_oei9t_173 + + # 检查字数限制 + text_count = page.locator("._text-tip_oei9t_250") + if await text_count.count() > 0: + count_text = await text_count.text_content() + kuaishou_logger.info(f"当前字数: {count_text}") + + kuaishou_logger.info("视频信息填充完成") + + except Exception as e: + kuaishou_logger.error(f"填充视频信息失败: {str(e)}", exc_info=True) + # 保存页面截图以便调试 + try: + screenshot_path = f"error_fill_info_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png" + await page.screenshot(path=screenshot_path, full_page=True) + kuaishou_logger.info(f"错误截图已保存: {screenshot_path}") + except: + pass + raise VideoUploadError(f"填充视频信息失败: {str(e)}") + + async def wait_for_upload(self, page: Page) -> bool: + """等待上传完成""" + retry_count = 0 + upload_start_time = datetime.now() + last_log_time = datetime.now() + + while retry_count < self.upload_timeout // 2: + try: + current_time = datetime.now() + # 每5秒记录一次基本状态 + if (current_time - last_log_time).total_seconds() >= 5: + kuaishou_logger.info("正在检查上传状态...") + last_log_time = current_time + + # 检查网络状态 + if not await self._check_network_status(page): + kuaishou_logger.warning("网络连接不稳定") + + # 记录当前页面URL + current_url = page.url + kuaishou_logger.debug(f"当前页面URL: {current_url}") + + # 检查上传状态 + # 1. 检查预览区域是否存在 + preview_section = page.locator("section._wrapper_1ahzu_1") + preview_count = await preview_section.count() + kuaishou_logger.debug(f"预览区域数量: {preview_count}") + + if preview_count > 0: + # 2. 检查是否在上传中 + progress_text = page.locator(".ant-progress-text") + progress_count = await progress_text.count() + + if progress_count > 0: + # 获取进度 + progress = await progress_text.text_content() + + if progress and "%" in progress: + current_progress = int(''.join(filter(str.isdigit, progress))) + # 只有当进度发生变化时才记录 + if current_progress != self._last_progress: + self._last_progress = current_progress + kuaishou_logger.info(f"上传进度: {current_progress}%") + # 记录已经过时间 + elapsed_time = (datetime.now() - upload_start_time).total_seconds() + kuaishou_logger.info(f"已用时: {elapsed_time:.1f}秒") + if current_progress >= 100: + kuaishou_logger.success("上传进度达到100%!") + return True + else: + kuaishou_logger.debug("未找到进度条") + + # 3. 检查是否已完成(预览视频区域出现) + preview_video = page.locator("div._preview-video_1ahzu_181") + video_count = await preview_video.count() + + if video_count > 0: + # 检查视频元素是否真的可见 + is_visible = await preview_video.is_visible() + if is_visible: + kuaishou_logger.success("检测到预览视频区域,上传已完成!") + return True + + # 4. 检查上传状态文本 + upload_status = page.locator("span._phone-label_1ahzu_34") + if await upload_status.count() > 0: + status_text = await upload_status.text_content() + kuaishou_logger.info(f"当前上传状态: {status_text}") + + # 5. 检查是否有错误提示 + error_element = page.locator("div.error-message") + if await error_element.count(): + error_text = await error_element.text_content() + kuaishou_logger.error(f"检测到错误消息: {error_text}") + raise VideoUploadError(f"上传失败: {error_text}") + + # 6. 
检查上传超时 + elapsed_time = (datetime.now() - upload_start_time).total_seconds() + if elapsed_time > self.upload_timeout: + kuaishou_logger.error(f"上传超时,已经过 {elapsed_time:.1f} 秒") + raise TimeoutError("上传超时") + + await asyncio.sleep(2) + retry_count += 1 + + except TimeoutError as e: + kuaishou_logger.error(f"上传超时: {str(e)}") + # 保存页面截图 + await page.screenshot(path=f"upload_timeout_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png") + raise VideoUploadError(f"上传超时: {str(e)}") + except Exception as e: + kuaishou_logger.error(f"检查上传状态时发生错误: {str(e)}") + # 保存页面截图 + await page.screenshot(path=f"upload_error_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png") + await asyncio.sleep(2) + + kuaishou_logger.error("达到最大重试次数,上传可能失败") + return False + + async def set_schedule_time(self, page: Page) -> None: + """设置定时发布""" + if not self.publish_date: + return + + try: + kuaishou_logger.info("设置定时发布...") + publish_date_hour = self.publish_date.strftime("%Y-%m-%d %H:%M:%S") + + # 选择定时发布选项 + schedule_radio = page.locator("label:text('发布时间')").locator('xpath=following-sibling::div').locator('.ant-radio-input').nth(1) + await schedule_radio.click() + await asyncio.sleep(1) + + # 设置发布时间 + date_input = page.locator('div.ant-picker-input input[placeholder="选择日期时间"]') + await date_input.click() + await asyncio.sleep(1) + await page.keyboard.press("Control+A") + await page.keyboard.type(str(publish_date_hour)) + await page.keyboard.press("Enter") + + # 验证时间是否设置成功 + await asyncio.sleep(1) + date_text = await date_input.input_value() + if not date_text or publish_date_hour not in date_text: + raise VideoPublishError("定时发布时间设置失败") + + except Exception as e: + raise VideoPublishError(f"设置定时发布失败: {str(e)}") + + async def publish_video(self, page: Page) -> bool: + """发布视频""" + max_publish_attempts = 3 + publish_attempt = 0 + + while publish_attempt < max_publish_attempts: + try: + # 点击发布按钮 + publish_button = page.get_by_text("发布", exact=True) + if await publish_button.count() > 0: + await publish_button.click() + await asyncio.sleep(1) + + # 确认发布 + confirm_button = page.get_by_text("确认发布") + if await confirm_button.count() > 0: + await confirm_button.click() + + # 等待发布完成 + try: + await page.wait_for_url( + f"{MANAGE_URL}?status=2&from=publish", + timeout=5000, + ) + kuaishou_logger.success("视频发布成功") + return True + except TimeoutError: + # 检查是否有错误提示 + error_element = page.locator("div.error-message") + if await error_element.count(): + error_text = await error_element.text_content() + raise VideoPublishError(f"发布失败: {error_text}") + + except Exception as e: + publish_attempt += 1 + if publish_attempt == max_publish_attempts: + raise VideoPublishError(f"发布视频失败: {str(e)}") + kuaishou_logger.warning(f"发布尝试 {publish_attempt} 失败: {str(e)}") + await asyncio.sleep(2) + await page.screenshot(path=f"publish_error_{publish_attempt}.png") + + return False + + async def edit_cover(self, page: Page) -> None: + """编辑视频封面 + + Args: + page: Playwright页面对象 + """ + if not self.cover_file: + kuaishou_logger.info("未提供封面文件,跳过封面设置") + return + + try: + kuaishou_logger.info("开始设置视频封面...") + + # 等待封面编辑区域加载 + cover_editor = page.locator("div._high-cover-editor_y5cqm_1") + await cover_editor.wait_for(state="visible", timeout=10000) + + # 找到默认封面区域并点击 + default_cover = cover_editor.locator("div._default-cover_y5cqm_68") + await default_cover.wait_for(state="visible", timeout=5000) + + # 鼠标移动到封面区域以显示替换按钮 + await default_cover.hover() + await asyncio.sleep(1) # 等待替换按钮出现 + + # 点击替换按钮打开弹窗 + replace_button = 
default_cover.locator("span._cover-editor-text_y5cqm_58").filter(has_text="替换") + await replace_button.click() + await asyncio.sleep(1) + + # 等待弹窗加载 + modal = page.locator("div.ant-modal") + await modal.wait_for(state="visible", timeout=5000) + + # 找到上传封面的input元素 + file_input = modal.locator('input[type="file"][accept*="image"]') + + # 上传封面文件 + await file_input.set_input_files(self.cover_file) + kuaishou_logger.info(f"已选择封面文件: {self.cover_file}") + + # 等待封面加载完成 + await asyncio.sleep(2) + + # 点击完成按钮 + finish_button = modal.get_by_role("button", name="完成") + await finish_button.click() + + # 等待弹窗关闭 + await modal.wait_for(state="hidden", timeout=5000) + + # 等待封面更新 + await page.wait_for_load_state("networkidle") + kuaishou_logger.success("封面设置完成") + + except Exception as e: + kuaishou_logger.error(f"设置视频封面失败: {str(e)}") + # 保存错误截图 + await page.screenshot(path=f"error_cover_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png") + raise VideoUploadError(f"设置视频封面失败: {str(e)}") + + async def start(self) -> bool: + """开始上传流程""" + try: + kuaishou_logger.debug("开始上传流程") + async with async_playwright() as playwright: + return await self.upload(playwright) + except Exception as e: + kuaishou_logger.error(f"启动上传失败: {str(e)}", exc_info=True) + return False + + async def upload(self, playwright: Playwright) -> bool: + """上传视频主流程""" + browser = None + context = None + page = None + try: + # 初始化浏览器 + browser_options = { + 'headless': False, # 禁用无头模式 + 'args': [ + '--disable-gpu', + '--no-sandbox', + '--disable-dev-shm-usage', + '--disable-setuid-sandbox', + '--start-maximized' # 最大化窗口 + ] + } + if self.local_executable_path: + browser_options['executable_path'] = self.local_executable_path + + kuaishou_logger.info("正在启动浏览器...") + browser = await playwright.chromium.launch(**browser_options) + + # 创建上下文 + context = await browser.new_context( + storage_state=self.account_file, + viewport={'width': 1920, 'height': 1080}, + user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' + ) + context = await set_init_script(context) + context.set_default_timeout(30000) # 设置默认超时时间为30秒 + + # 创建页面 + page = await context.new_page() + + # 访问上传页面并等待加载 + kuaishou_logger.info("正在打开上传页面...") + await self._retry_with_backoff(page.goto, UPLOAD_URL) + await self._retry_with_backoff(page.wait_for_load_state, "networkidle") + + # 处理引导框 + try: + # 等待引导框出现 + guide_close_button = page.locator('div[class*="_close_"]') + if await guide_close_button.count() > 0: + kuaishou_logger.info("检测到引导框,正在关闭...") + await guide_close_button.click() + await asyncio.sleep(1) + except Exception as e: + kuaishou_logger.warning(f"处理引导框时出现异常: {str(e)}") + + # 验证是否在上传页面 + if not await self._verify_upload_page(page): + raise VideoUploadError("未能正确加载上传页面") + + kuaishou_logger.info(f'正在上传视频: {self.title}') + + # 等待上传按钮出现 + upload_button = page.locator("button[class^='_upload-btn']") + await self._retry_with_backoff(upload_button.wait_for, state='visible', timeout=10000) + + # 上传文件 + try: + async with page.expect_file_chooser() as fc_info: + await upload_button.click() + file_chooser = await fc_info.value + await file_chooser.set_files(self.file_path) + kuaishou_logger.info("文件已选择,等待上传...") + except Exception as e: + raise VideoUploadError(f"文件选择失败: {str(e)}") + + # 等待文件上传开始 + await asyncio.sleep(2) + + # 处理新功能提示 + new_feature_button = page.locator('button[type="button"] span:text("我知道了")') + if await new_feature_button.count() > 0: + await new_feature_button.click() + await asyncio.sleep(1) + + # 
等待跳转到编辑页面 + try: + await self._retry_with_backoff( + page.wait_for_url, + "**/publish/video**", + timeout=10000, + wait_until="networkidle" + ) + kuaishou_logger.info("已跳转到视频编辑页面") + except Exception as e: + kuaishou_logger.error("等待跳转到编辑页面失败") + await page.screenshot(path=f"error_redirect_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png") + raise VideoUploadError(f"未能跳转到视频编辑页面: {str(e)}") + + # 填充视频信息 + await self._retry_with_backoff(self.fill_video_info, page) + + # 等待上传完成 + if not await self._retry_with_backoff(self.wait_for_upload, page): + raise VideoUploadError("视频上传超时或失败") + + # 设置视频封面(移到这里,在视频上传完成后) + await self._retry_with_backoff(self.edit_cover, page) + + # 设置定时发布 + if self.publish_date: + await self._retry_with_backoff(self.set_schedule_time, page) + + # 发布视频 + if not await self._retry_with_backoff(self.publish_video, page): + raise VideoPublishError("视频发布失败") + + # 保存Cookie + await context.storage_state(path=self.account_file) + kuaishou_logger.info('cookie更新完毕!') + return True + + except Exception as e: + kuaishou_logger.error(f"上传过程发生错误: {str(e)}", exc_info=True) + if page: + await page.screenshot(path=f"error_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png") + return False + + finally: + if context: + try: + await context.storage_state(path=self.account_file) + except: + pass + await context.close() + if browser: + await browser.close() + + async def _verify_upload_page(self, page: Page) -> bool: + """验证是否在上传页面""" + try: + # 等待页面加载完成 + await page.wait_for_load_state("networkidle") + + # 检查URL + current_url = page.url + if "publish/video" not in current_url: + kuaishou_logger.error(f"当前页面URL不正确: {current_url}") + return False + + # 检查上传按钮是否存在 + upload_button = page.locator("button[class^='_upload-btn']") + if not await upload_button.count(): + kuaishou_logger.error("未找到上传按钮") + return False + + return True + except Exception as e: + kuaishou_logger.error(f"验证上传页面失败: {str(e)}") + return False + +class KSBatchUploader: + def __init__(self, max_concurrent: int = 3): + self.max_concurrent = max_concurrent + self.semaphore = asyncio.Semaphore(max_concurrent) + self._upload_results = {} + self._failed_uploads = [] + kuaishou_logger.info(f"初始化批量上传器,最大并发数: {max_concurrent}") + + async def upload_with_semaphore(self, uploader: KSVideoUploader) -> bool: + """使用信号量控制并发上传""" + async with self.semaphore: + try: + kuaishou_logger.info(f"开始上传视频: {uploader.title}") + result = await uploader.start() + kuaishou_logger.debug(f"视频上传结果: title={uploader.title}, result={result}") + self._upload_results[uploader.title] = { + 'success': result, + 'timestamp': datetime.now().isoformat(), + 'file_path': uploader.file_path + } + if not result: + self._failed_uploads.append(uploader) + return result + except Exception as e: + kuaishou_logger.error(f"上传视频失败: title={uploader.title}, error={str(e)}", exc_info=True) + self._upload_results[uploader.title] = { + 'success': False, + 'error': str(e), + 'timestamp': datetime.now().isoformat(), + 'file_path': uploader.file_path + } + self._failed_uploads.append(uploader) + return False + + async def retry_failed_uploads(self) -> None: + """重试失败的上传""" + if not self._failed_uploads: + return + + kuaishou_logger.info(f"开始重试 {len(self._failed_uploads)} 个失败的上传") + retry_tasks = [self.upload_with_semaphore(uploader) for uploader in self._failed_uploads] + await asyncio.gather(*retry_tasks) + self._failed_uploads.clear() + + async def batch_upload(self, uploaders: List[KSVideoUploader]) -> Dict[str, Any]: + """批量上传视频""" + try: + kuaishou_logger.info(f"开始批量上传,视频数量: 
{len(uploaders)}") + # 创建上传任务 + upload_tasks = [self.upload_with_semaphore(uploader) for uploader in uploaders] + + # 执行上传任务 + kuaishou_logger.debug("执行上传任务") + results = await asyncio.gather(*upload_tasks, return_exceptions=True) + kuaishou_logger.debug(f"上传任务执行结果: {results}") + + # 重试失败的上传 + if self._failed_uploads: + kuaishou_logger.info(f"有失败的上传任务,数量: {len(self._failed_uploads)}") + await self.retry_failed_uploads() + + # 生成报告 + success_count = sum(1 for r in results if r is True) + failed_count = sum(1 for r in results if r is False) + error_count = sum(1 for r in results if isinstance(r, Exception)) + + kuaishou_logger.info(f"批量上传完成统计 - 总数: {len(uploaders)}, 成功: {success_count}, 失败: {failed_count}, 错误: {error_count}") + + return { + 'results': self._upload_results, + 'total': len(uploaders), + 'success': success_count, + 'failed': failed_count, + 'errors': error_count + } + + except Exception as e: + kuaishou_logger.error(f"批量上传过程发生错误: {str(e)}", exc_info=True) + return { + 'results': self._upload_results, + 'error': str(e) + } \ No newline at end of file diff --git a/uploader/ks_uploader/utils/constants.py b/uploader/ks_uploader/utils/constants.py new file mode 100644 index 00000000..e2f98e1f --- /dev/null +++ b/uploader/ks_uploader/utils/constants.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +# 视频相关常量 +VIDEO_EXTENSIONS = ['.mp4', '.mov', '.avi'] +MAX_VIDEO_SIZE = 4 * 1024 * 1024 * 1024 # 4GB +MAX_TITLE_LENGTH = 100 +MAX_TAGS = 3 +MIN_VIDEO_DURATION = 5 # 最短视频时长(秒) +MAX_VIDEO_DURATION = 900 # 最长视频时长(秒) +ALLOWED_VIDEO_CODECS = ['h264', 'h265'] +ALLOWED_AUDIO_CODECS = ['aac'] + +# 上传相关常量 +UPLOAD_TIMEOUT = 120 # 秒 +MAX_RETRIES = 3 +MAX_CONCURRENT_UPLOADS = 3 +CHUNK_SIZE = 5 * 1024 * 1024 # 5MB +PROGRESS_UPDATE_INTERVAL = 2 # 进度更新间隔(秒) +RETRY_DELAYS = [2, 5, 10] # 重试延迟时间(秒) + +# URL常量 +BASE_URL = "https://cp.kuaishou.com" +UPLOAD_URL = f"{BASE_URL}/article/publish/video" +MANAGE_URL = f"{BASE_URL}/article/manage/video" +LOGIN_URL = f"{BASE_URL}/login" +PROFILE_URL = f"{BASE_URL}/profile" # 个人资料页面 + +# Cookie相关常量 +COOKIE_VALID_TIME = 24 * 60 * 60 # 24小时 +COOKIE_CHECK_INTERVAL = 60 * 60 # 1小时 +COOKIE_REFRESH_THRESHOLD = 22 * 60 * 60 # 22小时(提前2小时刷新) + +# 浏览器配置 +BROWSER_ARGS = [ + '--disable-gpu', + '--no-sandbox', + '--disable-dev-shm-usage', + '--disable-setuid-sandbox', + '--disable-web-security', + '--disable-features=IsolateOrigins,site-per-process' +] + +BROWSER_VIEWPORT = { + 'width': 1920, + 'height': 1080 +} + +USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36' + +# 错误处理配置 +MAX_UPLOAD_RETRIES = 3 +MAX_PUBLISH_RETRIES = 3 +ERROR_SCREENSHOT_DIR = "error_screenshots" +UPLOAD_ERROR_WAIT_TIME = 5 # 上传错误等待时间(秒) + +# 性能优化配置 +DEFAULT_TIMEOUT = 30000 # 默认超时时间(毫秒) +NETWORK_IDLE_TIMEOUT = 5000 # 网络空闲超时时间(毫秒) +PAGE_LOAD_TIMEOUT = 30000 # 页面加载超时时间(毫秒) +ELEMENT_TIMEOUT = 10000 # 元素等待超时时间(毫秒) + +# 日志配置 +LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' +LOG_LEVEL = 'INFO' +LOG_FILE = 'kuaishou_uploader.log' + +# 选择器配置 +SELECTORS = { + 'upload_button': "button[class^='_upload-btn']", + 'description': "div.description", + 'publish_button': "button:text('发布')", + 'confirm_button': "button:text('确认发布')", + 'progress_text': "div.progress-text", + 'error_message': "div.error-message", + 'new_feature_button': 'button[type="button"] span:text("我知道了")', + 'schedule_time': { + 'radio': "label:text('发布时间') xpath=following-sibling::div .ant-radio-input", + 'input': 'div.ant-picker-input 
input[placeholder="选择日期时间"]' + }, + 'profile': { + 'info_card': "div.header-info-card", + 'avatar': "div.header-info-card img.user-image", + 'username': "div.header-info-card div.user-name", + 'kwai_id': "div.header-info-card div.user-kwai-id", + 'stats': "div.header-info-card div.user-cnt__item", + 'description': "div.header-info-card div.user-desc" + } +} \ No newline at end of file diff --git a/uploader/tencent_uploader/__init__.py b/uploader/tencent_uploader/__init__.py index 9860b4a4..3b1735b9 100644 --- a/uploader/tencent_uploader/__init__.py +++ b/uploader/tencent_uploader/__init__.py @@ -1,5 +1,16 @@ -from pathlib import Path - -from conf import BASE_DIR - -Path(BASE_DIR / "cookies" / "tencent_uploader").mkdir(exist_ok=True) \ No newline at end of file +# -*- coding: utf-8 -*- +from pathlib import Path + +from conf import BASE_DIR +from .main import weixin_setup +from .modules.video import TencentVideo +from .modules.cookie import get_tencent_cookie + +# 确保cookie目录存在 +Path(BASE_DIR / "cookies" / "tencent_uploader").mkdir(exist_ok=True) + +__all__ = [ + 'weixin_setup', + 'TencentVideo', + 'get_tencent_cookie', +] \ No newline at end of file diff --git a/uploader/tencent_uploader/analytics.py b/uploader/tencent_uploader/analytics.py new file mode 100644 index 00000000..5d2b2cda --- /dev/null +++ b/uploader/tencent_uploader/analytics.py @@ -0,0 +1,10 @@ +class PerformanceTracker: + def track_video_performance(self, video_id): + """跟踪视频的: + - 播放量 + - 互动数据 + - 违规记录 + """ + + def generate_report(self): + """生成数据可视化报告""" \ No newline at end of file diff --git a/uploader/tencent_uploader/main.py b/uploader/tencent_uploader/main.py index 6baeee36..071c4763 100644 --- a/uploader/tencent_uploader/main.py +++ b/uploader/tencent_uploader/main.py @@ -1,267 +1,41 @@ # -*- coding: utf-8 -*- -from datetime import datetime +from pathlib import Path -from playwright.async_api import Playwright, async_playwright -import os -import asyncio - -from conf import LOCAL_CHROME_PATH -from utils.base_social_media import set_init_script from utils.files_times import get_absolute_path from utils.log import tencent_logger +from .modules.account import cookie_auth, batch_cookie_auth, get_tencent_cookie - -def format_str_for_short_title(origin_title: str) -> str: - # 定义允许的特殊字符 - allowed_special_chars = "《》“”:+?%°" - - # 移除不允许的特殊字符 - filtered_chars = [char if char.isalnum() or char in allowed_special_chars else ' ' if char == ',' else '' for - char in origin_title] - formatted_string = ''.join(filtered_chars) - - # 调整字符串长度 - if len(formatted_string) > 16: - # 截断字符串 - formatted_string = formatted_string[:16] - elif len(formatted_string) < 6: - # 使用空格来填充字符串 - formatted_string += ' ' * (6 - len(formatted_string)) - - return formatted_string - - -async def cookie_auth(account_file): - async with async_playwright() as playwright: - browser = await playwright.chromium.launch(headless=True) - context = await browser.new_context(storage_state=account_file) - context = await set_init_script(context) - # 创建一个新的页面 - page = await context.new_page() - # 访问指定的 URL - await page.goto("https://channels.weixin.qq.com/platform/post/create") - try: - await page.wait_for_selector('div.title-name:has-text("微信小店")', timeout=5000) # 等待5秒 - tencent_logger.error("[+] 等待5秒 cookie 失效") - return False - except: - tencent_logger.success("[+] cookie 有效") - return True - - -async def get_tencent_cookie(account_file): - async with async_playwright() as playwright: - options = { - 'args': [ - '--lang en-GB' - ], - 'headless': False, # Set headless 
option here - } - # Make sure to run headed. - browser = await playwright.chromium.launch(**options) - # Setup context however you like. - context = await browser.new_context() # Pass any options - # Pause the page, and start recording manually. - context = await set_init_script(context) - page = await context.new_page() - await page.goto("https://channels.weixin.qq.com") - await page.pause() - # 点击调试器的继续,保存cookie - await context.storage_state(path=account_file) - +__all__ = ['weixin_setup', 'batch_cookie_auth', 'get_tencent_cookie'] async def weixin_setup(account_file, handle=False): - account_file = get_absolute_path(account_file, "tencent_uploader") - if not os.path.exists(account_file) or not await cookie_auth(account_file): - if not handle: - # Todo alert message - return False - tencent_logger.info('[+] cookie文件不存在或已失效,即将自动打开浏览器,请扫码登录,登陆后会自动生成cookie文件') - await get_tencent_cookie(account_file) - return True - - -class TencentVideo(object): - def __init__(self, title, file_path, tags, publish_date: datetime, account_file, category=None): - self.title = title # 视频标题 - self.file_path = file_path - self.tags = tags - self.publish_date = publish_date - self.account_file = account_file - self.category = category - self.local_executable_path = LOCAL_CHROME_PATH - - async def set_schedule_time_tencent(self, page, publish_date): - label_element = page.locator("label").filter(has_text="定时").nth(1) - await label_element.click() - - await page.click('input[placeholder="请选择发表时间"]') - - str_month = str(publish_date.month) if publish_date.month > 9 else "0" + str(publish_date.month) - current_month = str_month + "月" - # 获取当前的月份 - page_month = await page.inner_text('span.weui-desktop-picker__panel__label:has-text("月")') - - # 检查当前月份是否与目标月份相同 - if page_month != current_month: - await page.click('button.weui-desktop-btn__icon__right') - - # 获取页面元素 - elements = await page.query_selector_all('table.weui-desktop-picker__table a') - - # 遍历元素并点击匹配的元素 - for element in elements: - if 'weui-desktop-picker__disabled' in await element.evaluate('el => el.className'): - continue - text = await element.inner_text() - if text.strip() == str(publish_date.day): - await element.click() - break - - # 输入小时部分(假设选择11小时) - await page.click('input[placeholder="请选择时间"]') - await page.keyboard.press("Control+KeyA") - await page.keyboard.type(str(publish_date.hour)) - - # 选择标题栏(令定时时间生效) - await page.locator("div.input-editor").click() - - async def handle_upload_error(self, page): - tencent_logger.info("视频出错了,重新上传中") - await page.locator('div.media-status-content div.tag-inner:has-text("删除")').click() - await page.get_by_role('button', name="删除", exact=True).click() - file_input = page.locator('input[type="file"]') - await file_input.set_input_files(self.file_path) - - async def upload(self, playwright: Playwright) -> None: - # 使用 Chromium (这里使用系统内浏览器,用chromium 会造成h264错误 - browser = await playwright.chromium.launch(headless=False, executable_path=self.local_executable_path) - # 创建一个浏览器上下文,使用指定的 cookie 文件 - context = await browser.new_context(storage_state=f"{self.account_file}") - context = await set_init_script(context) - - # 创建一个新的页面 - page = await context.new_page() - # 访问指定的 URL - await page.goto("https://channels.weixin.qq.com/platform/post/create") - tencent_logger.info(f'[+]正在上传-------{self.title}.mp4') - # 等待页面跳转到指定的 URL,没进入,则自动等待到超时 - await page.wait_for_url("https://channels.weixin.qq.com/platform/post/create") - # await page.wait_for_selector('input[type="file"]', timeout=10000) - file_input = 
page.locator('input[type="file"]') - await file_input.set_input_files(self.file_path) - # 填充标题和话题 - await self.add_title_tags(page) - # 添加商品 - # await self.add_product(page) - # 合集功能 - await self.add_collection(page) - # 原创选择 - await self.add_original(page) - # 检测上传状态 - await self.detect_upload_status(page) - if self.publish_date != 0: - await self.set_schedule_time_tencent(page, self.publish_date) - # 添加短标题 - await self.add_short_title(page) - - await self.click_publish(page) - - await context.storage_state(path=f"{self.account_file}") # 保存cookie - tencent_logger.success(' [-]cookie更新完毕!') - await asyncio.sleep(2) # 这里延迟是为了方便眼睛直观的观看 - # 关闭浏览器上下文和浏览器实例 - await context.close() - await browser.close() - - async def add_short_title(self, page): - short_title_element = page.get_by_text("短标题", exact=True).locator("..").locator( - "xpath=following-sibling::div").locator( - 'span input[type="text"]') - if await short_title_element.count(): - short_title = format_str_for_short_title(self.title) - await short_title_element.fill(short_title) - - async def click_publish(self, page): - while True: - try: - publish_buttion = page.locator('div.form-btns button:has-text("发表")') - if await publish_buttion.count(): - await publish_buttion.click() - await page.wait_for_url("https://channels.weixin.qq.com/platform/post/list", timeout=1500) - tencent_logger.success(" [-]视频发布成功") - break - except Exception as e: - current_url = page.url - if "https://channels.weixin.qq.com/platform/post/list" in current_url: - tencent_logger.success(" [-]视频发布成功") - break - else: - tencent_logger.exception(f" [-] Exception: {e}") - tencent_logger.info(" [-] 视频正在发布中...") - await asyncio.sleep(0.5) - - async def detect_upload_status(self, page): - while True: - # 匹配删除按钮,代表视频上传完毕,如果不存在,代表视频正在上传,则等待 - try: - # 匹配删除按钮,代表视频上传完毕 - if "weui-desktop-btn_disabled" not in await page.get_by_role("button", name="发表").get_attribute( - 'class'): - tencent_logger.info(" [-]视频上传完毕") - break - else: - tencent_logger.info(" [-] 正在上传视频中...") - await asyncio.sleep(2) - # 出错了视频出错 - if await page.locator('div.status-msg.error').count() and await page.locator( - 'div.media-status-content div.tag-inner:has-text("删除")').count(): - tencent_logger.error(" [-] 发现上传出错了...准备重试") - await self.handle_upload_error(page) - except: - tencent_logger.info(" [-] 正在上传视频中...") - await asyncio.sleep(2) - - async def add_title_tags(self, page): - await page.locator("div.input-editor").click() - await page.keyboard.type(self.title) - await page.keyboard.press("Enter") - for index, tag in enumerate(self.tags, start=1): - await page.keyboard.type("#" + tag) - await page.keyboard.press("Space") - tencent_logger.info(f"成功添加hashtag: {len(self.tags)}") - - async def add_collection(self, page): - collection_elements = page.get_by_text("添加到合集").locator("xpath=following-sibling::div").locator( - '.option-list-wrap > div') - if await collection_elements.count() > 1: - await page.get_by_text("添加到合集").locator("xpath=following-sibling::div").click() - await collection_elements.first.click() - - async def add_original(self, page): - if await page.get_by_label("视频为原创").count(): - await page.get_by_label("视频为原创").check() - # 检查 "我已阅读并同意 《视频号原创声明使用条款》" 元素是否存在 - label_locator = await page.locator('label:has-text("我已阅读并同意 《视频号原创声明使用条款》")').is_visible() - if label_locator: - await page.get_by_label("我已阅读并同意 《视频号原创声明使用条款》").check() - await page.get_by_role("button", name="声明原创").click() - # 2023年11月20日 wechat更新: 可能新账号或者改版账号,出现新的选择页面 - if await page.locator('div.label 
span:has-text("声明原创")').count() and self.category: - # 因处罚无法勾选原创,故先判断是否可用 - if not await page.locator('div.declare-original-checkbox input.ant-checkbox-input').is_disabled(): - await page.locator('div.declare-original-checkbox input.ant-checkbox-input').click() - if not await page.locator( - 'div.declare-original-dialog label.ant-checkbox-wrapper.ant-checkbox-wrapper-checked:visible').count(): - await page.locator('div.declare-original-dialog input.ant-checkbox-input:visible').click() - if await page.locator('div.original-type-form > div.form-label:has-text("原创类型"):visible').count(): - await page.locator('div.form-content:visible').click() # 下拉菜单 - await page.locator( - f'div.form-content:visible ul.weui-desktop-dropdown__list li.weui-desktop-dropdown__list-ele:has-text("{self.category}")').first.click() - await page.wait_for_timeout(1000) - if await page.locator('button:has-text("声明原创"):visible').count(): - await page.locator('button:has-text("声明原创"):visible').click() - - async def main(self): - async with async_playwright() as playwright: - await self.upload(playwright) + """ + 设置微信登录 + Args: + account_file: cookie文件路径 + handle: 是否允许手动处理(自动登录) + Returns: + bool: 设置是否成功 + """ + try: + # 获取绝对路径 + account_file = get_absolute_path(account_file, "tencent_uploader") + account_dir = Path(account_file).parent + + # 检查是否存在同目录下的其他账号cookie文件 + existing_cookies = list(account_dir.glob("*.json")) + + # 验证所有cookie(现在cookie_auth会自动处理登录) + if handle: + cookie_results = await batch_cookie_auth([str(f) for f in existing_cookies]) + # 只要有一个账号验证成功就返回True + return any(valid for _, (valid, _) in cookie_results.items()) + else: + # 如果不允许手动处理,只验证cookie有效性 + for cookie_file in existing_cookies: + if await cookie_auth(str(cookie_file)): + return True + return False + + except Exception as e: + tencent_logger.error(f"账号设置失败: {str(e)}") + return False \ No newline at end of file diff --git a/uploader/tencent_uploader/modules/__init__.py b/uploader/tencent_uploader/modules/__init__.py new file mode 100644 index 00000000..8f7e73ea --- /dev/null +++ b/uploader/tencent_uploader/modules/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +from .account import get_account_info, cookie_auth, batch_cookie_auth +from .cookie import get_tencent_cookie +from .video import TencentVideo +from .utils import format_str_for_short_title + +__all__ = [ + 'get_account_info', + 'cookie_auth', + 'batch_cookie_auth', + 'get_tencent_cookie', + 'TencentVideo', + 'format_str_for_short_title', +] \ No newline at end of file diff --git a/uploader/tencent_uploader/modules/account.py b/uploader/tencent_uploader/modules/account.py new file mode 100644 index 00000000..185b1b54 --- /dev/null +++ b/uploader/tencent_uploader/modules/account.py @@ -0,0 +1,250 @@ +# -*- coding: utf-8 -*- +from pathlib import Path +import asyncio +from playwright.async_api import async_playwright +import sys +import json +from datetime import datetime +from pathlib import Path + +# 添加项目根目录到Python路径 +ROOT_DIR = Path(__file__).parent.parent.parent.parent +sys.path.append(str(ROOT_DIR)) + +from utils.base_social_media import set_init_script +from utils.log import tencent_logger +from utils.social_media_db import SocialMediaDB +from typing import Optional + +async def get_account_info(page) -> dict: + """ + 获取账号信息的通用方法,使用多重备选策略 + + Args: + page: playwright页面对象 + + Returns: + dict: 包含账号信息的字典 + """ + try: + # 多重备选选择器,按优先级排序 + nickname_selectors = [ + 'h2.finder-nickname', # 基础类选择器 + '.finder-nickname', # 简单类选择器 + 'div:has-text("视频号ID") >> xpath=../h2', # 使用相邻元素定位 + 
'h2:has-text("视频号")', # 使用文本内容定位 + ] + + # 尝试所有选择器直到找到元素 + nickname = None + for selector in nickname_selectors: + element = page.locator(selector).first + if await element.count(): + nickname = await element.inner_text() + break + + if not nickname: + raise Exception("无法获取账号昵称") + + # 获取其他账号信息 + info = { + 'nickname': nickname, + 'id': await page.locator('#finder-uid-copy').get_attribute('data-clipboard-text') or '', + 'video_count': await page.locator('.finder-content-info .finder-info-num').first.inner_text() or '0', + 'follower_count': await page.locator('.second-info .finder-info-num').inner_text() or '0', + 'updated_at': datetime.now().strftime("%Y-%m-%d %H:%M:%S") + } + # 更新数据库 + try: + db = SocialMediaDB() + account = db.get_account("tencent", info['id']) + + if account: + # 如果账号已存在,更新信息 + db.update_account("tencent", info['id'], { + 'nickname': info['nickname'], + 'video_count': int(info['video_count']), + 'follower_count': int(info['follower_count']) + }) + else: + # 如果账号不存在,添加新账号 + db.add_or_update_account( + "tencent", + info['id'], + info['nickname'], + int(info['video_count']), + int(info['follower_count']) + ) + db.close() + except Exception as e: + tencent_logger.error(f"更新账号数据库失败: {str(e)}") + + return info + + except Exception as e: + tencent_logger.error(f"获取账号信息失败: {str(e)}") + return None + + +async def cookie_auth(account_file): + """ + 使用 account_file 中的 cookie 进行微信渠道平台的登录验证。 + 如果cookie失效,会自动尝试重新登录获取新cookie。 + + Args: + account_file (str): 包含 cookie 信息的文件路径。 + + Returns: + bool: 如果 cookie 有效或成功获取新cookie,返回 True;否则返回 False。 + """ + from .cookie import get_tencent_cookie # 避免循环导入 + + account_name = Path(account_file).stem + tencent_logger.info(f"[+][{account_name}] 开始验证cookie...") + + # 启动一个无头的 Chromium 浏览器实例。 + async with async_playwright() as playwright: + browser = await playwright.chromium.launch(headless=True) + # 创建一个新的浏览器上下文,并加载账户文件中的存储状态(cookie)。 + context = await browser.new_context(storage_state=str(account_file)) + context = await set_init_script(context) + # 创建一个新的页面。 + page = await context.new_page() + # 访问指定的 URL。 + await page.goto("https://channels.weixin.qq.com/platform/post/create") + tencent_logger.info(f"访问指定的 URL。{account_file}- cookie 验证") + try: + # 等待页面上出现特定的元素,以验证 cookie 是否有效。 + await page.wait_for_selector('div.title-name:has-text("微信小店")', timeout=5000) # 等待5秒 + # 如果元素出现,说明 cookie 失效。 + tencent_logger.error(f"[+][{account_name}] cookie 失效,准备重新登录") + + # 直接尝试重新登录获取新cookie + if new_cookie_file := await get_tencent_cookie(str(account_file)): + # 验证新获取的cookie是否有效 + return await cookie_auth(str(new_cookie_file)) + return False + + except: + # 如果元素未出现,说明 cookie 有效。 + tencent_logger.success(f"[+][{account_name}] cookie 有效") + + # 更新数据库中的cookie状态 + try: + db = SocialMediaDB() + # 获取账号信息 + account_info = await get_account_info(page) + if account_info: + db.update_cookie_status("tencent", account_info['id'], str(account_file), True) + db.close() + except Exception as e: + tencent_logger.error(f"更新cookie状态失败: {str(e)}") + + return True + + +async def batch_cookie_auth(cookie_files: list) -> dict: + """ + 并发验证多个账号的cookie有效性 + + Args: + cookie_files: cookie文件路径列表 + + Returns: + dict: {cookie_file: (is_valid, account_name)} + 例如:{ + 'path/to/cookie1.json': (True, '账号1'), + 'path/to/cookie2.json': (False, '账号2') + } + """ + async def verify_single_cookie(cookie_file): + account_name = Path(cookie_file).stem + is_valid = await cookie_auth(str(cookie_file)) # 确保传入字符串路径 + return str(cookie_file), (is_valid, account_name) # 确保返回字符串路径 + + # 创建所有cookie验证任务 + tasks 
= [verify_single_cookie(file) for file in cookie_files] + + # 并发执行所有验证任务 + results = await asyncio.gather(*tasks) + + # 转换为字典格式返回 + return dict(results) + + +async def get_tencent_cookie(save_dir: str) -> Optional[str]: + """ + 获取腾讯视频号的cookie + 通过扫码登录获取新账号的cookie并保存账号信息 + + Args: + save_dir: cookie保存目录 + + Returns: + Optional[str]: 成功返回cookie文件路径,失败返回None + """ + try: + async with async_playwright() as p: + browser = await p.chromium.launch(headless=False) + context = await browser.new_context() + context = await set_init_script(context) + page = await context.new_page() + + # 访问登录页面 + await page.goto("https://channels.weixin.qq.com/platform/login") + tencent_logger.info("请使用微信扫码登录...") + + # 等待登录成功并跳转到首页 + await page.wait_for_url("https://channels.weixin.qq.com/platform", timeout=300000) # 5分钟超时 + await page.wait_for_load_state("networkidle") + + # 使用get_account_info获取账号信息 + account_info = await get_account_info(page) + if not account_info: + raise Exception("无法获取账号信息") + + # 使用昵称作为文件名 + nickname = account_info['nickname'] + save_path = str(Path(save_dir) / f"{nickname}.json") + + # 保存cookie + await context.storage_state(path=save_path) + tencent_logger.success(f"Cookie已保存到: {save_path}") + + # 确保账号信息正确添加到数据库 + db = SocialMediaDB() + try: + # 检查账号是否存在 + account = db.get_account("tencent", account_info['id']) + + if account: + # 更新现有账号信息 + db.update_account("tencent", account_info['id'], { + 'nickname': nickname, + 'video_count': int(account_info['video_count']), + 'follower_count': int(account_info['follower_count']) + }) + else: + # 添加新账号 + db.add_or_update_account( + platform="tencent", + platform_id=account_info['id'], + nickname=nickname, + video_count=int(account_info['video_count']), + follower_count=int(account_info['follower_count']) + ) + + # 添加或更新cookie + db.add_cookie("tencent", account_info['id'], save_path) + + # 更新cookie状态 + db.update_cookie_status("tencent", account_info['id'], save_path, True) + + finally: + db.close() + + return save_path + + except Exception as e: + tencent_logger.error(f"获取Cookie失败: {str(e)}") + return None \ No newline at end of file diff --git a/uploader/tencent_uploader/modules/content_crawler.py b/uploader/tencent_uploader/modules/content_crawler.py new file mode 100644 index 00000000..4c1739d8 --- /dev/null +++ b/uploader/tencent_uploader/modules/content_crawler.py @@ -0,0 +1,892 @@ +""" +视频号内容抓取模块 +专注于已发布内容的获取和数据分析 +""" +from typing import List, Dict, Optional, Any +import asyncio +import logging +from pathlib import Path +import warnings +import base64 +import re +import random +from datetime import datetime +from playwright.async_api import async_playwright, Browser, Page, BrowserContext +from conf import LOCAL_CHROME_PATH +from utils.video_content_db import VideoContentDB +from utils.social_media_db import SocialMediaDB +from utils.content_deduplication import ContentDeduplication +from utils.log import tencent_logger as logger +import sqlite3 + +# 忽略ResourceWarning +warnings.filterwarnings("ignore", category=ResourceWarning) + +def convert_number_text(text: str) -> int: + """ + 将带单位的数字转换为整数 + + Args: + text: 数字文本,如 "1.5万" + + Returns: + int: 转换后的整数 + """ + text = text.strip() + if not text: + return 0 + + # 匹配数字部分 + match = re.match(r'([\d.]+)([万亿])?', text) + if not match: + return 0 + + number = float(match.group(1)) + unit = match.group(2) if match.group(2) else '' + + # 处理单位 + if unit == '万': + number *= 10000 + elif unit == '亿': + number *= 100000000 + + return int(number) + +class VideoContentCrawler: + """视频号内容抓取器 - 专注于已发布内容的获取""" + + 
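+    # 分页估算依据:get_crawled_page_count() 以该常量作为每页条数,对已保存的视频数向上取整来估算已爬取页数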
VIDEOS_PER_PAGE = 20 # 每页最多显示的视频数量 + + def __init__( + self, + account_file: str, # 账号cookie文件 + account_id: int, # 账号ID + local_executable_path: str = LOCAL_CHROME_PATH, # chrome路径 + headless: bool = False, + save_thumb_as_base64: bool = True, # 是否将封面图保存为base64 + min_delay: float = 3.0, # 最小延迟时间(秒) + max_delay: float = 7.0, # 最大延迟时间(秒) + max_retries: int = 3, # 最大重试次数 + reverse: bool = True # 是否倒序爬取 + ): + """ + 初始化抓取器 + + Args: + account_file: cookie文件路径 + account_id: 账号ID + local_executable_path: Chrome浏览器可执行文件路径 + headless: 是否使用无头模式 + save_thumb_as_base64: 是否将封面图转换为base64数据 + min_delay: 最小延迟时间(秒) + max_delay: 最大延迟时间(秒) + max_retries: 最大重试次数 + reverse: 是否倒序爬取 + """ + self.account_file = account_file + self.account_id = account_id + self.local_executable_path = local_executable_path + self.headless = headless + self.save_thumb_as_base64 = save_thumb_as_base64 + self.min_delay = min_delay + self.max_delay = max_delay + self.max_retries = max_retries + self.reverse = reverse + self.browser: Optional[Browser] = None + self.context: Optional[BrowserContext] = None + self.page: Optional[Page] = None + self._playwright = None + self.db: Optional[VideoContentDB] = None # 初始化为None + + async def __aenter__(self): + """异步上下文管理器入口""" + logger.info("=== 进入异步上下文管理器 ===") + try: + logger.info("开始调用init方法...") + await self.init() + logger.info("init方法调用完成") + logger.info("=== 异步上下文管理器初始化完成,返回self ===") + return self + except Exception as e: + logger.error(f"异步上下文管理器初始化失败: {str(e)}") + # 确保资源被清理 + await self.close() + raise + + async def __aexit__(self, exc_type, exc_val, exc_tb): + """异步上下文管理器出口""" + logger.info("=== 退出异步上下文管理器 ===") + try: + await self.close() + logger.info("=== 异步上下文管理器清理完成 ===") + except Exception as e: + logger.error(f"异步上下文管理器清理失败: {str(e)}") + raise + + async def init(self): + """初始化浏览器和上下文""" + try: + # 首先验证账号ID是否存在 + logger.info("验证账号ID...") + social_db = SocialMediaDB() + try: + accounts = social_db.get_all_accounts("tencent") + if not any(acc.get('id') == self.account_id for acc in accounts): + raise ValueError(f"账号ID {self.account_id} 不存在") + logger.info("账号ID验证通过") + finally: + social_db.close() + + # 创建数据库连接 + logger.info("创建数据库连接...") + self.db = VideoContentDB() + logger.info("数据库连接创建成功") + + logger.info("开始初始化Playwright...") + self._playwright = await async_playwright().start() + logger.info("Playwright启动成功") + + logger.info("开始启动浏览器...") + self.browser = await self._playwright.chromium.launch( + headless=self.headless, + executable_path=self.local_executable_path + ) + logger.info("浏览器启动成功") + + logger.info("开始创建浏览器上下文...") + self.context = await self.browser.new_context(storage_state=self.account_file) + logger.info("浏览器上下文创建成功") + + logger.info("开始创建新页面...") + self.page = await self.context.new_page() + logger.info("新页面创建成功") + + except Exception as e: + logger.error(f"初始化失败: {str(e)}") + # 确保资源被正确清理 + await self.close() + raise + + async def close(self): + """关闭浏览器和清理资源""" + try: + if self.page: + await self.page.close() + self.page = None + if self.context: + await self.context.close() + self.context = None + if self.browser: + await self.browser.close() + self.browser = None + if self._playwright: + await self._playwright.stop() + self._playwright = None + # 关闭数据库连接 + if self.db: + self.db.close() + self.db = None + except Exception as e: + logger.error(f"关闭资源时出错: {str(e)}") + + async def random_delay(self): + """随机延迟,避免频繁请求""" + delay = random.uniform(self.min_delay, self.max_delay) + await asyncio.sleep(delay) + + async def check_page_status(self) -> bool: + """ 
+ 检查页面状态 + + Returns: + bool: 页面是否正常 + """ + try: + # 检查是否有错误提示 + error_text = await self.page.text_content(".weui-desktop-dialog__title") or "" + if "异常" in error_text or "错误" in error_text: + logger.error(f"页面出现错误: {error_text}") + return False + + # 检查是否需要验证码 + captcha = await self.page.query_selector(".verify-code") + if captcha: + logger.error("需要验证码验证") + return False + + return True + except Exception as e: + logger.error(f"检查页面状态失败: {str(e)}") + return False + + async def retry_on_error(self, func, *args, **kwargs): + """ + 错误重试装饰器 + + Args: + func: 要执行的异步函数 + *args: 位置参数 + **kwargs: 关键字参数 + + Returns: + 函数执行结果 + """ + for attempt in range(self.max_retries): + try: + return await func(*args, **kwargs) + except Exception as e: + if attempt == self.max_retries - 1: + raise + logger.warning(f"操作失败,正在重试({attempt + 1}/{self.max_retries}): {str(e)}") + await self.random_delay() + + async def get_total_pages(self) -> int: + """ + 获取总页数 + + Returns: + int: 总页数 + """ + try: + # 等待分页元素加载 + await self.page.wait_for_selector(".weui-desktop-pagination__num", timeout=20000) + + # 获取所有页码元素 + page_nums = await self.page.query_selector_all(".weui-desktop-pagination__num") + max_page = 1 + + # 遍历所有页码,找出最大的数字 + for num in page_nums: + text = await num.text_content() + if text.isdigit(): + max_page = max(max_page, int(text)) + + logger.info(f"获取到总页数: {max_page}") + return max_page + + except Exception as e: + logger.error(f"获取总页数失败: {str(e)}") + return 1 + + async def get_current_page(self) -> int: + """ + 获取当前页码 + + Returns: + int: 当前页码 + """ + try: + # 使用更精确的选择器 + current = await self.page.query_selector(".weui-desktop-pagination__num_current") + if current: + page_text = await current.text_content() + if page_text.isdigit(): + return int(page_text) + return 1 + except Exception as e: + logger.error(f"获取当前页码失败: {str(e)}") + return 1 + + async def get_crawled_page_count(self) -> int: + """ + 获取已爬取的页数 + + Returns: + int: 已爬取的页数 + """ + try: + # 获取该账号已保存的视频数量 + count = self.db.get_video_count(self.account_id) + # 计算页数(向上取整) + return (count + self.VIDEOS_PER_PAGE - 1) // self.VIDEOS_PER_PAGE + except Exception as e: + logger.error(f"获取已爬取页数失败: {str(e)}") + return 0 + + async def jump_to_page(self, target_page: int) -> bool: + """ + 跳转到指定页面 + + Args: + target_page: 目标页码 + + Returns: + bool: 是否跳转成功 + """ + try: + # 找到跳转输入框和按钮 + input_field = await self.page.query_selector(".weui-desktop-pagination__input") + jump_button = await self.page.query_selector(".weui-desktop-pagination__form .weui-desktop-link") + + if not input_field or not jump_button: + return False + + # 输入页码 + await input_field.fill(str(target_page)) + # 点击跳转 + await jump_button.click() + + # 等待页面加载 + await self.page.wait_for_load_state("networkidle") + + # 验证是否跳转成功 + current_page = await self.page.query_selector(".weui-desktop-pagination__num_current") + if current_page: + current_page_text = await current_page.text_content() + return int(current_page_text) == target_page + + return False + + except Exception as e: + logger.error(f"跳转到第 {target_page} 页失败: {str(e)}") + return False + + async def get_video_list(self, max_pages: int = 1) -> List[Dict[str, Any]]: + """ + 获取视频列表 + + Args: + max_pages: 最大抓取页数,如果为-1则抓取所有页 + + Returns: + List[Dict]: 视频信息列表 + """ + videos = [] + logger.info("=== 进入get_video_list方法 ===") + logger.info(f"参数: max_pages = {max_pages}") + + # 检查组件状态 + if not self.page or not self.context or not self.browser or not self._playwright: + logger.error("爬虫组件未完全初始化") + return videos + + try: + # 访问列表页 + 
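+            # 注意:此处的列表页 URL 与下文的 post-feed-item 等选择器依赖视频号助手当前的页面结构,后台改版后可能需要同步调整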
logger.info("正在访问视频列表页面...") + await self.page.goto("https://channels.weixin.qq.com/platform/post/list") + logger.info("页面导航完成,等待加载...") + await self.page.wait_for_load_state("networkidle") + logger.info("页面加载完成,检查登录状态...") + + # 等待页面内容加载,使用多个可能的选择器 + logger.info("等待页面内容加载...") + try: + # 尝试等待可能的内容选择器 + selectors = [ + "div[class*='post-feed-item']", + ".post-feed-item", + ".post-item", + "div[class*='weui-desktop-card__bd']" + ] + + content_found = False + for selector in selectors: + try: + logger.info(f"尝试查找选择器: {selector}") + await self.page.wait_for_selector(selector, timeout=5000) + logger.info(f"找到内容元素: {selector}") + content_found = True + break + except Exception as e: + logger.warning(f"选择器 {selector} 未找到: {str(e)}") + continue + + if not content_found: + # 如果所有选择器都失败,截图并记录页面内容 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + screenshot_path = f"error_screenshot_{timestamp}.png" + await self.page.screenshot(path=screenshot_path) + page_content = await self.page.content() + logger.error(f"页面内容加载失败,已保存截图: {screenshot_path}") + logger.debug(f"页面HTML: {page_content[:500]}...") # 只记录前500个字符 + return videos + + except Exception as e: + logger.error(f"页面内容加载失败: {str(e)}") + return videos + + # 检查页面状态 + logger.info("检查页面状态...") + if not await self.check_page_status(): + logger.error("页面状态异常") + return videos + + # 等待页面完全稳定 + logger.info("等待页面稳定...") + await self.random_delay() + + # 如果是倒序爬取,先跳转到最后一页 + if self.reverse: + logger.info("开始倒序爬取,尝试跳转到最后一页...") + # 获取最后一页的页码 + last_page_num = await self.page.query_selector(".weui-desktop-pagination__num:last-child") + if last_page_num: + last_page_text = await last_page_num.text_content() + logger.info(f"找到页码元素,内容为: {last_page_text}") + if last_page_text.isdigit(): + logger.info(f"找到最后一页页码: {last_page_text}") + if not await self.jump_to_page(int(last_page_text)): + logger.error("跳转到最后一页失败") + return videos + # 等待页面加载和稳定 + await self.random_delay() + + # 等待新页面内容加载 + try: + logger.info("等待最后一页内容加载...") + await self.page.wait_for_selector("div[class*='post-feed-item']", timeout=20000) + # 检查是否真的加载到了内容 + items = await self.page.query_selector_all("div[class*='post-feed-item']") + logger.info(f"最后一页内容已加载,找到 {len(items)} 个视频项") + + # 检查页面URL和当前页码 + current_url = self.page.url + current_page = await self.get_current_page() + logger.info(f"当前页面URL: {current_url}") + logger.info(f"当前页码: {current_page}") + + # 检查页面状态 + page_status = await self.check_page_status() + logger.info(f"页面状态检查结果: {'正常' if page_status else '异常'}") + + if not items: + logger.error("最后一页未找到任何视频内容") + # 获取页面源码以供调试 + page_content = await self.page.content() + logger.debug(f"页面源码片段: {page_content[:500]}...") + + except Exception as e: + logger.error(f"最后一页内容加载失败: {str(e)}") + # 记录当前页面状态 + try: + screenshot_path = f"error_screenshot_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png" + await self.page.screenshot(path=screenshot_path) + logger.error(f"已保存错误截图: {screenshot_path}") + except Exception as screenshot_error: + logger.error(f"保存错误截图失败: {str(screenshot_error)}") + return videos + else: + logger.error("未找到最后一页页码元素") + return videos + + # 获取总页数和已爬取页数 + total_pages = await self.get_total_pages() + if total_pages <= 0: + logger.error("获取总页数失败") + return videos + + crawled_pages = await self.get_crawled_page_count() + + if max_pages == -1: + max_pages = total_pages + else: + max_pages = min(max_pages, total_pages) + + logger.info(f"总页数: {total_pages}, 已爬取: {crawled_pages} 页, 计划抓取: {max_pages} 页") + + # 确定起始页和结束页 + if self.reverse: + start_page = max_pages + end_page = 1 # 
修改这里:倒序爬取时,结束页永远是第1页 + step = -1 + logger.info(f"倒序爬取: 从第 {start_page} 页到第 {end_page} 页") + else: + start_page = 1 + end_page = max_pages + step = 1 + logger.info(f"正序爬取: 从第 {start_page} 页到第 {end_page} 页") + + current_page = start_page + + # 如果是倒序且不是最后一页,先跳转到起始页 + if self.reverse and current_page != total_pages: + logger.info(f"尝试跳转到起始页: {start_page}") + if not await self.jump_to_page(start_page): + logger.error(f"跳转到第 {start_page} 页失败") + return videos + + # 等待新页面内容加载 + try: + await self.page.wait_for_selector("div[class*='post-feed-item']", timeout=20000) + logger.info("起始页内容已加载") + except Exception as e: + logger.error(f"起始页内容加载失败: {str(e)}") + return videos + + while (step > 0 and current_page <= end_page) or (step < 0 and current_page >= end_page): + # 检查页面状态 + if not await self.check_page_status(): + logger.error(f"页面状态异常,停止采集") + break + + logger.info(f"正在采集第 {current_page}/{max_pages} 页...") + + # 等待页面加载,使用更通用的选择器 + try: + await self.page.wait_for_selector("div[class*='post-feed-item']", timeout=20000) + items = await self.page.query_selector_all("div[class*='post-feed-item']") + logger.info(f"找到 {len(items)} 个视频项") + except Exception as e: + logger.error(f"页面内容加载失败: {str(e)}") + break + + if not items: + logger.error("未找到任何视频项") + break + + # 处理当前页的视频 + for idx, item in enumerate(items, 1): + try: + logger.info(f"正在处理第 {idx}/{len(items)} 个视频...") + video_info = await self.retry_on_error(self._extract_video_info, item) + if video_info: + videos.append(video_info) + logger.info(f"成功提取视频信息: {video_info['title']}") + # 每处理一个视频后短暂延迟 + await asyncio.sleep(random.uniform(0.5, 1.5)) + else: + logger.warning(f"第 {idx} 个视频信息提取失败或已存在") + except Exception as e: + logger.error(f"处理第 {idx} 个视频时出错: {str(e)}") + continue + + # 处理翻页 + if current_page != end_page: + logger.info(f"准备翻页: 当前第 {current_page} 页") + if step > 0: + has_next = await self._goto_next_page() + logger.info("尝试前往下一页...") + else: + has_next = await self._goto_prev_page() + logger.info("尝试前往上一页...") + + if not has_next: + logger.info("没有更多页面了") + break + + # 翻页后等待 + logger.info("翻页成功,等待页面稳定...") + await self.random_delay() + + current_page += step + logger.info(f"页码更新: {current_page}") + + except Exception as e: + logger.error(f"获取视频列表失败: {str(e)}") + + logger.info(f"爬取任务完成,共获取 {len(videos)} 个视频信息") + return videos + + async def _get_image_as_base64(self, url: str) -> Optional[str]: + """ + 将图片URL转换为base64编码 + + Args: + url: 图片URL + + Returns: + Optional[str]: base64编码的图片数据,失败返回None + """ + try: + # 创建新页面获取图片 + page = await self.context.new_page() + try: + # 获取图片数据 + response = await page.goto(url) + if response and response.ok: + # 获取图片二进制数据 + image_data = await response.body() + # 转换为base64 + base64_data = base64.b64encode(image_data).decode('utf-8') + # 获取Content-Type + content_type = response.headers.get('content-type', 'image/jpeg') + # 返回完整的base64图片数据 + return f"data:{content_type};base64,{base64_data}" + return None + finally: + await page.close() + except Exception as e: + logger.error(f"获取图片base64数据失败: {str(e)}") + return None + + async def _extract_video_info(self, item) -> Optional[Dict[str, Any]]: + """ + 提取单个视频信息 + + Args: + item: 视频元素 + + Returns: + Dict: 视频信息字典,包含标题内容、标签列表和艾特用户列表 + """ + try: + # 获取视频标题,使用更通用的选择器 + title_element = await item.query_selector("div[class*='post-title']") + full_text = await title_element.text_content() if title_element else "" + full_text = full_text.strip() + + # 从完整文本中提取标签和@用户 + # 使用正则表达式提取所有@用户 + mentions = re.findall(r'@([a-zA-Z0-9\u4e00-\u9fa5]+?)(?=[@#]|\s|$)', full_text) + 
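+            # 例:full_text 为 "测试视频 @张三 #美食 #旅行" 时,解析结果为 mentions=["张三"]、tags=["美食", "旅行"]、标题="测试视频"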
mentions = [mention.strip() for mention in mentions if mention.strip()] + + # 使用正则表达式提取所有#标签 + tags = re.findall(r'#([a-zA-Z0-9\u4e00-\u9fa5]+?)(?=[@#]|\s|$)', full_text) + tags = [tag.strip() for tag in tags if tag.strip()] + + # 提取标题(@或#前的所有内容) + main_title = "" + first_special_char_index = -1 + + # 查找第一个@或#的位置 + at_index = full_text.find('@') + hash_index = full_text.find('#') + + if at_index >= 0 and hash_index >= 0: + first_special_char_index = min(at_index, hash_index) + elif at_index >= 0: + first_special_char_index = at_index + elif hash_index >= 0: + first_special_char_index = hash_index + + # 如果找到了@或#,且它们前面有内容,则提取标题 + if first_special_char_index > 0: + main_title = full_text[:first_special_char_index].strip() + + # 如果标题为空,设置默认值为"null" + if not main_title: + main_title = "null" + logger.info("标题为空,使用默认值: null") + + logger.info(f"解析结果 - 完整文本: {full_text}") + logger.info(f"解析结果 - 标题: {main_title}") + logger.info(f"解析结果 - 标签: {tags}") + logger.info(f"解析结果 - 艾特用户: {mentions}") + + # 获取封面图片,使用更通用的选择器 + thumb = await item.query_selector("div[class*='media'] img") + thumb_base64 = None + if thumb and self.save_thumb_as_base64: + src = await thumb.get_attribute("src") + if src: + thumb_base64 = await self._get_image_as_base64(src) + + # 获取发布时间,使用更通用的选择器 + time_element = await item.query_selector("div[class*='post-time'] span") + publish_time = await time_element.text_content() if time_element else "" + + # 获取视频状态,使用更通用的选择器 + status_element = await item.query_selector("div[class*='bandage'] span") + status = await status_element.text_content() if status_element else "" + + # 获取播放数据,使用更通用的选择器 + play_count = await item.query_selector("div[class*='post-data'] .data-item:nth-child(1) .count") + play_text = await play_count.text_content() if play_count else "0" + + # 获取点赞数据 + like_count = await item.query_selector("div[class*='post-data'] .data-item:nth-child(2) .count") + like_text = await like_count.text_content() if like_count else "0" + + # 获取评论数据 + comment_count = await item.query_selector("div[class*='post-data'] .data-item:nth-child(3) .count") + comment_text = await comment_count.text_content() if comment_count else "0" + + # 获取分享数据 + share_count = await item.query_selector("div[class*='post-data'] .data-item:nth-child(4) .count") + share_text = await share_count.text_content() if share_count else "0" + + try: + # 转换数值 + plays = convert_number_text(play_text) + likes = convert_number_text(like_text) + comments = convert_number_text(comment_text) + shares = convert_number_text(share_text) + + # 构建新的内容数据 + new_content = { + "account_id": self.account_id, + "title": main_title, + "thumb_base64": thumb_base64, + "publish_time": publish_time.strip(), + "status": status.strip(), + "plays": plays, + "likes": likes, + "comments": comments, + "shares": shares, + "tags": tags, + "mentions": mentions + } + + # 从数据库获取已存在的内容 + existing_content = self.db.get_video_content_by_title(self.account_id, main_title) + + # 使用去重工具判断 + dedup = ContentDeduplication() + if existing_content: + if dedup.is_content_duplicate(new_content, existing_content): + # 检查是否需要更新 + if dedup.should_update_content(new_content, existing_content): + # 更新内容 + video_id = self.db.update_video_content( + existing_content['id'], + **new_content + ) + logger.info(f"更新视频内容: {main_title}") + else: + logger.info(f"内容无变化,跳过: {main_title}") + return None + else: + # 内容不重复,作为新内容保存 + video_id = self.db.add_video_content(**new_content) + logger.info(f"保存新视频内容: {main_title}") + else: + # 不存在,直接保存 + video_id = 
self.db.add_video_content(**new_content) + logger.info(f"保存新视频内容: {main_title}") + + # 为了保持返回数据的一致性,重新构建带有 stats 的内容 + return_content = { + **new_content, + "id": video_id, + "stats": { + "plays": plays, + "likes": likes, + "comments": comments, + "shares": shares + } + } + del return_content["plays"] + del return_content["likes"] + del return_content["comments"] + del return_content["shares"] + + return return_content + + except Exception as e: + logger.error(f"保存视频内容失败: {str(e)}") + return None + + except Exception as e: + logger.error(f"提取视频信息失败: {str(e)}") + return None + + async def _goto_next_page(self) -> bool: + """ + 翻到下一页 + + Returns: + bool: 是否成功翻页 + """ + try: + # 获取当前页码 + current_page = await self.get_current_page() + total_pages = await self.get_total_pages() + + # 如果已经是最后一页,返回False + if current_page >= total_pages: + logger.info("已经是最后一页") + return False + + # 检查下一页按钮 + next_button = await self.page.query_selector( + ".weui-desktop-pagination__nav .weui-desktop-btn_mini:has-text('下一页')" + ) + if not next_button: + logger.error("未找到下一页按钮") + return False + + # 检查按钮是否可点击 + button_class = await next_button.get_attribute("class") + if "weui-desktop-btn_disabled" in (button_class or ""): + logger.error("下一页按钮已禁用") + return False + + # 点击前确保按钮可见且可点击 + await next_button.scroll_into_view_if_needed() + await next_button.click() + + # 等待页面加载完成 + await self.page.wait_for_load_state("networkidle") + + # 等待新页面加载 + try: + await self.page.wait_for_selector(".post-feed-item", timeout=20000) + except Exception as e: + logger.error(f"等待新页面加载失败: {str(e)}") + return False + + # 验证页码是否改变 + new_page = await self.get_current_page() + if new_page <= current_page: + logger.error(f"页码未增加: {current_page} -> {new_page}") + return False + + # 检查页面状态 + if not await self.check_page_status(): + return False + + return True + + except Exception as e: + logger.error(f"翻页失败: {str(e)}") + return False + + async def _goto_prev_page(self) -> bool: + """ + 翻到上一页 + + Returns: + bool: 是否成功翻页 + """ + try: + # 获取当前页码 + current_page = await self.get_current_page() + + # 如果已经是第一页,返回False + if current_page <= 1: + logger.info("已经是第一页") + return False + + # 检查上一页按钮 + prev_button = await self.page.query_selector( + ".weui-desktop-pagination__nav .weui-desktop-btn_mini:has-text('上一页')" + ) + if not prev_button: + logger.error("未找到上一页按钮") + return False + + # 检查按钮是否可点击 + button_class = await prev_button.get_attribute("class") + if "weui-desktop-btn_disabled" in (button_class or ""): + logger.error("上一页按钮已禁用") + return False + + # 点击前确保按钮可见且可点击 + await prev_button.scroll_into_view_if_needed() + await prev_button.click() + + # 等待页面加载完成 + await self.page.wait_for_load_state("networkidle") + + # 等待新页面加载 + try: + await self.page.wait_for_selector(".post-feed-item", timeout=20000) + except Exception as e: + logger.error(f"等待新页面加载失败: {str(e)}") + return False + + # 验证页码是否改变 + new_page = await self.get_current_page() + if new_page >= current_page: + logger.error(f"页码未减少: {current_page} -> {new_page}") + return False + + # 检查页面状态 + if not await self.check_page_status(): + return False + + return True + + except Exception as e: + logger.error(f"翻页失败: {str(e)}") + return False \ No newline at end of file diff --git a/uploader/tencent_uploader/modules/cookie.py b/uploader/tencent_uploader/modules/cookie.py new file mode 100644 index 00000000..eed74952 --- /dev/null +++ b/uploader/tencent_uploader/modules/cookie.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- +from pathlib import Path +from playwright.async_api import async_playwright 
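+# 说明:本模块在顶层导入 account.get_account_info,而 account.py 在函数内部延迟导入 get_tencent_cookie,以此避免两个模块间的循环导入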
+from utils.log import tencent_logger +from utils.social_media_db import SocialMediaDB +from utils.base_social_media import set_init_script +from .account import get_account_info + +async def get_tencent_cookie(account_file): + """ + 异步获取腾讯的cookie,通过启动浏览器并手动登录实现。 + """ + try: + async with async_playwright() as playwright: + options = { + 'args': ['--lang en-GB'], + 'headless': False, + } + browser = await playwright.chromium.launch(**options) + context = await browser.new_context() + context = await set_init_script(context) + page = await context.new_page() + + # 访问登录页面 + await page.goto("https://channels.weixin.qq.com") + + # 等待登录成功 + tencent_logger.info("[+]等待扫码登录...") + try: + # 等待登录成功后的重定向 + await page.wait_for_url("https://channels.weixin.qq.com/platform", timeout=120000) # 2分钟超时 + except Exception as e: + tencent_logger.error("[+]登录超时,请在2分钟内完成扫码") + return None + + # 确保页面完全加载 + await page.wait_for_load_state('networkidle') + + # 获取账号信息 + account_info = await get_account_info(page) + if account_info: + # 创建以账号昵称命名的cookie文件 + cookie_dir = Path(account_file).parent + cookie_file = cookie_dir / f"{account_info['nickname']}.json" + + # 保存cookie + await context.storage_state(path=str(cookie_file)) + tencent_logger.success(f'[+]成功获取账号 {account_info["nickname"]} 的cookie') + tencent_logger.info(f' [-]视频数: {account_info["video_count"]}') + tencent_logger.info(f' [-]粉丝数: {account_info["follower_count"]}') + tencent_logger.info(f' [-]视频号ID: {account_info["id"]}') + + # 添加cookie到数据库 + try: + db = SocialMediaDB() + db.add_cookie("tencent", account_info['id'], str(cookie_file)) + db.close() + except Exception as e: + tencent_logger.error(f"添加cookie到数据库失败: {str(e)}") + + return str(cookie_file) + else: + tencent_logger.error("[+]未能获取账号信息") + return None + + except Exception as e: + tencent_logger.error(f"获取cookie失败: {str(e)}") + return None + finally: + try: + await browser.close() + except: + pass \ No newline at end of file diff --git a/uploader/tencent_uploader/modules/utils.py b/uploader/tencent_uploader/modules/utils.py new file mode 100644 index 00000000..d5775f78 --- /dev/null +++ b/uploader/tencent_uploader/modules/utils.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +def format_str_for_short_title(origin_title: str) -> str: + """ + 格式化标题字符串,用于生成短标题。 + + 该函数会移除标题中的非字母数字字符和特定的特殊字符,然后根据长度要求进行截断或填充, + 以生成符合规范的短标题。 + + 参数: + origin_title: 原始标题字符串。 + + 返回值: + 格式化后的短标题字符串。 + """ + # 定义允许的特殊字符 + allowed_special_chars = "《》" "+?%°" + + # 移除不允许的特殊字符 + filtered_chars = [ + ( + char + if char.isalnum() or char in allowed_special_chars + else " " if char == "," else "" + ) + for char in origin_title + ] + formatted_string = "".join(filtered_chars) + + # 调整字符串长度 + if len(formatted_string) > 16: + # 截断字符串 + formatted_string = formatted_string[:16] + elif len(formatted_string) < 6: + # 使用空格来填充字符串 + formatted_string += " " * (6 - len(formatted_string)) + + return formatted_string \ No newline at end of file diff --git a/uploader/tencent_uploader/modules/video.py b/uploader/tencent_uploader/modules/video.py new file mode 100644 index 00000000..8118992e --- /dev/null +++ b/uploader/tencent_uploader/modules/video.py @@ -0,0 +1,484 @@ +# -*- coding: utf-8 -*- +from datetime import datetime +import asyncio +from pathlib import Path + +from playwright.async_api import async_playwright +from utils.log import tencent_logger +from utils.base_social_media import set_init_script +from conf import LOCAL_CHROME_PATH +from .account import get_account_info +from .utils import format_str_for_short_title + +def 
format_str_for_short_title(origin_title: str) -> str: + """ + 格式化标题字符串,用于生成短标题。 + + 该函数会移除标题中的非字母数字字符和特定的特殊字符,然后根据长度要求进行截断或填充, + 以生成符合规范的短标题。 + + 参数: + origin_title: 原始标题字符串。 + + 返回值: + 格式化后的短标题字符串。 + """ + # 定义允许的特殊字符 + allowed_special_chars = "《》" "+?%°" + + # 移除不允许的特殊字符 + filtered_chars = [ + ( + char + if char.isalnum() or char in allowed_special_chars + else " " if char == "," else "" + ) + for char in origin_title + ] + formatted_string = "".join(filtered_chars) + + # 调整字符串长度 + if len(formatted_string) > 16: + # 截断字符串 + formatted_string = formatted_string[:16] + elif len(formatted_string) < 6: + # 使用空格来填充字符串 + formatted_string += " " * (6 - len(formatted_string)) + + return formatted_string + +class TencentVideo: + def __init__( + self, + title, + file_path, + tags, + publish_date: datetime, + account_files: list, + category=None, + cover_path=None, + friends=None, + location=None, + ): + self.title = title # 视频标题 + self.file_path = file_path + self.tags = tags + self.publish_date = publish_date + self.account_files = account_files + self.category = category + self.cover_path = cover_path # 添加封面路径 + self.friends = friends or [] # 添加好友列表 + self.location = location # 添加位置信息 + self.local_executable_path = LOCAL_CHROME_PATH + + async def set_schedule_time_tencent(self, page, publish_date): + """设置腾讯视频的定时发布时间""" + # 点击定时发布 + label_element = page.locator("label").filter(has_text="定时").nth(1) + await label_element.click() + + await page.click('input[placeholder="请选择发表时间"]') + + str_month = ( + str(publish_date.month) + if publish_date.month > 9 + else "0" + str(publish_date.month) + ) + current_month = str_month + "月" + # 获取当前的月份 + page_month = await page.inner_text( + 'span.weui-desktop-picker__panel__label:has-text("月")' + ) + + # 检查当前月份是否与目标月份相同 + if page_month != current_month: + await page.click("button.weui-desktop-btn__icon__right") + + # 获取页面元素 + elements = await page.query_selector_all("table.weui-desktop-picker__table a") + + # 遍历元素并点击匹配的元素 + for element in elements: + if "weui-desktop-picker__disabled" in await element.evaluate( + "el => el.className" + ): + continue + text = await element.inner_text() + if text.strip() == str(publish_date.day): + await element.click() + break + + # 输入小时部分 + await page.click('input[placeholder="请选择时间"]') + await page.keyboard.press("Control+KeyA") + await page.keyboard.type(str(publish_date.hour)) + + # 选择标题栏(令定时时间生效) + await page.locator("div.input-editor").click() + + async def handle_upload_error(self, page): + """处理视频上传错误""" + tencent_logger.info("视频出错了,重新上传中") + await page.locator( + 'div.media-status-content div.tag-inner:has-text("删除")' + ).click() + await page.get_by_role("button", name="删除", exact=True).click() + file_input = page.locator('input[type="file"]') + await file_input.set_input_files(self.file_path) + + async def detect_upload_status(self, page, account_name: str): + """检测视频上传状态""" + max_retries = 180 # 最大等待时间30分钟 (180 * 10秒) + retry_count = 0 + + while retry_count < max_retries: + try: + # 检查是否出现错误状态 + error_msg = page.locator("div.status-msg.error") + delete_button = page.locator( + 'div.media-opr div.finder-tag-wrap div.tag-inner:has-text("删除")' + ) + + if await error_msg.count() and await delete_button.count(): + tencent_logger.error(f"[{account_name}] 发现上传出错了...准备重试") + await self.handle_upload_error(page) + return False + + # 检查发表按钮是否可用 + publish_button = page.get_by_role("button", name="发表") + if await publish_button.count(): + button_class = await publish_button.get_attribute("class") + if "weui-desktop-btn_disabled" 
not in button_class: + # 检查封面更换按钮是否可用 + cover_button = page.locator( + 'div.finder-tag-wrap.btn:not(.disabled) div.tag-inner:has-text("更换封面")' + ) + if await cover_button.count(): + tencent_logger.info( + f"[{account_name}] 视频上传完毕,封面按钮可用" + ) + return True + + tencent_logger.info(f"[{account_name}] 正在上传视频中...") + await asyncio.sleep(5) # 每5秒检查一次 + retry_count += 1 + + except Exception as e: + tencent_logger.error(f" [-]检测上传状态出错: {str(e)}") + await asyncio.sleep(10) + retry_count += 1 + + tencent_logger.error(f" [-]视频上传超时") + return False + + async def upload_cover(self, page, account_name: str) -> None: + """上传视频封面""" + if not self.cover_path: + return + + try: + # 等待封面上传按钮出现,使用更精确的定位器 + cover_button = page.locator( + 'div.finder-tag-wrap.btn:not(.disabled) div.tag-inner:has-text("更换封面")' + ) + if await cover_button.count(): + await cover_button.click() + + # 等待编辑视频封面对话框出现 + edit_cover_dialog = page.locator( + 'h3.weui-desktop-dialog__title:has-text("编辑视频封面")' + ) + await edit_cover_dialog.wait_for() + + # 等待并点击上传封面按钮 + upload_button = page.locator( + "div[data-v-5fa289d1].img-wrap.initial-wrap" + ) + await upload_button.wait_for() + + # 使用新的文件选择器API + async with page.expect_file_chooser() as fc_info: + await upload_button.click() + file_chooser = await fc_info.value + await file_chooser.set_files(self.cover_path) + + # 等待裁剪对话框出现 + crop_dialog = page.locator( + 'h3.weui-desktop-dialog__title:has-text("裁剪封面图")' + ) + await crop_dialog.wait_for() + + # 点击裁剪对话框的确定按钮 + confirm_button = page.locator( + 'div.weui-desktop-btn_wrp button.weui-desktop-btn_primary:has-text("确定")' + ) + await confirm_button.click() + + # 等待编辑封面对话框的确认按钮出现 + await page.wait_for_selector("div.cover-set-footer") + + # 点击最终的确认按钮 + final_confirm = page.locator( + "div.cover-set-footer button.weui-desktop-btn_primary" + ) + await final_confirm.click() + + tencent_logger.info(f"[{account_name}] 封面上传成功") + # 等待封面上传完成 + await asyncio.sleep(2) + + except Exception as e: + tencent_logger.error(f"[{account_name}] 封面上传失败: {str(e)}") + + async def upload_single(self, account_file: str) -> None: + """单个账号的上传流程""" + async with async_playwright() as playwright: + try: + # 启动浏览器 + browser = await playwright.chromium.launch( + headless=False, executable_path=self.local_executable_path + ) + context = await browser.new_context(storage_state=str(account_file)) + context = await set_init_script(context) + page = await context.new_page() + + # 先访问主页获取账号信息 + await page.goto("https://channels.weixin.qq.com/platform/home") + await page.wait_for_load_state("networkidle") + + # 获取账号信息 + account_info = await get_account_info(page) + if not account_info: + raise Exception("无法获取账号信息") + + account_name = account_info["nickname"] + + # 访问发布页面 + await page.goto("https://channels.weixin.qq.com/platform/post/create") + tencent_logger.info(f"[+][{account_name}] 开始上传视频-------{self.title}") + await page.wait_for_url("https://channels.weixin.qq.com/platform/post/create") + + # 上传视频文件 + file_input = page.locator('input[type="file"]') + await file_input.set_input_files(self.file_path) + + # 填充标题和话题 + await self.add_title_tags(page, account_name) + # 添加位置信息 + await self.add_location(page, self.location, account_name) + # 设置定时发布时间 + if self.publish_date != 0: + await self.set_schedule_time_tencent(page, self.publish_date) + # 添加短标题 + await self.add_short_title(page) + # 原创选择 + await self.add_original(page) + + # 等待视频上传完成 + upload_success = await self.detect_upload_status(page, account_name) + + if upload_success: + await self.upload_cover(page, account_name) + 
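+                    # 先完成封面设置再点击发表,随后回写 storage_state 以保持 cookie 文件最新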
await self.click_publish(page, account_name) + # 直接更新原cookie文件 + await context.storage_state(path=str(account_file)) + tencent_logger.success(f" [-]账号 {account_name} cookie已更新!") + + await asyncio.sleep(2) + await context.close() + + except Exception as e: + tencent_logger.error(f"账号 {Path(account_file).stem} 上传失败: {str(e)}") + finally: + if "browser" in locals(): + await browser.close() + + async def main(self): + """主入口方法,并发处理多个账号的上传""" + tasks = [] + for account_file in self.account_files: + task = asyncio.create_task(self.upload_single(account_file)) + tasks.append(task) + await asyncio.gather(*tasks) + + async def add_short_title(self, page): + """添加短标题""" + short_title_element = ( + page.get_by_text("短标题", exact=True) + .locator("..") + .locator("xpath=following-sibling::div") + .locator('span input[type="text"]') + ) + + if await short_title_element.count(): + short_title = format_str_for_short_title(self.title) + await short_title_element.fill(short_title) + + async def click_publish(self, page, account_name: str): + """点击发表""" + max_retries = 30 # 最多尝试30次 + retry_count = 0 + + while retry_count < max_retries: + try: + publish_button = page.locator('div.form-btns button:has-text("发表")') + if await publish_button.count(): + await publish_button.click() + + # 等待页面跳转,增加超时时间到30秒 + try: + await page.wait_for_url( + "https://channels.weixin.qq.com/platform/post/list", + timeout=30000, # 30秒超时 + wait_until="networkidle", # 等待网络请求完成 + ) + tencent_logger.success(f" [-]账号 {account_name} 视频发布成功") + return + except Exception as e: + # 检查当前URL,即使超时也可能已经成功 + if ( + "https://channels.weixin.qq.com/platform/post/list" + in page.url + ): + tencent_logger.success( + f" [-]账号 {account_name} 视频发布成功" + ) + return + + tencent_logger.info( + f" [-]账号 {account_name} 视频正在发布中..." 
+ ) + await asyncio.sleep(2) # 增加等待时间 + retry_count += 1 + + except Exception as e: + tencent_logger.error(f" [-]账号 {account_name} 发布出错: {str(e)}") + await asyncio.sleep(2) + retry_count += 1 + + tencent_logger.error(f" [-]账号 {account_name} 发布超时") + + async def add_title_tags(self, page, account_name: str): + """添加标题、话题和好友标记""" + # 步骤1: 添加标题 + await page.locator("div.input-editor").click() + await page.keyboard.type(self.title) + await page.keyboard.press("Enter") + + # 步骤2: 添加标签 + if self.tags: + for tag in self.tags: + await page.keyboard.type("#" + tag) + await page.keyboard.press("Space") + tencent_logger.info(f"[{account_name}] 成功添加话题: {len(self.tags)}") + + # 步骤3: 添加好友标记 + if self.friends: + for friend in self.friends: + await page.keyboard.type("@" + friend) + await page.keyboard.press("Space") + tencent_logger.info(f"[{account_name}] 成功@好友: {len(self.friends)}") + + async def add_collection(self, page): + """添加合集""" + collection_elements = ( + page.get_by_text("添加到合集") + .locator("xpath=following-sibling::div") + .locator(".option-list-wrap > div") + ) + + if await collection_elements.count() > 1: + await page.get_by_text("添加到合集").locator( + "xpath=following-sibling::div" + ).click() + await collection_elements.first.click() + + async def add_original(self, page): + """添加原创""" + if await page.get_by_label("视频为原创").count(): + await page.get_by_label("视频为原创").check() + + # 检查 "我已阅读并同意 《视频号原创声明使用条款》" 元素是否存在 + label_locator = await page.locator( + 'label:has-text("我已阅读并同意 《视频号原创声明使用条款》")' + ).is_visible() + if label_locator: + await page.get_by_label("我已阅读并同意 《视频号原创声明使用条款》").check() + await page.get_by_role("button", name="声明原创").click() + # 2023年11月20日 wechat更新: 可能新账号或者改版账号,出现新的选择页面 + if ( + await page.locator('div.label span:has-text("声明原创")').count() + and self.category + ): + # 因处罚无法勾选原创,故先判断是否可用 + if not await page.locator( + "div.declare-original-checkbox input.ant-checkbox-input" + ).is_disabled(): + await page.locator( + "div.declare-original-checkbox input.ant-checkbox-input" + ).click() + if not await page.locator( + "div.declare-original-dialog label.ant-checkbox-wrapper.ant-checkbox-wrapper-checked:visible" + ).count(): + await page.locator( + "div.declare-original-dialog input.ant-checkbox-input:visible" + ).click() + if await page.locator( + 'div.original-type-form > div.form-label:has-text("原创类型"):visible' + ).count(): + await page.locator("div.form-content:visible").click() # 下拉菜单 + await page.locator( + f'div.form-content:visible ul.weui-desktop-dropdown__list li.weui-desktop-dropdown__list-ele:has-text("{self.category}")' + ).first.click() + await page.wait_for_timeout(1000) + if await page.locator('button:has-text("声明原创"):visible').count(): + await page.locator('button:has-text("声明原创"):visible').click() + + async def add_location( + self, page, location: str = None, account_name: str = None + ) -> None: + """添加位置信息""" + try: + # 点击位置选择区域 + position_display = page.locator("div.position-display-wrap") + if await position_display.count(): + await position_display.click() + + if location is None: + # 选择"不显示位置"选项 + no_location = page.locator( + 'div.location-item:has-text("不显示位置")' + ) + if await no_location.count(): + await no_location.click() + tencent_logger.info(f"[{account_name}] 已设置为不显示位置") + else: + # 等待位置搜索框出现 + search_input = page.locator( + 'input.weui-desktop-form__input[placeholder*="搜索"]' + ) + await search_input.wait_for() + + # 输入位置并等待 + await search_input.fill(location) + await asyncio.sleep(1) # 等待搜索结果 + + # 选择第一个搜索结果 + first_result = 
page.locator("div.location-item").first + if await first_result.count(): + await first_result.click() + tencent_logger.info( + f"[{account_name}] 成功添加位置: {location}" + ) + else: + tencent_logger.warning( + f"[{account_name}] 未找到位置: {location}" + ) + + except Exception as e: + tencent_logger.error(f"[{account_name}] 添加位置失败: {str(e)}") + + @staticmethod + async def get_account_info(page) -> dict: + """代理到全局get_account_info函数""" + return await get_account_info(page) \ No newline at end of file diff --git a/uploader/tencent_uploader/test_crawler.py b/uploader/tencent_uploader/test_crawler.py new file mode 100644 index 00000000..319e8ca3 --- /dev/null +++ b/uploader/tencent_uploader/test_crawler.py @@ -0,0 +1,189 @@ +""" +测试视频号内容抓取功能 +""" +import asyncio +import json +import sys +from pathlib import Path + +# 添加项目根目录到Python路径 +ROOT_DIR = Path(__file__).parent.parent.parent +sys.path.append(str(ROOT_DIR)) + +from uploader.tencent_uploader.modules.content_crawler import VideoContentCrawler +from uploader.tencent_uploader.modules.account import cookie_auth +from utils.social_media_db import SocialMediaDB +from utils.video_content_db import VideoContentDB +from utils.log import tencent_logger +from typing import Dict, Any, Optional, List + +async def main() -> None: + # 从数据库获取所有腾讯视频号账号信息 + db = SocialMediaDB() + video_db = VideoContentDB() + crawler = None # 初始化crawler变量 + + try: + tencent_logger.info("开始获取账号信息...") + accounts: List[Dict[str, Any]] = db.get_all_accounts("tencent") + + if not accounts: + tencent_logger.error("未找到任何腾讯视频号账号") + return + + tencent_logger.info(f"找到 {len(accounts)} 个腾讯视频号账号") + + # 遍历所有账号 + for account in accounts: + try: + tencent_logger.info(f"\n=== 开始处理账号: {account['nickname']} ===") + + # 获取最新的cookie文件路径 + cookie_paths: List[str] = account.get('cookie_paths', []) + if not cookie_paths: + tencent_logger.error(f"账号 {account['nickname']} 没有可用的cookie文件,跳过") + continue + + cookie_file: str = cookie_paths[0] # 使用最新的cookie文件 + tencent_logger.info(f"使用cookie文件: {cookie_file}") + + # 检查账号上次验证时间 + from datetime import datetime, timedelta + + last_check = db.get_account_verification_time("tencent", account['nickname']) + current_time = datetime.now() + + if last_check: + # 将字符串转换为datetime对象 + last_check_time = datetime.strptime(last_check, "%Y-%m-%d %H:%M:%S.%f") + time_diff = current_time - last_check_time + + if time_diff <= timedelta(hours=1): + tencent_logger.info(f"账号在1小时内已验证过 (上次验证时间: {last_check}), 跳过cookie验证") + cookie_valid = True + else: + tencent_logger.info(f"距离上次验证已超过1小时 (上次验证时间: {last_check}), 需要重新验证cookie") + cookie_valid = False + else: + tencent_logger.info("账号未记录验证时间,需要验证cookie") + cookie_valid = False + + # 验证cookie是否有效 + if not cookie_valid: + tencent_logger.info("开始验证cookie...") + try: + await cookie_auth(cookie_file) + tencent_logger.info("cookie验证通过") + except Exception as e: + tencent_logger.error(f"账号 {account['nickname']} cookie验证失败: {str(e)}") + continue + else: + tencent_logger.info("cookie已验证通过,跳过验证") + + # 获取已爬取的视频数量 + video_count = video_db.get_video_count(account['id']) + tencent_logger.info(f"账号 {account['nickname']} 已保存 {video_count} 个视频") + + tencent_logger.info("开始初始化爬虫...") + try: + tencent_logger.info("=== 进入async with块 ===") + async with VideoContentCrawler( + account_file=cookie_file, + account_id=account['id'], + headless=False, # 显示浏览器窗口,方便调试 + save_thumb_as_base64=True, # 保存封面图片 + min_delay=3.0, # 最小延迟3秒 + max_delay=7.0, # 最大延迟7秒 + max_retries=3, # 最大重试次数 + reverse=True # 倒序爬取,优先获取最新内容 + ) as crawler: + tencent_logger.info("=== async 
with块初始化完成 ===") + tencent_logger.info("开始调用get_video_list方法...") + try: + # 获取所有未爬取的视频 + tencent_logger.info("准备调用get_video_list...") + videos = await crawler.get_video_list(max_pages=-1) + tencent_logger.info("get_video_list方法调用完成") + except Exception as e: + tencent_logger.error(f"get_video_list方法执行失败: {str(e)}") + import traceback + tencent_logger.error(f"get_video_list错误堆栈:\n{traceback.format_exc()}") + continue # 继续处理下一个账号 + + tencent_logger.info("=== 开始处理get_video_list结果 ===") + + # 打印结果 + tencent_logger.info(f"\n账号 {account['nickname']} 本次新增 {len(videos)} 个视频:") + tencent_logger.info("-" * 50) + + for idx, video in enumerate(videos, 1): + tencent_logger.info(f"\n视频 {idx}:") + tencent_logger.info(f"标题: {video['title']}") + tencent_logger.info(f"标签: #{' #'.join(video['tags'])}" if video['tags'] else "标签: 无") + tencent_logger.info(f"艾特用户: @{' @'.join(video['mentions'])}" if video['mentions'] else "艾特用户: 无") + tencent_logger.info(f"发布时间: {video['publish_time']}") + tencent_logger.info(f"状态: {video['status']}") + tencent_logger.info(f"播放量: {video['stats']['plays']}") + tencent_logger.info(f"点赞数: {video['stats']['likes']}") + tencent_logger.info(f"评论数: {video['stats']['comments']}") + tencent_logger.info(f"分享数: {video['stats']['shares']}") + tencent_logger.info(f"封面图片: {'已保存为base64' if video['thumb_base64'] else '获取失败'}") + tencent_logger.info("-" * 50) + + # 保存结果到文件 + if videos: + output_file = Path(f"data/video_list_{account['nickname']}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json") + output_file.parent.mkdir(parents=True, exist_ok=True) + + with open(output_file, "w", encoding="utf-8") as f: + json.dump(videos, f, ensure_ascii=False, indent=2) + + tencent_logger.info(f"\n结果已保存到: {output_file}") + + # 显示最终统计 + final_count = video_db.get_video_count(account['id']) + tencent_logger.info(f"\n账号 {account['nickname']} 统计信息:") + tencent_logger.info("-" * 50) + tencent_logger.info(f"原有视频数: {video_count}") + tencent_logger.info(f"新增视频数: {len(videos)}") + tencent_logger.info(f"当前总数: {final_count}") + tencent_logger.info(f"预计页数: {(final_count + crawler.VIDEOS_PER_PAGE - 1) // crawler.VIDEOS_PER_PAGE}") + + except Exception as e: + tencent_logger.error(f"账号 {account['nickname']} 爬虫初始化或执行过程中出错: {str(e)}") + import traceback + tencent_logger.error(f"错误堆栈:\n{traceback.format_exc()}") + continue # 继续处理下一个账号 + + except Exception as e: + tencent_logger.error(f"处理账号 {account['nickname']} 时出现错误: {str(e)}") + import traceback + tencent_logger.error(f"错误堆栈:\n{traceback.format_exc()}") + continue # 继续处理下一个账号 + + except Exception as e: + tencent_logger.error(f"爬取过程中出现错误: {str(e)}") + import traceback + tencent_logger.error(f"错误堆栈:\n{traceback.format_exc()}") + finally: + # 确保资源正确关闭 + tencent_logger.info("开始清理资源...") + if crawler: + try: + tencent_logger.info("关闭爬虫资源...") + await crawler.close() + tencent_logger.info("爬虫资源已关闭") + except Exception as e: + tencent_logger.error(f"关闭爬虫资源时出错: {str(e)}") + + tencent_logger.info("关闭数据库连接...") + try: + db.close() + video_db.close() + tencent_logger.info("数据库连接已关闭") + except Exception as e: + tencent_logger.error(f"关闭数据库连接时出错: {str(e)}") + +if __name__ == "__main__": + tencent_logger.info("启动视频号内容抓取程序...") + asyncio.run(main()) \ No newline at end of file diff --git a/uploader/tencent_uploader/video_processor.py b/uploader/tencent_uploader/video_processor.py new file mode 100644 index 00000000..c17fea9b --- /dev/null +++ b/uploader/tencent_uploader/video_processor.py @@ -0,0 +1,12 @@ +class VideoPreprocessor: + @staticmethod + def add_watermark(input_path, 
output_path): + """使用FFmpeg添加水印""" + + @staticmethod + def generate_thumbnail(video_path): + """自动生成视频封面""" + + @staticmethod + def compress_video(input_path, output_path): + """视频压缩处理""" \ No newline at end of file diff --git a/uploader/tk_uploader/__init__.py b/uploader/tk_uploader/__init__.py index 262e9042..96c20e84 100644 --- a/uploader/tk_uploader/__init__.py +++ b/uploader/tk_uploader/__init__.py @@ -1,5 +1,5 @@ -from pathlib import Path - -from conf import BASE_DIR - +from pathlib import Path + +from conf import BASE_DIR + Path(BASE_DIR / "cookies" / "tk_uploader").mkdir(exist_ok=True) \ No newline at end of file diff --git a/uploader/tk_uploader/main.py b/uploader/tk_uploader/main.py index 6a9500d5..38891b88 100644 --- a/uploader/tk_uploader/main.py +++ b/uploader/tk_uploader/main.py @@ -1,265 +1,265 @@ -# -*- coding: utf-8 -*- -import re -from datetime import datetime - -from playwright.async_api import Playwright, async_playwright -import os -import asyncio -from uploader.tk_uploader.tk_config import Tk_Locator -from utils.base_social_media import set_init_script -from utils.files_times import get_absolute_path -from utils.log import tiktok_logger - - -async def cookie_auth(account_file): - async with async_playwright() as playwright: - browser = await playwright.firefox.launch(headless=True) - context = await browser.new_context(storage_state=account_file) - context = await set_init_script(context) - # 创建一个新的页面 - page = await context.new_page() - # 访问指定的 URL - await page.goto("https://www.tiktok.com/tiktokstudio/upload?lang=en") - await page.wait_for_load_state('networkidle') - try: - # 选择所有的 select 元素 - select_elements = await page.query_selector_all('select') - for element in select_elements: - class_name = await element.get_attribute('class') - # 使用正则表达式匹配特定模式的 class 名称 - if re.match(r'tiktok-.*-SelectFormContainer.*', class_name): - tiktok_logger.error("[+] cookie expired") - return False - tiktok_logger.success("[+] cookie valid") - return True - except: - tiktok_logger.success("[+] cookie valid") - return True - - -async def tiktok_setup(account_file, handle=False): - account_file = get_absolute_path(account_file, "tk_uploader") - if not os.path.exists(account_file) or not await cookie_auth(account_file): - if not handle: - return False - tiktok_logger.info('[+] cookie file is not existed or expired. Now open the browser auto. Please login with your way(gmail phone, whatever, the cookie file will generated after login') - await get_tiktok_cookie(account_file) - return True - - -async def get_tiktok_cookie(account_file): - async with async_playwright() as playwright: - options = { - 'args': [ - '--lang en-GB', - ], - 'headless': False, # Set headless option here - } - # Make sure to run headed. - browser = await playwright.firefox.launch(**options) - # Setup context however you like. - context = await browser.new_context() # Pass any options - context = await set_init_script(context) - # Pause the page, and start recording manually. 
- page = await context.new_page() - await page.goto("https://www.tiktok.com/login?lang=en") - await page.pause() - # 点击调试器的继续,保存cookie - await context.storage_state(path=account_file) - - -class TiktokVideo(object): - def __init__(self, title, file_path, tags, publish_date, account_file): - self.title = title - self.file_path = file_path - self.tags = tags - self.publish_date = publish_date - self.account_file = account_file - self.locator_base = None - - - async def set_schedule_time(self, page, publish_date): - schedule_input_element = self.locator_base.get_by_label('Schedule') - await schedule_input_element.wait_for(state='visible') # 确保按钮可见 - - await schedule_input_element.click() - scheduled_picker = self.locator_base.locator('div.scheduled-picker') - await scheduled_picker.locator('div.TUXInputBox').nth(1).click() - - calendar_month = await self.locator_base.locator('div.calendar-wrapper span.month-title').inner_text() - - n_calendar_month = datetime.strptime(calendar_month, '%B').month - - schedule_month = publish_date.month - - if n_calendar_month != schedule_month: - if n_calendar_month < schedule_month: - arrow = self.locator_base.locator('div.calendar-wrapper span.arrow').nth(-1) - else: - arrow = self.locator_base.locator('div.calendar-wrapper span.arrow').nth(0) - await arrow.click() - - # day set - valid_days_locator = self.locator_base.locator( - 'div.calendar-wrapper span.day.valid') - valid_days = await valid_days_locator.count() - for i in range(valid_days): - day_element = valid_days_locator.nth(i) - text = await day_element.inner_text() - if text.strip() == str(publish_date.day): - await day_element.click() - break - # time set - await scheduled_picker.locator('div.TUXInputBox').nth(0).click() - - hour_str = publish_date.strftime("%H") - correct_minute = int(publish_date.minute / 5) - minute_str = f"{correct_minute:02d}" - - hour_selector = f"span.tiktok-timepicker-left:has-text('{hour_str}')" - minute_selector = f"span.tiktok-timepicker-right:has-text('{minute_str}')" - - # pick hour first - await self.locator_base.locator(hour_selector).click() - # click time button again - # 等待某个特定的元素出现或状态变化,表明UI已更新 - await page.wait_for_timeout(1000) # 等待500毫秒 - await scheduled_picker.locator('div.TUXInputBox').nth(0).click() - # pick minutes after - await self.locator_base.locator(minute_selector).click() - - # click title to remove the focus. 
- await self.locator_base.locator("h1:has-text('Upload video')").click() - - async def handle_upload_error(self, page): - tiktok_logger.info("video upload error retrying.") - select_file_button = self.locator_base.locator('button[aria-label="Select file"]') - async with page.expect_file_chooser() as fc_info: - await select_file_button.click() - file_chooser = await fc_info.value - await file_chooser.set_files(self.file_path) - - async def upload(self, playwright: Playwright) -> None: - browser = await playwright.firefox.launch(headless=False) - context = await browser.new_context(storage_state=f"{self.account_file}") - context = await set_init_script(context) - page = await context.new_page() - - await page.goto("https://www.tiktok.com/creator-center/upload") - tiktok_logger.info(f'[+]Uploading-------{self.title}.mp4') - - await page.wait_for_url("https://www.tiktok.com/tiktokstudio/upload", timeout=10000) - - try: - await page.wait_for_selector('iframe[data-tt="Upload_index_iframe"], div.upload-container', timeout=10000) - tiktok_logger.info("Either iframe or div appeared.") - except Exception as e: - tiktok_logger.error("Neither iframe nor div appeared within the timeout.") - - await self.choose_base_locator(page) - - upload_button = self.locator_base.locator( - 'button:has-text("Select video"):visible') - await upload_button.wait_for(state='visible') # 确保按钮可见 - - async with page.expect_file_chooser() as fc_info: - await upload_button.click() - file_chooser = await fc_info.value - await file_chooser.set_files(self.file_path) - - await self.add_title_tags(page) - # detact upload status - await self.detect_upload_status(page) - if self.publish_date != 0: - await self.set_schedule_time(page, self.publish_date) - - await self.click_publish(page) - - await context.storage_state(path=f"{self.account_file}") # save cookie - tiktok_logger.info(' [-] update cookie!') - await asyncio.sleep(2) # close delay for look the video status - # close all - await context.close() - await browser.close() - - async def add_title_tags(self, page): - - editor_locator = self.locator_base.locator('div.public-DraftEditor-content') - await editor_locator.click() - - await page.keyboard.press("End") - - await page.keyboard.press("Control+A") - - await page.keyboard.press("Delete") - - await page.keyboard.press("End") - - await page.wait_for_timeout(1000) # 等待1秒 - - await page.keyboard.insert_text(self.title) - await page.wait_for_timeout(1000) # 等待1秒 - await page.keyboard.press("End") - - await page.keyboard.press("Enter") - - # tag part - for index, tag in enumerate(self.tags, start=1): - tiktok_logger.info("Setting the %s tag" % index) - await page.keyboard.press("End") - await page.wait_for_timeout(1000) # 等待1秒 - await page.keyboard.insert_text("#" + tag + " ") - await page.keyboard.press("Space") - await page.wait_for_timeout(1000) # 等待1秒 - - await page.keyboard.press("Backspace") - await page.keyboard.press("End") - - async def click_publish(self, page): - success_flag_div = '#\\:r9\\:' - while True: - try: - publish_button = self.locator_base.locator('div.btn-post') - if await publish_button.count(): - await publish_button.click() - - await self.locator_base.locator(success_flag_div).wait_for(state="visible", timeout=3000) - tiktok_logger.success(" [-] video published success") - break - except Exception as e: - if await self.locator_base.locator(success_flag_div).count(): - tiktok_logger.success(" [-]video published success") - break - else: - tiktok_logger.exception(f" [-] Exception: {e}") - 
tiktok_logger.info(" [-] video publishing") - await page.screenshot(full_page=True) - await asyncio.sleep(0.5) - - async def detect_upload_status(self, page): - while True: - try: - if await self.locator_base.locator('div.btn-post > button').get_attribute("disabled") is None: - tiktok_logger.info(" [-]video uploaded.") - break - else: - tiktok_logger.info(" [-] video uploading...") - await asyncio.sleep(2) - if await self.locator_base.locator('button[aria-label="Select file"]').count(): - tiktok_logger.info(" [-] found some error while uploading now retry...") - await self.handle_upload_error(page) - except: - tiktok_logger.info(" [-] video uploading...") - await asyncio.sleep(2) - - async def choose_base_locator(self, page): - # await page.wait_for_selector('div.upload-container') - if await page.locator('iframe[data-tt="Upload_index_iframe"]').count(): - self.locator_base = self.locator_base - else: - self.locator_base = page.locator(Tk_Locator.default) - - async def main(self): - async with async_playwright() as playwright: - await self.upload(playwright) - +# -*- coding: utf-8 -*- +import re +from datetime import datetime + +from playwright.async_api import Playwright, async_playwright +import os +import asyncio +from uploader.tk_uploader.tk_config import Tk_Locator +from utils.base_social_media import set_init_script +from utils.files_times import get_absolute_path +from utils.log import tiktok_logger + + +async def cookie_auth(account_file): + async with async_playwright() as playwright: + browser = await playwright.firefox.launch(headless=True) + context = await browser.new_context(storage_state=account_file) + context = await set_init_script(context) + # 创建一个新的页面 + page = await context.new_page() + # 访问指定的 URL + await page.goto("https://www.tiktok.com/tiktokstudio/upload?lang=en") + await page.wait_for_load_state('networkidle') + try: + # 选择所有的 select 元素 + select_elements = await page.query_selector_all('select') + for element in select_elements: + class_name = await element.get_attribute('class') + # 使用正则表达式匹配特定模式的 class 名称 + if re.match(r'tiktok-.*-SelectFormContainer.*', class_name): + tiktok_logger.error("[+] cookie expired") + return False + tiktok_logger.success("[+] cookie valid") + return True + except: + tiktok_logger.success("[+] cookie valid") + return True + + +async def tiktok_setup(account_file, handle=False): + account_file = get_absolute_path(account_file, "tk_uploader") + if not os.path.exists(account_file) or not await cookie_auth(account_file): + if not handle: + return False + tiktok_logger.info('[+] cookie file is not existed or expired. Now open the browser auto. Please login with your way(gmail phone, whatever, the cookie file will generated after login') + await get_tiktok_cookie(account_file) + return True + + +async def get_tiktok_cookie(account_file): + async with async_playwright() as playwright: + options = { + 'args': [ + '--lang en-GB', + ], + 'headless': False, # Set headless option here + } + # Make sure to run headed. + browser = await playwright.firefox.launch(**options) + # Setup context however you like. + context = await browser.new_context() # Pass any options + context = await set_init_script(context) + # Pause the page, and start recording manually. 
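+        # page.pause() opens the Playwright Inspector and blocks here; finish the TikTok login manually, click Resume, and the storage_state call below saves the fresh cookies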
+ page = await context.new_page() + await page.goto("https://www.tiktok.com/login?lang=en") + await page.pause() + # 点击调试器的继续,保存cookie + await context.storage_state(path=account_file) + + +class TiktokVideo(object): + def __init__(self, title, file_path, tags, publish_date, account_file): + self.title = title + self.file_path = file_path + self.tags = tags + self.publish_date = publish_date + self.account_file = account_file + self.locator_base = None + + + async def set_schedule_time(self, page, publish_date): + schedule_input_element = self.locator_base.get_by_label('Schedule') + await schedule_input_element.wait_for(state='visible') # 确保按钮可见 + + await schedule_input_element.click() + scheduled_picker = self.locator_base.locator('div.scheduled-picker') + await scheduled_picker.locator('div.TUXInputBox').nth(1).click() + + calendar_month = await self.locator_base.locator('div.calendar-wrapper span.month-title').inner_text() + + n_calendar_month = datetime.strptime(calendar_month, '%B').month + + schedule_month = publish_date.month + + if n_calendar_month != schedule_month: + if n_calendar_month < schedule_month: + arrow = self.locator_base.locator('div.calendar-wrapper span.arrow').nth(-1) + else: + arrow = self.locator_base.locator('div.calendar-wrapper span.arrow').nth(0) + await arrow.click() + + # day set + valid_days_locator = self.locator_base.locator( + 'div.calendar-wrapper span.day.valid') + valid_days = await valid_days_locator.count() + for i in range(valid_days): + day_element = valid_days_locator.nth(i) + text = await day_element.inner_text() + if text.strip() == str(publish_date.day): + await day_element.click() + break + # time set + await scheduled_picker.locator('div.TUXInputBox').nth(0).click() + + hour_str = publish_date.strftime("%H") + correct_minute = int(publish_date.minute / 5) + minute_str = f"{correct_minute:02d}" + + hour_selector = f"span.tiktok-timepicker-left:has-text('{hour_str}')" + minute_selector = f"span.tiktok-timepicker-right:has-text('{minute_str}')" + + # pick hour first + await self.locator_base.locator(hour_selector).click() + # click time button again + # 等待某个特定的元素出现或状态变化,表明UI已更新 + await page.wait_for_timeout(1000) # 等待500毫秒 + await scheduled_picker.locator('div.TUXInputBox').nth(0).click() + # pick minutes after + await self.locator_base.locator(minute_selector).click() + + # click title to remove the focus. 
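# Worked example (illustrative only) of the hour/minute selector strings built
# in set_schedule_time above; the datetime value is a hypothetical placeholder.
from datetime import datetime

publish_date = datetime(2024, 5, 1, 18, 37)
hour_str = publish_date.strftime("%H")               # -> "18"
minute_str = f"{int(publish_date.minute / 5):02d}"   # 37 // 5 = 7 -> "07"
# Resulting selectors:
#   span.tiktok-timepicker-left:has-text('18')
#   span.tiktok-timepicker-right:has-text('07')
# i.e. the minute is reduced to a 5-minute slot index; the code assumes the
# picker entry's text matches that two-digit index.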
+ await self.locator_base.locator("h1:has-text('Upload video')").click() + + async def handle_upload_error(self, page): + tiktok_logger.info("video upload error retrying.") + select_file_button = self.locator_base.locator('button[aria-label="Select file"]') + async with page.expect_file_chooser() as fc_info: + await select_file_button.click() + file_chooser = await fc_info.value + await file_chooser.set_files(self.file_path) + + async def upload(self, playwright: Playwright) -> None: + browser = await playwright.firefox.launch(headless=False) + context = await browser.new_context(storage_state=f"{self.account_file}") + context = await set_init_script(context) + page = await context.new_page() + + await page.goto("https://www.tiktok.com/creator-center/upload") + tiktok_logger.info(f'[+]Uploading-------{self.title}.mp4') + + await page.wait_for_url("https://www.tiktok.com/tiktokstudio/upload", timeout=10000) + + try: + await page.wait_for_selector('iframe[data-tt="Upload_index_iframe"], div.upload-container', timeout=10000) + tiktok_logger.info("Either iframe or div appeared.") + except Exception as e: + tiktok_logger.error("Neither iframe nor div appeared within the timeout.") + + await self.choose_base_locator(page) + + upload_button = self.locator_base.locator( + 'button:has-text("Select video"):visible') + await upload_button.wait_for(state='visible') # 确保按钮可见 + + async with page.expect_file_chooser() as fc_info: + await upload_button.click() + file_chooser = await fc_info.value + await file_chooser.set_files(self.file_path) + + await self.add_title_tags(page) + # detact upload status + await self.detect_upload_status(page) + if self.publish_date != 0: + await self.set_schedule_time(page, self.publish_date) + + await self.click_publish(page) + + await context.storage_state(path=f"{self.account_file}") # save cookie + tiktok_logger.info(' [-] update cookie!') + await asyncio.sleep(2) # close delay for look the video status + # close all + await context.close() + await browser.close() + + async def add_title_tags(self, page): + + editor_locator = self.locator_base.locator('div.public-DraftEditor-content') + await editor_locator.click() + + await page.keyboard.press("End") + + await page.keyboard.press("Control+A") + + await page.keyboard.press("Delete") + + await page.keyboard.press("End") + + await page.wait_for_timeout(1000) # 等待1秒 + + await page.keyboard.insert_text(self.title) + await page.wait_for_timeout(1000) # 等待1秒 + await page.keyboard.press("End") + + await page.keyboard.press("Enter") + + # tag part + for index, tag in enumerate(self.tags, start=1): + tiktok_logger.info("Setting the %s tag" % index) + await page.keyboard.press("End") + await page.wait_for_timeout(1000) # 等待1秒 + await page.keyboard.insert_text("#" + tag + " ") + await page.keyboard.press("Space") + await page.wait_for_timeout(1000) # 等待1秒 + + await page.keyboard.press("Backspace") + await page.keyboard.press("End") + + async def click_publish(self, page): + success_flag_div = '#\\:r9\\:' + while True: + try: + publish_button = self.locator_base.locator('div.btn-post') + if await publish_button.count(): + await publish_button.click() + + await self.locator_base.locator(success_flag_div).wait_for(state="visible", timeout=3000) + tiktok_logger.success(" [-] video published success") + break + except Exception as e: + if await self.locator_base.locator(success_flag_div).count(): + tiktok_logger.success(" [-]video published success") + break + else: + tiktok_logger.exception(f" [-] Exception: {e}") + 
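# Note on success_flag_div above: '#\\:r9\\:' is a CSS id selector with the
# colons escaped, i.e. it matches an element whose id is literally ":r9:".
# Ids of that shape are usually auto-generated by the front-end framework and
# can change between TikTok releases, so this success check is best treated as
# a fragile assumption rather than a stable hook.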
tiktok_logger.info(" [-] video publishing") + await page.screenshot(full_page=True) + await asyncio.sleep(0.5) + + async def detect_upload_status(self, page): + while True: + try: + if await self.locator_base.locator('div.btn-post > button').get_attribute("disabled") is None: + tiktok_logger.info(" [-]video uploaded.") + break + else: + tiktok_logger.info(" [-] video uploading...") + await asyncio.sleep(2) + if await self.locator_base.locator('button[aria-label="Select file"]').count(): + tiktok_logger.info(" [-] found some error while uploading now retry...") + await self.handle_upload_error(page) + except: + tiktok_logger.info(" [-] video uploading...") + await asyncio.sleep(2) + + async def choose_base_locator(self, page): + # await page.wait_for_selector('div.upload-container') + if await page.locator('iframe[data-tt="Upload_index_iframe"]').count(): + self.locator_base = self.locator_base + else: + self.locator_base = page.locator(Tk_Locator.default) + + async def main(self): + async with async_playwright() as playwright: + await self.upload(playwright) + diff --git a/uploader/tk_uploader/main_chrome.py b/uploader/tk_uploader/main_chrome.py index 77c3f08d..26f04490 100644 --- a/uploader/tk_uploader/main_chrome.py +++ b/uploader/tk_uploader/main_chrome.py @@ -1,304 +1,304 @@ -# -*- coding: utf-8 -*- -import re -from datetime import datetime - -from playwright.async_api import Playwright, async_playwright -import os -import asyncio - -from conf import LOCAL_CHROME_PATH -from uploader.tk_uploader.tk_config import Tk_Locator -from utils.base_social_media import set_init_script -from utils.files_times import get_absolute_path -from utils.log import tiktok_logger - - -async def cookie_auth(account_file): - async with async_playwright() as playwright: - browser = await playwright.chromium.launch(headless=True) - context = await browser.new_context(storage_state=account_file) - context = await set_init_script(context) - # 创建一个新的页面 - page = await context.new_page() - # 访问指定的 URL - await page.goto("https://www.tiktok.com/tiktokstudio/upload?lang=en") - await page.wait_for_load_state('networkidle') - try: - # 选择所有的 select 元素 - select_elements = await page.query_selector_all('select') - for element in select_elements: - class_name = await element.get_attribute('class') - # 使用正则表达式匹配特定模式的 class 名称 - if re.match(r'tiktok-.*-SelectFormContainer.*', class_name): - tiktok_logger.error("[+] cookie expired") - return False - tiktok_logger.success("[+] cookie valid") - return True - except: - tiktok_logger.success("[+] cookie valid") - return True - - -async def tiktok_setup(account_file, handle=False): - account_file = get_absolute_path(account_file, "tk_uploader") - if not os.path.exists(account_file) or not await cookie_auth(account_file): - if not handle: - return False - tiktok_logger.info('[+] cookie file is not existed or expired. Now open the browser auto. Please login with your way(gmail phone, whatever, the cookie file will generated after login') - await get_tiktok_cookie(account_file) - return True - - -async def get_tiktok_cookie(account_file): - async with async_playwright() as playwright: - options = { - 'args': [ - '--lang en-GB', - ], - 'headless': False, # Set headless option here - } - # Make sure to run headed. - browser = await playwright.chromium.launch(**options) - # Setup context however you like. - context = await browser.new_context() # Pass any options - context = await set_init_script(context) - # Pause the page, and start recording manually. 
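# A minimal end-to-end usage sketch for the Firefox-based uploader defined
# above (not part of this change set). Module and file paths are assumptions:
# the uploader module is taken to be uploader.tk_uploader.main, the file paths
# are hypothetical placeholders, and the three-value return of the new
# get_title_and_hashtags is assumed.
import asyncio

from uploader.tk_uploader.main import tiktok_setup, TiktokVideo
from utils.files_times import get_title_and_hashtags, generate_schedule_time_next_day


async def demo_upload():
    # An absolute path keeps tiktok_setup's internal path resolution and
    # TiktokVideo's storage_state pointing at the same cookie file.
    account_file = "/path/to/tiktok_account.json"        # hypothetical cookie file
    video_path = "videos/demo #travel food @alice.mp4"   # hypothetical video file
    if not await tiktok_setup(account_file, handle=True):
        return
    title, tags, _mentions = get_title_and_hashtags(video_path)
    publish_date = generate_schedule_time_next_day(1, 1)[0]  # first default slot tomorrow (06:00)
    await TiktokVideo(title, video_path, tags, publish_date, account_file).main()


# asyncio.run(demo_upload())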
- page = await context.new_page() - await page.goto("https://www.tiktok.com/login?lang=en") - await page.pause() - # 点击调试器的继续,保存cookie - await context.storage_state(path=account_file) - - -class TiktokVideo(object): - def __init__(self, title, file_path, tags, publish_date, account_file, thumbnail_path=None): - self.title = title - self.file_path = file_path - self.tags = tags - self.publish_date = publish_date - self.thumbnail_path = thumbnail_path - self.account_file = account_file - self.local_executable_path = LOCAL_CHROME_PATH - self.locator_base = None - - async def set_schedule_time(self, page, publish_date): - schedule_input_element = self.locator_base.get_by_label('Schedule') - await schedule_input_element.wait_for(state='visible') # 确保按钮可见 - - await schedule_input_element.click() - if await self.locator_base.locator('div.TUXButton-content >> text=Allow').count(): - await self.locator_base.locator('div.TUXButton-content >> text=Allow').click() - - scheduled_picker = self.locator_base.locator('div.scheduled-picker') - await scheduled_picker.locator('div.TUXInputBox').nth(1).click() - - calendar_month = await self.locator_base.locator( - 'div.calendar-wrapper span.month-title').inner_text() - - n_calendar_month = datetime.strptime(calendar_month, '%B').month - - schedule_month = publish_date.month - - if n_calendar_month != schedule_month: - if n_calendar_month < schedule_month: - arrow = self.locator_base.locator('div.calendar-wrapper span.arrow').nth(-1) - else: - arrow = self.locator_base.locator('div.calendar-wrapper span.arrow').nth(0) - await arrow.click() - - # day set - valid_days_locator = self.locator_base.locator( - 'div.calendar-wrapper span.day.valid') - valid_days = await valid_days_locator.count() - for i in range(valid_days): - day_element = valid_days_locator.nth(i) - text = await day_element.inner_text() - if text.strip() == str(publish_date.day): - await day_element.click() - break - # time set - await scheduled_picker.locator('div.TUXInputBox').nth(0).click() - - hour_str = publish_date.strftime("%H") - correct_minute = int(publish_date.minute / 5) - minute_str = f"{correct_minute:02d}" - - hour_selector = f"span.tiktok-timepicker-left:has-text('{hour_str}')" - minute_selector = f"span.tiktok-timepicker-right:has-text('{minute_str}')" - - # pick hour first - await page.wait_for_timeout(500) # 等待500毫秒 - await self.locator_base.locator(hour_selector).click() - # click time button again - await page.wait_for_timeout(500) # 等待500毫秒 - await scheduled_picker.locator('div.TUXInputBox').nth(0).click() - await page.wait_for_timeout(500) # 等待500毫秒 - # pick minutes after - await scheduled_picker.locator('div.TUXInputBox').nth(0).click() - await page.wait_for_timeout(500) # 等待500毫秒 - await self.locator_base.locator(minute_selector).click() - - # click title to remove the focus. 
- # await self.locator_base.locator("h1:has-text('Upload video')").click() - - async def handle_upload_error(self, page): - tiktok_logger.info("video upload error retrying.") - select_file_button = self.locator_base.locator('button[aria-label="Select file"]') - async with page.expect_file_chooser() as fc_info: - await select_file_button.click() - file_chooser = await fc_info.value - await file_chooser.set_files(self.file_path) - - async def upload(self, playwright: Playwright) -> None: - browser = await playwright.chromium.launch(headless=False, executable_path=self.local_executable_path) - context = await browser.new_context(storage_state=f"{self.account_file}") - context = await set_init_script(context) - page = await context.new_page() - - # change language to eng first - await self.change_language(page) - await page.goto("https://www.tiktok.com/tiktokstudio/upload") - tiktok_logger.info(f'[+]Uploading-------{self.title}.mp4') - - await page.wait_for_url("https://www.tiktok.com/tiktokstudio/upload", timeout=10000) - - try: - await page.wait_for_selector('iframe[data-tt="Upload_index_iframe"], div.upload-container', timeout=10000) - tiktok_logger.info("Either iframe or div appeared.") - except Exception as e: - tiktok_logger.error("Neither iframe nor div appeared within the timeout.") - - await self.choose_base_locator(page) - - upload_button = self.locator_base.locator( - 'button:has-text("Select video"):visible') - await upload_button.wait_for(state='visible') # 确保按钮可见 - - async with page.expect_file_chooser() as fc_info: - await upload_button.click() - file_chooser = await fc_info.value - await file_chooser.set_files(self.file_path) - - await self.add_title_tags(page) - # detect upload status - await self.detect_upload_status(page) - if self.thumbnail_path: - tiktok_logger.info(f'[+] Uploading thumbnail file {self.title}.png') - await self.upload_thumbnails(page) - - if self.publish_date != 0: - await self.set_schedule_time(page, self.publish_date) - - await self.click_publish(page) - - await context.storage_state(path=f"{self.account_file}") # save cookie - tiktok_logger.info(' [-] update cookie!') - await asyncio.sleep(2) # close delay for look the video status - # close all - await context.close() - await browser.close() - - async def add_title_tags(self, page): - - editor_locator = self.locator_base.locator('div.public-DraftEditor-content') - await editor_locator.click() - - await page.keyboard.press("End") - - await page.keyboard.press("Control+A") - - await page.keyboard.press("Delete") - - await page.keyboard.press("End") - - await page.wait_for_timeout(1000) # 等待1秒 - - await page.keyboard.insert_text(self.title) - await page.wait_for_timeout(1000) # 等待1秒 - await page.keyboard.press("End") - - await page.keyboard.press("Enter") - - # tag part - for index, tag in enumerate(self.tags, start=1): - tiktok_logger.info("Setting the %s tag" % index) - await page.keyboard.press("End") - await page.wait_for_timeout(1000) # 等待1秒 - await page.keyboard.insert_text("#" + tag + " ") - await page.keyboard.press("Space") - await page.wait_for_timeout(1000) # 等待1秒 - - await page.keyboard.press("Backspace") - await page.keyboard.press("End") - - async def upload_thumbnails(self, page): - await self.locator_base.locator(".cover-container").click() - await self.locator_base.locator(".cover-edit-container >> text=Upload cover").click() - async with page.expect_file_chooser() as fc_info: - await self.locator_base.locator(".upload-image-upload-area").click() - file_chooser = await fc_info.value - 
await file_chooser.set_files(self.thumbnail_path) - await self.locator_base.locator('div.cover-edit-panel:not(.hide-panel)').get_by_role( - "button", name="Confirm").click() - await page.wait_for_timeout(3000) # wait 3s, fix it later - - async def change_language(self, page): - # set the language to english - await page.goto("https://www.tiktok.com") - await page.wait_for_url("https://www.tiktok.com/", timeout=100000) - await page.wait_for_selector('#header-more-menu-icon') - - await page.locator('#header-more-menu-icon').hover() - await page.locator('[data-e2e="language-select"]').click() - await page.locator('#lang-setting-popup-list >> text=English').click() - - async def click_publish(self, page): - success_flag_div = 'div.common-modal-confirm-modal' - while True: - try: - publish_button = self.locator_base.locator('div.button-group button').nth(0) - if await publish_button.count(): - await publish_button.click() - - await self.locator_base.locator(success_flag_div).wait_for(state="visible", timeout=3000) - tiktok_logger.success(" [-] video published success") - break - except Exception as e: - if await self.locator_base.locator(success_flag_div).count(): - tiktok_logger.success(" [-]video published success") - break - else: - tiktok_logger.exception(f" [-] Exception: {e}") - tiktok_logger.info(" [-] video publishing") - await page.screenshot(full_page=True) - await asyncio.sleep(0.5) - - async def detect_upload_status(self, page): - while True: - try: - # if await self.locator_base.locator('div.btn-post > button').get_attribute("disabled") is None: - if await self.locator_base.locator( - 'div.button-group > button >> text=Post').get_attribute("disabled") is None: - tiktok_logger.info(" [-]video uploaded.") - break - else: - tiktok_logger.info(" [-] video uploading...") - await asyncio.sleep(2) - if await self.locator_base.locator( - 'button[aria-label="Select file"]').count(): - tiktok_logger.info(" [-] found some error while uploading now retry...") - await self.handle_upload_error(page) - except: - tiktok_logger.info(" [-] video uploading...") - await asyncio.sleep(2) - - async def choose_base_locator(self, page): - # await page.wait_for_selector('div.upload-container') - if await page.locator('iframe[data-tt="Upload_index_iframe"]').count(): - self.locator_base = page.frame_locator(Tk_Locator.tk_iframe) - else: - self.locator_base = page.locator(Tk_Locator.default) - - async def main(self): - async with async_playwright() as playwright: - await self.upload(playwright) +# -*- coding: utf-8 -*- +import re +from datetime import datetime + +from playwright.async_api import Playwright, async_playwright +import os +import asyncio + +from conf import LOCAL_CHROME_PATH +from uploader.tk_uploader.tk_config import Tk_Locator +from utils.base_social_media import set_init_script +from utils.files_times import get_absolute_path +from utils.log import tiktok_logger + + +async def cookie_auth(account_file): + async with async_playwright() as playwright: + browser = await playwright.chromium.launch(headless=True) + context = await browser.new_context(storage_state=account_file) + context = await set_init_script(context) + # 创建一个新的页面 + page = await context.new_page() + # 访问指定的 URL + await page.goto("https://www.tiktok.com/tiktokstudio/upload?lang=en") + await page.wait_for_load_state('networkidle') + try: + # 选择所有的 select 元素 + select_elements = await page.query_selector_all('select') + for element in select_elements: + class_name = await element.get_attribute('class') + # 使用正则表达式匹配特定模式的 class 名称 + 
if re.match(r'tiktok-.*-SelectFormContainer.*', class_name): + tiktok_logger.error("[+] cookie expired") + return False + tiktok_logger.success("[+] cookie valid") + return True + except: + tiktok_logger.success("[+] cookie valid") + return True + + +async def tiktok_setup(account_file, handle=False): + account_file = get_absolute_path(account_file, "tk_uploader") + if not os.path.exists(account_file) or not await cookie_auth(account_file): + if not handle: + return False + tiktok_logger.info('[+] cookie file is not existed or expired. Now open the browser auto. Please login with your way(gmail phone, whatever, the cookie file will generated after login') + await get_tiktok_cookie(account_file) + return True + + +async def get_tiktok_cookie(account_file): + async with async_playwright() as playwright: + options = { + 'args': [ + '--lang en-GB', + ], + 'headless': False, # Set headless option here + } + # Make sure to run headed. + browser = await playwright.chromium.launch(**options) + # Setup context however you like. + context = await browser.new_context() # Pass any options + context = await set_init_script(context) + # Pause the page, and start recording manually. + page = await context.new_page() + await page.goto("https://www.tiktok.com/login?lang=en") + await page.pause() + # 点击调试器的继续,保存cookie + await context.storage_state(path=account_file) + + +class TiktokVideo(object): + def __init__(self, title, file_path, tags, publish_date, account_file, thumbnail_path=None): + self.title = title + self.file_path = file_path + self.tags = tags + self.publish_date = publish_date + self.thumbnail_path = thumbnail_path + self.account_file = account_file + self.local_executable_path = LOCAL_CHROME_PATH + self.locator_base = None + + async def set_schedule_time(self, page, publish_date): + schedule_input_element = self.locator_base.get_by_label('Schedule') + await schedule_input_element.wait_for(state='visible') # 确保按钮可见 + + await schedule_input_element.click() + if await self.locator_base.locator('div.TUXButton-content >> text=Allow').count(): + await self.locator_base.locator('div.TUXButton-content >> text=Allow').click() + + scheduled_picker = self.locator_base.locator('div.scheduled-picker') + await scheduled_picker.locator('div.TUXInputBox').nth(1).click() + + calendar_month = await self.locator_base.locator( + 'div.calendar-wrapper span.month-title').inner_text() + + n_calendar_month = datetime.strptime(calendar_month, '%B').month + + schedule_month = publish_date.month + + if n_calendar_month != schedule_month: + if n_calendar_month < schedule_month: + arrow = self.locator_base.locator('div.calendar-wrapper span.arrow').nth(-1) + else: + arrow = self.locator_base.locator('div.calendar-wrapper span.arrow').nth(0) + await arrow.click() + + # day set + valid_days_locator = self.locator_base.locator( + 'div.calendar-wrapper span.day.valid') + valid_days = await valid_days_locator.count() + for i in range(valid_days): + day_element = valid_days_locator.nth(i) + text = await day_element.inner_text() + if text.strip() == str(publish_date.day): + await day_element.click() + break + # time set + await scheduled_picker.locator('div.TUXInputBox').nth(0).click() + + hour_str = publish_date.strftime("%H") + correct_minute = int(publish_date.minute / 5) + minute_str = f"{correct_minute:02d}" + + hour_selector = f"span.tiktok-timepicker-left:has-text('{hour_str}')" + minute_selector = f"span.tiktok-timepicker-right:has-text('{minute_str}')" + + # pick hour first + await page.wait_for_timeout(500) # 
等待500毫秒 + await self.locator_base.locator(hour_selector).click() + # click time button again + await page.wait_for_timeout(500) # 等待500毫秒 + await scheduled_picker.locator('div.TUXInputBox').nth(0).click() + await page.wait_for_timeout(500) # 等待500毫秒 + # pick minutes after + await scheduled_picker.locator('div.TUXInputBox').nth(0).click() + await page.wait_for_timeout(500) # 等待500毫秒 + await self.locator_base.locator(minute_selector).click() + + # click title to remove the focus. + # await self.locator_base.locator("h1:has-text('Upload video')").click() + + async def handle_upload_error(self, page): + tiktok_logger.info("video upload error retrying.") + select_file_button = self.locator_base.locator('button[aria-label="Select file"]') + async with page.expect_file_chooser() as fc_info: + await select_file_button.click() + file_chooser = await fc_info.value + await file_chooser.set_files(self.file_path) + + async def upload(self, playwright: Playwright) -> None: + browser = await playwright.chromium.launch(headless=False, executable_path=self.local_executable_path) + context = await browser.new_context(storage_state=f"{self.account_file}") + context = await set_init_script(context) + page = await context.new_page() + + # change language to eng first + await self.change_language(page) + await page.goto("https://www.tiktok.com/tiktokstudio/upload") + tiktok_logger.info(f'[+]Uploading-------{self.title}.mp4') + + await page.wait_for_url("https://www.tiktok.com/tiktokstudio/upload", timeout=10000) + + try: + await page.wait_for_selector('iframe[data-tt="Upload_index_iframe"], div.upload-container', timeout=10000) + tiktok_logger.info("Either iframe or div appeared.") + except Exception as e: + tiktok_logger.error("Neither iframe nor div appeared within the timeout.") + + await self.choose_base_locator(page) + + upload_button = self.locator_base.locator( + 'button:has-text("Select video"):visible') + await upload_button.wait_for(state='visible') # 确保按钮可见 + + async with page.expect_file_chooser() as fc_info: + await upload_button.click() + file_chooser = await fc_info.value + await file_chooser.set_files(self.file_path) + + await self.add_title_tags(page) + # detect upload status + await self.detect_upload_status(page) + if self.thumbnail_path: + tiktok_logger.info(f'[+] Uploading thumbnail file {self.title}.png') + await self.upload_thumbnails(page) + + if self.publish_date != 0: + await self.set_schedule_time(page, self.publish_date) + + await self.click_publish(page) + + await context.storage_state(path=f"{self.account_file}") # save cookie + tiktok_logger.info(' [-] update cookie!') + await asyncio.sleep(2) # close delay for look the video status + # close all + await context.close() + await browser.close() + + async def add_title_tags(self, page): + + editor_locator = self.locator_base.locator('div.public-DraftEditor-content') + await editor_locator.click() + + await page.keyboard.press("End") + + await page.keyboard.press("Control+A") + + await page.keyboard.press("Delete") + + await page.keyboard.press("End") + + await page.wait_for_timeout(1000) # 等待1秒 + + await page.keyboard.insert_text(self.title) + await page.wait_for_timeout(1000) # 等待1秒 + await page.keyboard.press("End") + + await page.keyboard.press("Enter") + + # tag part + for index, tag in enumerate(self.tags, start=1): + tiktok_logger.info("Setting the %s tag" % index) + await page.keyboard.press("End") + await page.wait_for_timeout(1000) # 等待1秒 + await page.keyboard.insert_text("#" + tag + " ") + await 
page.keyboard.press("Space") + await page.wait_for_timeout(1000) # 等待1秒 + + await page.keyboard.press("Backspace") + await page.keyboard.press("End") + + async def upload_thumbnails(self, page): + await self.locator_base.locator(".cover-container").click() + await self.locator_base.locator(".cover-edit-container >> text=Upload cover").click() + async with page.expect_file_chooser() as fc_info: + await self.locator_base.locator(".upload-image-upload-area").click() + file_chooser = await fc_info.value + await file_chooser.set_files(self.thumbnail_path) + await self.locator_base.locator('div.cover-edit-panel:not(.hide-panel)').get_by_role( + "button", name="Confirm").click() + await page.wait_for_timeout(3000) # wait 3s, fix it later + + async def change_language(self, page): + # set the language to english + await page.goto("https://www.tiktok.com") + await page.wait_for_url("https://www.tiktok.com/", timeout=100000) + await page.wait_for_selector('#header-more-menu-icon') + + await page.locator('#header-more-menu-icon').hover() + await page.locator('[data-e2e="language-select"]').click() + await page.locator('#lang-setting-popup-list >> text=English').click() + + async def click_publish(self, page): + success_flag_div = 'div.common-modal-confirm-modal' + while True: + try: + publish_button = self.locator_base.locator('div.button-group button').nth(0) + if await publish_button.count(): + await publish_button.click() + + await self.locator_base.locator(success_flag_div).wait_for(state="visible", timeout=3000) + tiktok_logger.success(" [-] video published success") + break + except Exception as e: + if await self.locator_base.locator(success_flag_div).count(): + tiktok_logger.success(" [-]video published success") + break + else: + tiktok_logger.exception(f" [-] Exception: {e}") + tiktok_logger.info(" [-] video publishing") + await page.screenshot(full_page=True) + await asyncio.sleep(0.5) + + async def detect_upload_status(self, page): + while True: + try: + # if await self.locator_base.locator('div.btn-post > button').get_attribute("disabled") is None: + if await self.locator_base.locator( + 'div.button-group > button >> text=Post').get_attribute("disabled") is None: + tiktok_logger.info(" [-]video uploaded.") + break + else: + tiktok_logger.info(" [-] video uploading...") + await asyncio.sleep(2) + if await self.locator_base.locator( + 'button[aria-label="Select file"]').count(): + tiktok_logger.info(" [-] found some error while uploading now retry...") + await self.handle_upload_error(page) + except: + tiktok_logger.info(" [-] video uploading...") + await asyncio.sleep(2) + + async def choose_base_locator(self, page): + # await page.wait_for_selector('div.upload-container') + if await page.locator('iframe[data-tt="Upload_index_iframe"]').count(): + self.locator_base = page.frame_locator(Tk_Locator.tk_iframe) + else: + self.locator_base = page.locator(Tk_Locator.default) + + async def main(self): + async with async_playwright() as playwright: + await self.upload(playwright) diff --git a/uploader/tk_uploader/tk_config.py b/uploader/tk_uploader/tk_config.py index 26a2cd4e..6c975bac 100644 --- a/uploader/tk_uploader/tk_config.py +++ b/uploader/tk_uploader/tk_config.py @@ -1,4 +1,4 @@ - -class Tk_Locator(object): - tk_iframe = '[data-tt="Upload_index_iframe"]' - default = 'body' + +class Tk_Locator(object): + tk_iframe = '[data-tt="Upload_index_iframe"]' + default = 'body' diff --git a/uploader/xhs_uploader/accounts.ini b/uploader/xhs_uploader/accounts.ini index cd6d387b..e0967779 100644 --- 
a/uploader/xhs_uploader/accounts.ini +++ b/uploader/xhs_uploader/accounts.ini @@ -1,2 +1,2 @@ -[account1] -cookies = changeme +[account1] +cookies = changeme diff --git a/uploader/xhs_uploader/basic_sign_server.py b/uploader/xhs_uploader/basic_sign_server.py new file mode 100644 index 00000000..64bf15ee --- /dev/null +++ b/uploader/xhs_uploader/basic_sign_server.py @@ -0,0 +1,65 @@ +import time + +from flask import Flask, request +from gevent import monkey +from playwright.sync_api import sync_playwright +from utils.base_social_media import get_stealth_js_path + +monkey.patch_all() + +app = Flask(__name__) + +A1 = "" + + +def get_context_page(instance, stealth_js_path): + chromium = instance.chromium + browser = chromium.launch(headless=True) + context = browser.new_context() + context.add_init_script(path=stealth_js_path) + page = context.new_page() + return context, page + + +stealth_js_path = get_stealth_js_path() +print("正在启动 playwright") +playwright = sync_playwright().start() +browser_context, context_page = get_context_page(playwright, stealth_js_path) +context_page.goto("https://www.xiaohongshu.com") +print("正在跳转至小红书首页") +time.sleep(5) +context_page.reload() +time.sleep(1) +cookies = browser_context.cookies() +for cookie in cookies: + if cookie["name"] == "a1": + A1 = cookie["value"] + print("当前浏览器 cookie 中 a1 值为:" + cookie["value"] + ",请将需要使用的 a1 设置成一样方可签名成功") +print("跳转小红书首页成功,等待调用") + + +def sign(uri, data, a1, web_session): + encrypt_params = context_page.evaluate("([url, data]) => window._webmsxyw(url, data)", [uri, data]) + return { + "x-s": encrypt_params["X-s"], + "x-t": str(encrypt_params["X-t"]) + } + + +@app.route("/sign", methods=["POST"]) +def hello_world(): + json = request.json + uri = json["uri"] + data = json["data"] + a1 = json["a1"] + web_session = json["web_session"] + return sign(uri, data, a1, web_session) + + +@app.route("/a1", methods=["GET"]) +def get_a1(): + return {'a1': A1} + + +if __name__ == '__main__': + app.run(host="0.0.0.0", port=5005) \ No newline at end of file diff --git a/uploader/xhs_uploader/main.py b/uploader/xhs_uploader/main.py index dea9b145..4d110bca 100644 --- a/uploader/xhs_uploader/main.py +++ b/uploader/xhs_uploader/main.py @@ -1,58 +1,58 @@ -import configparser -import json -import pathlib -from time import sleep - -import requests -from playwright.sync_api import sync_playwright - -from conf import BASE_DIR, XHS_SERVER - -config = configparser.RawConfigParser() -config.read('accounts.ini') - - -def sign_local(uri, data=None, a1="", web_session=""): - for _ in range(10): - try: - with sync_playwright() as playwright: - stealth_js_path = pathlib.Path(BASE_DIR / "utils/stealth.min.js") - chromium = playwright.chromium - - # 如果一直失败可尝试设置成 False 让其打开浏览器,适当添加 sleep 可查看浏览器状态 - browser = chromium.launch(headless=True) - - browser_context = browser.new_context() - browser_context.add_init_script(path=stealth_js_path) - context_page = browser_context.new_page() - context_page.goto("https://www.xiaohongshu.com") - browser_context.add_cookies([ - {'name': 'a1', 'value': a1, 'domain': ".xiaohongshu.com", 'path': "/"}] - ) - context_page.reload() - # 这个地方设置完浏览器 cookie 之后,如果这儿不 sleep 一下签名获取就失败了,如果经常失败请设置长一点试试 - sleep(2) - encrypt_params = context_page.evaluate("([url, data]) => window._webmsxyw(url, data)", [uri, data]) - return { - "x-s": encrypt_params["X-s"], - "x-t": str(encrypt_params["X-t"]) - } - except Exception: - # 这儿有时会出现 window._webmsxyw is not a function 或未知跳转错误,因此加一个失败重试趴 - pass - raise Exception("重试了这么多次还是无法签名成功,寄寄寄") - - -def 
sign(uri, data=None, a1="", web_session=""): - # 填写自己的 flask 签名服务端口地址 - res = requests.post(f"{XHS_SERVER}/sign", - json={"uri": uri, "data": data, "a1": a1, "web_session": web_session}) - signs = res.json() - return { - "x-s": signs["x-s"], - "x-t": signs["x-t"] - } - - -def beauty_print(data: dict): - print(json.dumps(data, ensure_ascii=False, indent=2)) +import configparser +import json +import pathlib +from time import sleep + +import requests +from playwright.sync_api import sync_playwright + +from conf import BASE_DIR, XHS_SERVER + +config = configparser.RawConfigParser() +config.read('accounts.ini') + + +def sign_local(uri, data=None, a1="", web_session=""): + for _ in range(10): + try: + with sync_playwright() as playwright: + stealth_js_path = pathlib.Path(BASE_DIR / "utils/stealth.min.js") + chromium = playwright.chromium + + # 如果一直失败可尝试设置成 False 让其打开浏览器,适当添加 sleep 可查看浏览器状态 + browser = chromium.launch(headless=True) + + browser_context = browser.new_context() + browser_context.add_init_script(path=stealth_js_path) + context_page = browser_context.new_page() + context_page.goto("https://www.xiaohongshu.com") + browser_context.add_cookies([ + {'name': 'a1', 'value': a1, 'domain': ".xiaohongshu.com", 'path': "/"}] + ) + context_page.reload() + # 这个地方设置完浏览器 cookie 之后,如果这儿不 sleep 一下签名获取就失败了,如果经常失败请设置长一点试试 + sleep(2) + encrypt_params = context_page.evaluate("([url, data]) => window._webmsxyw(url, data)", [uri, data]) + return { + "x-s": encrypt_params["X-s"], + "x-t": str(encrypt_params["X-t"]) + } + except Exception: + # 这儿有时会出现 window._webmsxyw is not a function 或未知跳转错误,因此加一个失败重试趴 + pass + raise Exception("重试了这么多次还是无法签名成功,寄寄寄") + + +def sign(uri, data=None, a1="", web_session=""): + # 填写自己的 flask 签名服务端口地址 + res = requests.post(f"{XHS_SERVER}/sign", + json={"uri": uri, "data": data, "a1": a1, "web_session": web_session}) + signs = res.json() + return { + "x-s": signs["x-s"], + "x-t": signs["x-t"] + } + + +def beauty_print(data: dict): + print(json.dumps(data, ensure_ascii=False, indent=2)) diff --git a/uploader/xhs_uploader/xhs_login_qrcode.py b/uploader/xhs_uploader/xhs_login_qrcode.py index b6a7d064..a880dc5a 100644 --- a/uploader/xhs_uploader/xhs_login_qrcode.py +++ b/uploader/xhs_uploader/xhs_login_qrcode.py @@ -1,34 +1,34 @@ -import datetime -import json -import qrcode -from time import sleep - -from xhs import XhsClient - -from uploader.xhs_uploader.main import sign - -# pip install qrcode -if __name__ == '__main__': - xhs_client = XhsClient(sign=sign, timeout=60) - print(datetime.datetime.now()) - qr_res = xhs_client.get_qrcode() - qr_id = qr_res["qr_id"] - qr_code = qr_res["code"] - - qr = qrcode.QRCode(version=1, error_correction=qrcode.ERROR_CORRECT_L, - box_size=50, - border=1) - qr.add_data(qr_res["url"]) - qr.make() - qr.print_ascii() - - while True: - check_qrcode = xhs_client.check_qrcode(qr_id, qr_code) - print(check_qrcode) - sleep(1) - if check_qrcode["code_status"] == 2: - print(json.dumps(check_qrcode["login_info"], indent=4)) - print("当前 cookie:" + xhs_client.cookie) - break - +import datetime +import json +import qrcode +from time import sleep + +from xhs import XhsClient + +from uploader.xhs_uploader.main import sign + +# pip install qrcode +if __name__ == '__main__': + xhs_client = XhsClient(sign=sign, timeout=60) + print(datetime.datetime.now()) + qr_res = xhs_client.get_qrcode() + qr_id = qr_res["qr_id"] + qr_code = qr_res["code"] + + qr = qrcode.QRCode(version=1, error_correction=qrcode.ERROR_CORRECT_L, + box_size=50, + border=1) + qr.add_data(qr_res["url"]) 
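# A minimal client-side sketch of calling the Flask sign service defined in
# basic_sign_server.py above (not part of this change set). The base URL, uri
# and data values are hypothetical placeholders — in the project itself the
# base URL comes from XHS_SERVER in conf.py — and the payload keys mirror what
# the /sign endpoint reads from the request JSON.
import requests

payload = {
    "uri": "/api/sns/web/v1/feed",          # hypothetical API path to sign
    "data": {"note_id": "demo"},            # hypothetical request body
    "a1": "<a1 cookie value>",
    "web_session": "<web_session cookie value>",
}
signs = requests.post("http://127.0.0.1:5005/sign", json=payload).json()
headers = {"x-s": signs["x-s"], "x-t": signs["x-t"]}
current_a1 = requests.get("http://127.0.0.1:5005/a1").json()["a1"]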
+ qr.make() + qr.print_ascii() + + while True: + check_qrcode = xhs_client.check_qrcode(qr_id, qr_code) + print(check_qrcode) + sleep(1) + if check_qrcode["code_status"] == 2: + print(json.dumps(check_qrcode["login_info"], indent=4)) + print("当前 cookie:" + xhs_client.cookie) + break + print(json.dumps(xhs_client.get_self_info(), indent=4)) \ No newline at end of file diff --git a/utils/base_social_media.py b/utils/base_social_media.py index 76978753..b03ba080 100644 --- a/utils/base_social_media.py +++ b/utils/base_social_media.py @@ -1,24 +1,81 @@ -from pathlib import Path -from typing import List - -from conf import BASE_DIR - -SOCIAL_MEDIA_DOUYIN = "douyin" -SOCIAL_MEDIA_TENCENT = "tencent" -SOCIAL_MEDIA_TIKTOK = "tiktok" -SOCIAL_MEDIA_BILIBILI = "bilibili" -SOCIAL_MEDIA_KUAISHOU = "kuaishou" - - -def get_supported_social_media() -> List[str]: - return [SOCIAL_MEDIA_DOUYIN, SOCIAL_MEDIA_TENCENT, SOCIAL_MEDIA_TIKTOK, SOCIAL_MEDIA_KUAISHOU] - - -def get_cli_action() -> List[str]: - return ["upload", "login", "watch"] - - -async def set_init_script(context): - stealth_js_path = Path(BASE_DIR / "utils/stealth.min.js") - await context.add_init_script(path=stealth_js_path) - return context +from pathlib import Path +from typing import List +import os +import requests + +from conf import BASE_DIR + +SOCIAL_MEDIA_DOUYIN = "douyin" +SOCIAL_MEDIA_TENCENT = "tencent" +SOCIAL_MEDIA_TIKTOK = "tiktok" +SOCIAL_MEDIA_BILIBILI = "bilibili" +SOCIAL_MEDIA_KUAISHOU = "kuaishou" + + +def get_supported_social_media() -> List[str]: + """ + 获取当前支持的社交媒体平台列表。 + + Returns: + List[str]: 支持的社交媒体平台名称列表,包括抖音、腾讯、TikTok和快手。 + """ + return [SOCIAL_MEDIA_DOUYIN, SOCIAL_MEDIA_TENCENT, SOCIAL_MEDIA_TIKTOK, SOCIAL_MEDIA_KUAISHOU] + + +def get_cli_action() -> List[str]: + """ + 获取CLI(命令行界面)可执行的操作列表。 + + Returns: + List[str]: 包含可用CLI操作的列表,如 ['upload', 'login', 'watch']。 + """ + return ["upload", "login", "watch"] + + +async def set_init_script(context): + """ + 异步函数,用于在给定的上下文中设置初始化脚本。 + + 参数: + - context: 上下文对象,用于执行初始化脚本的环境。 + + 返回: + - 返回更新后的上下文对象。 + """ + # 获取stealth.js脚本的路径 + stealth_js_path = get_stealth_js_path() + + # 在上下文中添加初始化脚本,使用stealth.js路径 + await context.add_init_script(path=stealth_js_path) + + # 返回更新后的上下文对象 + return context + + +def get_stealth_js_path() -> Path: + """ + 获取或下载 stealth.min.js 文件并返回其路径 + + 如果 utils 目录下不存在 stealth.min.js 文件,则从指定的 URL 下载并保存到该目录中 + 使用缓存的文件可以避免重复下载,提高效率 + + Returns: + Path: stealth.min.js 文件的路径 + """ + # 定义 stealth.min.js 文件的路径 + stealth_js_path = Path(BASE_DIR) / "utils" / "stealth.min.js" + + # 检查文件是否已经存在 + if not stealth_js_path.exists(): + # stealth.min.js 文件的下载 URL + url = "https://cdn.jsdelivr.net/gh/requireCool/stealth.min.js/stealth.min.js" + # 发起 HTTP 请求,下载文件 + response = requests.get(url) + # 确保 utils 目录存在,如果不存在则创建 + stealth_js_path.parent.mkdir(exist_ok=True) + # 将下载的文件内容写入到 stealth.min.js 文件中 + with open(stealth_js_path, "wb") as f: + f.write(response.content) + + # 返回 stealth.min.js 文件的路径 + return stealth_js_path diff --git a/utils/constant.py b/utils/constant.py index 01b90962..efa32d0b 100644 --- a/utils/constant.py +++ b/utils/constant.py @@ -1,309 +1,309 @@ -import enum - - -class TencentZoneTypes(enum.Enum): - LIFESTYLE = '生活' - CUTE_KIDS = '萌娃' - MUSIC = '音乐' - KNOWLEDGE = '知识' - EMOTION = '情感' - TRAVEL_SCENERY = '旅行风景' - FASHION = '时尚' - FOOD = '美食' - LIFE_HACKS = '生活技巧' - DANCE = '舞蹈' - MOVIES_TV_SHOWS = '影视综艺' - SPORTS = '运动' - FUNNY = '搞笑' - CELEBRITIES = '明星名人' - NEWS_INFO = '新闻资讯' - GAMING = '游戏' - AUTOMOTIVE = '车' - ANIME = '二次元' - TALENT = '才艺' - CUTE_PETS 
= '萌宠' - INDUSTRY_MACHINERY_CONSTRUCTION = '机械' - ANIMALS = '动物' - PARENTING = '育儿' - TECHNOLOGY = '科技' - -class VideoZoneTypes(enum.Enum): - """ - 所有分区枚举 - - - MAINPAGE: 主页 - - ANIME: 番剧 - - ANIME_SERIAL: 连载中番剧 - - ANIME_FINISH: 已完结番剧 - - ANIME_INFORMATION: 资讯 - - ANIME_OFFICAL: 官方延伸 - - MOVIE: 电影 - - GUOCHUANG: 国创 - - GUOCHUANG_CHINESE: 国产动画 - - GUOCHUANG_ORIGINAL: 国产原创相关 - - GUOCHUANG_PUPPETRY: 布袋戏 - - GUOCHUANG_MOTIONCOMIC: 动态漫·广播剧 - - GUOCHUANG_INFORMATION: 资讯 - - TELEPLAY: 电视剧 - - DOCUMENTARY: 纪录片 - - DOUGA: 动画 - - DOUGA_MAD: MAD·AMV - - DOUGA_MMD: MMD·3D - - DOUGA_VOICE: 短片·手书·配音 - - DOUGA_GARAGE_KIT: 手办·模玩 - - DOUGA_TOKUSATSU: 特摄 - - DOUGA_ACGNTALKS: 动漫杂谈 - - DOUGA_OTHER: 综合 - - GAME: 游戏 - - GAME_STAND_ALONE: 单机游戏 - - GAME_ESPORTS: 电子竞技 - - GAME_MOBILE: 手机游戏 - - GAME_ONLINE: 网络游戏 - - GAME_BOARD: 桌游棋牌 - - GAME_GMV: GMV - - GAME_MUSIC: 音游 - - GAME_MUGEN: Mugen - - KICHIKU: 鬼畜 - - KICHIKU_GUIDE: 鬼畜调教 - - KICHIKU_MAD: 音MAD - - KICHIKU_MANUAL_VOCALOID: 人力VOCALOID - - KICHIKU_THEATRE: 鬼畜剧场 - - KICHIKU_COURSE: 教程演示 - - MUSIC: 音乐 - - MUSIC_ORIGINAL: 原创音乐 - - MUSIC_COVER: 翻唱 - - MUSIC_PERFORM: 演奏 - - MUSIC_VOCALOID: VOCALOID·UTAU - - MUSIC_LIVE: 音乐现场 - - MUSIC_MV: MV - - MUSIC_COMMENTARY: 乐评盘点 - - MUSIC_TUTORIAL: 音乐教学 - - MUSIC_OTHER: 音乐综合 - - DANCE: 舞蹈 - - DANCE_OTAKU: 宅舞 - - DANCE_HIPHOP: 街舞 - - DANCE_STAR: 明星舞蹈 - - DANCE_CHINA: 中国舞 - - DANCE_THREE_D: 舞蹈综合 - - DANCE_DEMO: 舞蹈教程 - - CINEPHILE: 影视 - - CINEPHILE_CINECISM: 影视杂谈 - - CINEPHILE_MONTAGE: 影视剪辑 - - CINEPHILE_SHORTFILM: 小剧场 - - CINEPHILE_TRAILER_INFO: 预告·资讯 - - ENT: 娱乐 - - ENT_VARIETY: 综艺 - - ENT_TALKER: 娱乐杂谈 - - ENT_FANS: 粉丝创作 - - ENT_CELEBRITY: 明星综合 - - KNOWLEDGE: 知识 - - KNOWLEDGE_SCIENCE: 科学科普 - - KNOWLEDGE_SOCIAL_SCIENCE: 社科·法律·心理 - - KNOWLEDGE_HUMANITY_HISTORY: 人文历史 - - KNOWLEDGE_BUSINESS: 财经商业 - - KNOWLEDGE_CAMPUS: 校园学习 - - KNOWLEDGE_CAREER: 职业职场 - - KNOWLEDGE_DESIGN: 设计·创意 - - KNOWLEDGE_SKILL: 野生技能协会 - - TECH: 科技 - - TECH_DIGITAL: 数码 - - TECH_APPLICATION: 软件应用 - - TECH_COMPUTER_TECH: 计算机技术 - - TECH_INDUSTRY: 科工机械 - - INFORMATION: 资讯 - - INFORMATION_HOTSPOT: 热点 - - INFORMATION_GLOBAL: 环球 - - INFORMATION_SOCIAL: 社会 - - INFORMATION_MULTIPLE: 综合 - - FOOD: 美食 - - FOOD_MAKE: 美食制作 - - FOOD_DETECTIVE: 美食侦探 - - FOOD_MEASUREMENT: 美食测评 - - FOOD_RURAL: 田园美食 - - FOOD_RECORD: 美食记录 - - LIFE: 生活 - - LIFE_FUNNY: 搞笑 - - LIFE_TRAVEL: 出行 - - LIFE_RURALLIFE: 三农 - - LIFE_HOME: 家居房产 - - LIFE_HANDMAKE: 手工 - - LIFE_PAINTING: 绘画 - - LIFE_DAILY: 日常 - - CAR: 汽车 - - CAR_RACING: 赛车 - - CAR_MODIFIEDVEHICLE: 改装玩车 - - CAR_NEWENERGYVEHICLE: 新能源车 - - CAR_TOURINGCAR: 房车 - - CAR_MOTORCYCLE: 摩托车 - - CAR_STRATEGY: 购车攻略 - - CAR_LIFE: 汽车生活 - - FASHION: 时尚 - - FASHION_MAKEUP: 美妆护肤 - - FASHION_COS: 仿妆cos - - FASHION_CLOTHING: 穿搭 - - FASHION_TREND: 时尚潮流 - - SPORTS: 运动 - - SPORTS_BASKETBALL: 篮球 - - SPORTS_FOOTBALL: 足球 - - SPORTS_AEROBICS: 健身 - - SPORTS_ATHLETIC: 竞技体育 - - SPORTS_CULTURE: 运动文化 - - SPORTS_COMPREHENSIVE: 运动综合 - - ANIMAL: 动物圈 - - ANIMAL_CAT: 喵星人 - - ANIMAL_DOG: 汪星人 - - ANIMAL_PANDA: 大熊猫 - - ANIMAL_WILD_ANIMAL: 野生动物 - - ANIMAL_REPTILES: 爬宠 - - ANIMAL_COMPOSITE: 动物综合 - - VLOG: VLOG - """ - - MAINPAGE = 0 - - ANIME = 13 - ANIME_SERIAL = 33 - ANIME_FINISH = 32 - ANIME_INFORMATION = 51 - ANIME_OFFICAL = 152 - - MOVIE = 23 - - GUOCHUANG = 167 - GUOCHUANG_CHINESE = 153 - GUOCHUANG_ORIGINAL = 168 - GUOCHUANG_PUPPETRY = 169 - GUOCHUANG_MOTIONCOMIC = 195 - GUOCHUANG_INFORMATION = 170 - - TELEPLAY = 11 - - DOCUMENTARY = 177 - - DOUGA = 1 - DOUGA_MAD = 24 - DOUGA_MMD = 25 - DOUGA_VOICE = 47 - DOUGA_GARAGE_KIT = 210 - DOUGA_TOKUSATSU = 86 - DOUGA_ACGNTALKS = 253 - 
DOUGA_OTHER = 27 - - GAME = 4 - GAME_STAND_ALONE = 17 - GAME_ESPORTS = 171 - GAME_MOBILE = 172 - GAME_ONLINE = 65 - GAME_BOARD = 173 - GAME_GMV = 121 - GAME_MUSIC = 136 - GAME_MUGEN = 19 - - KICHIKU = 119 - KICHIKU_GUIDE = 22 - KICHIKU_MAD = 26 - KICHIKU_MANUAL_VOCALOID = 126 - KICHIKU_THEATRE = 216 - KICHIKU_COURSE = 127 - - MUSIC = 3 - MUSIC_ORIGINAL = 28 - MUSIC_COVER = 31 - MUSIC_PERFORM = 59 - MUSIC_VOCALOID = 30 - MUSIC_LIVE = 29 - MUSIC_MV = 193 - MUSIC_COMMENTARY = 243 - MUSIC_TUTORIAL = 244 - MUSIC_OTHER = 130 - - DANCE = 129 - DANCE_OTAKU = 20 - DANCE_HIPHOP = 198 - DANCE_STAR = 199 - DANCE_CHINA = 200 - DANCE_THREE_D = 154 - DANCE_DEMO = 156 - - CINEPHILE = 181 - CINEPHILE_CINECISM = 182 - CINEPHILE_MONTAGE = 183 - CINEPHILE_SHORTFILM = 85 - CINEPHILE_TRAILER_INFO = 184 - - ENT = 5 - ENT_VARIETY = 71 - ENT_TALKER = 241 - ENT_FANS = 242 - ENT_CELEBRITY = 137 - - KNOWLEDGE = 36 - KNOWLEDGE_SCIENCE = 201 - KNOWLEDGE_SOCIAL_SCIENCE = 124 - KNOWLEDGE_HUMANITY_HISTORY = 228 - KNOWLEDGE_BUSINESS = 207 - KNOWLEDGE_CAMPUS = 208 - KNOWLEDGE_CAREER = 209 - KNOWLEDGE_DESIGN = 229 - KNOWLEDGE_SKILL = 122 - - TECH = 188 - TECH_DIGITAL = 95 - TECH_APPLICATION = 230 - TECH_COMPUTER_TECH = 231 - TECH_INDUSTRY = 232 - - INFORMATION = 202 - INFORMATION_HOTSPOT = 203 - INFORMATION_GLOBAL = 204 - INFORMATION_SOCIAL = 205 - INFORMATION_MULTIPLE = 206 - - FOOD = 211 - FOOD_MAKE = 76 - FOOD_DETECTIVE = 212 - FOOD_MEASUREMENT = 213 - FOOD_RURAL = 214 - FOOD_RECORD = 215 - - LIFE = 160 - LIFE_FUNNY = 138 - LIFE_TRAVEL = 250 - LIFE_RURALLIFE = 251 - LIFE_HOME = 239 - LIFE_HANDMAKE = 161 - LIFE_PAINTING = 162 - LIFE_DAILY = 21 - - CAR = 223 - CAR_RACING = 245 - CAR_MODIFIEDVEHICLE = 246 - CAR_NEWENERGYVEHICLE = 247 - CAR_TOURINGCAR = 248 - CAR_MOTORCYCLE = 240 - CAR_STRATEGY = 227 - CAR_LIFE = 176 - - FASHION = 155 - FASHION_MAKEUP = 157 - FASHION_COS = 252 - FASHION_CLOTHING = 158 - FASHION_TREND = 159 - - SPORTS = 234 - SPORTS_BASKETBALL = 235 - SPORTS_FOOTBALL = 249 - SPORTS_AEROBICS = 164 - SPORTS_ATHLETIC = 236 - SPORTS_CULTURE = 237 - SPORTS_COMPREHENSIVE = 238 - - ANIMAL = 217 - ANIMAL_CAT = 218 - ANIMAL_DOG = 219 - ANIMAL_PANDA = 220 - ANIMAL_WILD_ANIMAL = 221 - ANIMAL_REPTILES = 222 - ANIMAL_COMPOSITE = 75 - - VLOG = 19 +import enum + + +class TencentZoneTypes(enum.Enum): + LIFESTYLE = '生活' + CUTE_KIDS = '萌娃' + MUSIC = '音乐' + KNOWLEDGE = '知识' + EMOTION = '情感' + TRAVEL_SCENERY = '旅行风景' + FASHION = '时尚' + FOOD = '美食' + LIFE_HACKS = '生活技巧' + DANCE = '舞蹈' + MOVIES_TV_SHOWS = '影视综艺' + SPORTS = '运动' + FUNNY = '搞笑' + CELEBRITIES = '明星名人' + NEWS_INFO = '新闻资讯' + GAMING = '游戏' + AUTOMOTIVE = '车' + ANIME = '二次元' + TALENT = '才艺' + CUTE_PETS = '萌宠' + INDUSTRY_MACHINERY_CONSTRUCTION = '机械' + ANIMALS = '动物' + PARENTING = '育儿' + TECHNOLOGY = '科技' + +class VideoZoneTypes(enum.Enum): + """ + 所有分区枚举 + + - MAINPAGE: 主页 + - ANIME: 番剧 + - ANIME_SERIAL: 连载中番剧 + - ANIME_FINISH: 已完结番剧 + - ANIME_INFORMATION: 资讯 + - ANIME_OFFICAL: 官方延伸 + - MOVIE: 电影 + - GUOCHUANG: 国创 + - GUOCHUANG_CHINESE: 国产动画 + - GUOCHUANG_ORIGINAL: 国产原创相关 + - GUOCHUANG_PUPPETRY: 布袋戏 + - GUOCHUANG_MOTIONCOMIC: 动态漫·广播剧 + - GUOCHUANG_INFORMATION: 资讯 + - TELEPLAY: 电视剧 + - DOCUMENTARY: 纪录片 + - DOUGA: 动画 + - DOUGA_MAD: MAD·AMV + - DOUGA_MMD: MMD·3D + - DOUGA_VOICE: 短片·手书·配音 + - DOUGA_GARAGE_KIT: 手办·模玩 + - DOUGA_TOKUSATSU: 特摄 + - DOUGA_ACGNTALKS: 动漫杂谈 + - DOUGA_OTHER: 综合 + - GAME: 游戏 + - GAME_STAND_ALONE: 单机游戏 + - GAME_ESPORTS: 电子竞技 + - GAME_MOBILE: 手机游戏 + - GAME_ONLINE: 网络游戏 + - GAME_BOARD: 桌游棋牌 + - GAME_GMV: GMV + - GAME_MUSIC: 音游 + - GAME_MUGEN: Mugen + - 
KICHIKU: 鬼畜 + - KICHIKU_GUIDE: 鬼畜调教 + - KICHIKU_MAD: 音MAD + - KICHIKU_MANUAL_VOCALOID: 人力VOCALOID + - KICHIKU_THEATRE: 鬼畜剧场 + - KICHIKU_COURSE: 教程演示 + - MUSIC: 音乐 + - MUSIC_ORIGINAL: 原创音乐 + - MUSIC_COVER: 翻唱 + - MUSIC_PERFORM: 演奏 + - MUSIC_VOCALOID: VOCALOID·UTAU + - MUSIC_LIVE: 音乐现场 + - MUSIC_MV: MV + - MUSIC_COMMENTARY: 乐评盘点 + - MUSIC_TUTORIAL: 音乐教学 + - MUSIC_OTHER: 音乐综合 + - DANCE: 舞蹈 + - DANCE_OTAKU: 宅舞 + - DANCE_HIPHOP: 街舞 + - DANCE_STAR: 明星舞蹈 + - DANCE_CHINA: 中国舞 + - DANCE_THREE_D: 舞蹈综合 + - DANCE_DEMO: 舞蹈教程 + - CINEPHILE: 影视 + - CINEPHILE_CINECISM: 影视杂谈 + - CINEPHILE_MONTAGE: 影视剪辑 + - CINEPHILE_SHORTFILM: 小剧场 + - CINEPHILE_TRAILER_INFO: 预告·资讯 + - ENT: 娱乐 + - ENT_VARIETY: 综艺 + - ENT_TALKER: 娱乐杂谈 + - ENT_FANS: 粉丝创作 + - ENT_CELEBRITY: 明星综合 + - KNOWLEDGE: 知识 + - KNOWLEDGE_SCIENCE: 科学科普 + - KNOWLEDGE_SOCIAL_SCIENCE: 社科·法律·心理 + - KNOWLEDGE_HUMANITY_HISTORY: 人文历史 + - KNOWLEDGE_BUSINESS: 财经商业 + - KNOWLEDGE_CAMPUS: 校园学习 + - KNOWLEDGE_CAREER: 职业职场 + - KNOWLEDGE_DESIGN: 设计·创意 + - KNOWLEDGE_SKILL: 野生技能协会 + - TECH: 科技 + - TECH_DIGITAL: 数码 + - TECH_APPLICATION: 软件应用 + - TECH_COMPUTER_TECH: 计算机技术 + - TECH_INDUSTRY: 科工机械 + - INFORMATION: 资讯 + - INFORMATION_HOTSPOT: 热点 + - INFORMATION_GLOBAL: 环球 + - INFORMATION_SOCIAL: 社会 + - INFORMATION_MULTIPLE: 综合 + - FOOD: 美食 + - FOOD_MAKE: 美食制作 + - FOOD_DETECTIVE: 美食侦探 + - FOOD_MEASUREMENT: 美食测评 + - FOOD_RURAL: 田园美食 + - FOOD_RECORD: 美食记录 + - LIFE: 生活 + - LIFE_FUNNY: 搞笑 + - LIFE_TRAVEL: 出行 + - LIFE_RURALLIFE: 三农 + - LIFE_HOME: 家居房产 + - LIFE_HANDMAKE: 手工 + - LIFE_PAINTING: 绘画 + - LIFE_DAILY: 日常 + - CAR: 汽车 + - CAR_RACING: 赛车 + - CAR_MODIFIEDVEHICLE: 改装玩车 + - CAR_NEWENERGYVEHICLE: 新能源车 + - CAR_TOURINGCAR: 房车 + - CAR_MOTORCYCLE: 摩托车 + - CAR_STRATEGY: 购车攻略 + - CAR_LIFE: 汽车生活 + - FASHION: 时尚 + - FASHION_MAKEUP: 美妆护肤 + - FASHION_COS: 仿妆cos + - FASHION_CLOTHING: 穿搭 + - FASHION_TREND: 时尚潮流 + - SPORTS: 运动 + - SPORTS_BASKETBALL: 篮球 + - SPORTS_FOOTBALL: 足球 + - SPORTS_AEROBICS: 健身 + - SPORTS_ATHLETIC: 竞技体育 + - SPORTS_CULTURE: 运动文化 + - SPORTS_COMPREHENSIVE: 运动综合 + - ANIMAL: 动物圈 + - ANIMAL_CAT: 喵星人 + - ANIMAL_DOG: 汪星人 + - ANIMAL_PANDA: 大熊猫 + - ANIMAL_WILD_ANIMAL: 野生动物 + - ANIMAL_REPTILES: 爬宠 + - ANIMAL_COMPOSITE: 动物综合 + - VLOG: VLOG + """ + + MAINPAGE = 0 + + ANIME = 13 + ANIME_SERIAL = 33 + ANIME_FINISH = 32 + ANIME_INFORMATION = 51 + ANIME_OFFICAL = 152 + + MOVIE = 23 + + GUOCHUANG = 167 + GUOCHUANG_CHINESE = 153 + GUOCHUANG_ORIGINAL = 168 + GUOCHUANG_PUPPETRY = 169 + GUOCHUANG_MOTIONCOMIC = 195 + GUOCHUANG_INFORMATION = 170 + + TELEPLAY = 11 + + DOCUMENTARY = 177 + + DOUGA = 1 + DOUGA_MAD = 24 + DOUGA_MMD = 25 + DOUGA_VOICE = 47 + DOUGA_GARAGE_KIT = 210 + DOUGA_TOKUSATSU = 86 + DOUGA_ACGNTALKS = 253 + DOUGA_OTHER = 27 + + GAME = 4 + GAME_STAND_ALONE = 17 + GAME_ESPORTS = 171 + GAME_MOBILE = 172 + GAME_ONLINE = 65 + GAME_BOARD = 173 + GAME_GMV = 121 + GAME_MUSIC = 136 + GAME_MUGEN = 19 + + KICHIKU = 119 + KICHIKU_GUIDE = 22 + KICHIKU_MAD = 26 + KICHIKU_MANUAL_VOCALOID = 126 + KICHIKU_THEATRE = 216 + KICHIKU_COURSE = 127 + + MUSIC = 3 + MUSIC_ORIGINAL = 28 + MUSIC_COVER = 31 + MUSIC_PERFORM = 59 + MUSIC_VOCALOID = 30 + MUSIC_LIVE = 29 + MUSIC_MV = 193 + MUSIC_COMMENTARY = 243 + MUSIC_TUTORIAL = 244 + MUSIC_OTHER = 130 + + DANCE = 129 + DANCE_OTAKU = 20 + DANCE_HIPHOP = 198 + DANCE_STAR = 199 + DANCE_CHINA = 200 + DANCE_THREE_D = 154 + DANCE_DEMO = 156 + + CINEPHILE = 181 + CINEPHILE_CINECISM = 182 + CINEPHILE_MONTAGE = 183 + CINEPHILE_SHORTFILM = 85 + CINEPHILE_TRAILER_INFO = 184 + + ENT = 5 + ENT_VARIETY = 71 + ENT_TALKER = 241 + ENT_FANS = 242 + ENT_CELEBRITY = 137 + + 
KNOWLEDGE = 36 + KNOWLEDGE_SCIENCE = 201 + KNOWLEDGE_SOCIAL_SCIENCE = 124 + KNOWLEDGE_HUMANITY_HISTORY = 228 + KNOWLEDGE_BUSINESS = 207 + KNOWLEDGE_CAMPUS = 208 + KNOWLEDGE_CAREER = 209 + KNOWLEDGE_DESIGN = 229 + KNOWLEDGE_SKILL = 122 + + TECH = 188 + TECH_DIGITAL = 95 + TECH_APPLICATION = 230 + TECH_COMPUTER_TECH = 231 + TECH_INDUSTRY = 232 + + INFORMATION = 202 + INFORMATION_HOTSPOT = 203 + INFORMATION_GLOBAL = 204 + INFORMATION_SOCIAL = 205 + INFORMATION_MULTIPLE = 206 + + FOOD = 211 + FOOD_MAKE = 76 + FOOD_DETECTIVE = 212 + FOOD_MEASUREMENT = 213 + FOOD_RURAL = 214 + FOOD_RECORD = 215 + + LIFE = 160 + LIFE_FUNNY = 138 + LIFE_TRAVEL = 250 + LIFE_RURALLIFE = 251 + LIFE_HOME = 239 + LIFE_HANDMAKE = 161 + LIFE_PAINTING = 162 + LIFE_DAILY = 21 + + CAR = 223 + CAR_RACING = 245 + CAR_MODIFIEDVEHICLE = 246 + CAR_NEWENERGYVEHICLE = 247 + CAR_TOURINGCAR = 248 + CAR_MOTORCYCLE = 240 + CAR_STRATEGY = 227 + CAR_LIFE = 176 + + FASHION = 155 + FASHION_MAKEUP = 157 + FASHION_COS = 252 + FASHION_CLOTHING = 158 + FASHION_TREND = 159 + + SPORTS = 234 + SPORTS_BASKETBALL = 235 + SPORTS_FOOTBALL = 249 + SPORTS_AEROBICS = 164 + SPORTS_ATHLETIC = 236 + SPORTS_CULTURE = 237 + SPORTS_COMPREHENSIVE = 238 + + ANIMAL = 217 + ANIMAL_CAT = 218 + ANIMAL_DOG = 219 + ANIMAL_PANDA = 220 + ANIMAL_WILD_ANIMAL = 221 + ANIMAL_REPTILES = 222 + ANIMAL_COMPOSITE = 75 + + VLOG = 19 diff --git a/utils/content_deduplication.py b/utils/content_deduplication.py new file mode 100644 index 00000000..2eaf0e65 --- /dev/null +++ b/utils/content_deduplication.py @@ -0,0 +1,134 @@ +""" +内容去重工具 +专注于视频内容的多维度去重判断 +""" +from typing import List, Dict, Any, Optional +import logging + +logger = logging.getLogger(__name__) + +class ContentDeduplication: + """内容去重工具类""" + + @staticmethod + def is_content_duplicate( + new_content: Dict[str, Any], + existing_content: Optional[Dict[str, Any]] + ) -> bool: + """ + 判断内容是否重复 + + Args: + new_content: 新抓取的内容,包含: + - account_id: 账号ID + - title: 标题 + - tags: 标签列表 + - mentions: @用户列表 + existing_content: 数据库中已存在的内容,包含相同的字段 + 如果为None,表示数据库中不存在该内容 + + Returns: + bool: 是否重复 + """ + # 如果数据库中不存在,则一定不重复 + if not existing_content: + return False + + try: + # 1. 检查账号ID + if new_content['account_id'] != existing_content['account_id']: + return False + + # 2. 
检查标题、标签列表、@用户列表和发布时间是否都匹配 + # 将标题、标签、@用户都转换为小写进行比较,避免大小写差异 + new_title = new_content['title'].lower().strip() + existing_title = existing_content['title'].lower().strip() + + # 检查发布时间 + new_publish_time = new_content.get('publish_time', '').strip() + existing_publish_time = existing_content.get('publish_time', '').strip() + + # 确保 tags 和 mentions 字段存在且为列表 + new_tags = set(tag.lower().strip() for tag in (new_content.get('tags') or [])) + existing_tags = set(tag.lower().strip() for tag in (existing_content.get('tags') or [])) + + new_mentions = set(mention.lower().strip() for mention in (new_content.get('mentions') or [])) + existing_mentions = set(mention.lower().strip() for mention in (existing_content.get('mentions') or [])) + + # 所有字段都必须匹配 + title_match = new_title == existing_title + publish_time_match = new_publish_time == existing_publish_time + tags_match = new_tags == existing_tags + mentions_match = new_mentions == existing_mentions + + is_duplicate = title_match and publish_time_match and tags_match and mentions_match + + if is_duplicate: + logger.info(f"检测到完全重复内容:") + logger.info(f"- 标题匹配: {new_title}") + logger.info(f"- 发布时间匹配: {new_publish_time}") + logger.info(f"- 标签匹配: {new_tags}") + logger.info(f"- @用户匹配: {new_mentions}") + else: + # 记录不匹配的原因 + if not title_match: + logger.info(f"标题不匹配: 新标题[{new_title}] != 已存在标题[{existing_title}]") + if not publish_time_match: + logger.info(f"发布时间不匹配: 新发布时间[{new_publish_time}] != 已存在发布时间[{existing_publish_time}]") + if not tags_match: + logger.info(f"标签不匹配: 新标签{new_tags} != 已存在标签{existing_tags}") + if not mentions_match: + logger.info(f"@用户不匹配: 新@用户{new_mentions} != 已存在@用户{existing_mentions}") + + logger.debug(f"新内容: {new_content}") + logger.debug(f"已存在内容: {existing_content}") + + return is_duplicate + + except KeyError as e: + logger.error(f"内容格式错误,缺少必要字段: {str(e)}") + return False + except Exception as e: + logger.error(f"去重判断过程出错: {str(e)}") + return False + + @staticmethod + def should_update_content( + new_content: Dict[str, Any], + existing_content: Dict[str, Any] + ) -> bool: + """ + 判断是否需要更新内容 + + Args: + new_content: 新抓取的内容 + existing_content: 数据库中已存在的内容 + + Returns: + bool: 是否需要更新 + """ + try: + # 检查统计数据是否有变化 + if 'stats' in new_content and 'stats' in existing_content: + new_stats = new_content['stats'] + existing_stats = existing_content['stats'] + + # 任何一个统计数据变化,都需要更新 + if (new_stats.get('plays', 0) != existing_stats.get('plays', 0) or + new_stats.get('likes', 0) != existing_stats.get('likes', 0) or + new_stats.get('comments', 0) != existing_stats.get('comments', 0) or + new_stats.get('shares', 0) != existing_stats.get('shares', 0)): + logger.info(f"检测到统计数据变化,需要更新: {new_content['title']}") + return True + + # 检查状态是否变化 + if new_content.get('status') != existing_content.get('status'): + logger.info(f"检测到状态变化,需要更新: {new_content['title']}") + return True + + logger.info(f"内容无需更新: {new_content['title']}") + return False + + except Exception as e: + logger.error(f"更新判断过程出错: {str(e)}") + return False \ No newline at end of file diff --git a/utils/cookie_helper.py b/utils/cookie_helper.py new file mode 100644 index 00000000..d6f4be65 --- /dev/null +++ b/utils/cookie_helper.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- +import json +import shutil +from pathlib import Path +from typing import Optional +from datetime import datetime + +from utils.log import logger + +class CookieHelper: + """Cookie 工具类""" + + @staticmethod + def backup_cookie_file(cookie_path: str) -> Optional[str]: + """备份 Cookie 文件 + + Args: + cookie_path: Cookie 文件路径 + + 
Returns: + Optional[str]: 备份文件路径,失败返回 None + """ + try: + cookie_file = Path(cookie_path) + if not cookie_file.exists(): + return None + + backup_path = f"{cookie_path}.{datetime.now().strftime('%Y%m%d_%H%M%S')}.bak" + shutil.copy2(cookie_path, backup_path) + logger.info(f"已创建Cookie文件备份: {backup_path}") + return backup_path + except Exception as e: + logger.error(f"备份Cookie文件失败: {str(e)}") + return None + + @staticmethod + def validate_cookie_file(cookie_path: str, max_size: int = 100 * 1024) -> bool: + """验证 Cookie 文件 + + Args: + cookie_path: Cookie 文件路径 + max_size: 最大文件大小(字节) + + Returns: + bool: 文件是否有效 + """ + try: + cookie_file = Path(cookie_path) + + # 检查文件是否存在 + if not cookie_file.exists(): + logger.error(f"Cookie文件不存在: {cookie_path}") + return False + + # 检查文件大小 + file_size = cookie_file.stat().st_size + if file_size > max_size: + logger.error(f"Cookie文件过大: {file_size} bytes") + return False + + # 检查是否为空 + if file_size == 0: + logger.error("Cookie文件为空") + return False + + # 验证JSON格式 + with open(cookie_file, 'r', encoding='utf-8') as f: + json.load(f) + + return True + except json.JSONDecodeError: + logger.error(f"Cookie文件不是有效的JSON格式: {cookie_path}") + return False + except Exception as e: + logger.error(f"验证Cookie文件失败: {str(e)}") + return False \ No newline at end of file diff --git a/utils/files_times.py b/utils/files_times.py index a1a6cccf..c93efd8d 100644 --- a/utils/files_times.py +++ b/utils/files_times.py @@ -1,83 +1,91 @@ -from datetime import timedelta - -from datetime import datetime -from pathlib import Path - -from conf import BASE_DIR - - -def get_absolute_path(relative_path: str, base_dir: str = None) -> str: - # Convert the relative path to an absolute path - absolute_path = Path(BASE_DIR) / base_dir / relative_path - return str(absolute_path) - - -def get_title_and_hashtags(filename): - """ - 获取视频标题和 hashtag - - Args: - filename: 视频文件名 - - Returns: - 视频标题和 hashtag 列表 - """ - - # 获取视频标题和 hashtag txt 文件名 - txt_filename = filename.replace(".mp4", ".txt") - - # 读取 txt 文件 - with open(txt_filename, "r", encoding="utf-8") as f: - content = f.read() - - # 获取标题和 hashtag - splite_str = content.strip().split("\n") - title = splite_str[0] - hashtags = splite_str[1].replace("#", "").split(" ") - - return title, hashtags - - -def generate_schedule_time_next_day(total_videos, videos_per_day, daily_times=None, timestamps=False, start_days=0): - """ - Generate a schedule for video uploads, starting from the next day. - - Args: - - total_videos: Total number of videos to be uploaded. - - videos_per_day: Number of videos to be uploaded each day. - - daily_times: Optional list of specific times of the day to publish the videos. - - timestamps: Boolean to decide whether to return timestamps or datetime objects. - - start_days: Start from after start_days. - - Returns: - - A list of scheduling times for the videos, either as timestamps or datetime objects. 
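A minimal usage sketch for the new `CookieHelper`, assuming the repository layout introduced in this diff (`utils/cookie_helper.py`); the cookie path below is illustrative only:

```python
from utils.cookie_helper import CookieHelper

cookie_path = "cookies/tencent_12345.json"  # illustrative path, adjust to your layout

# Take a timestamped backup first; returns the backup path,
# or None if the source file is missing or the copy fails.
backup = CookieHelper.backup_cookie_file(cookie_path)

# Then check existence, size (100 KB default limit) and JSON syntax.
if not CookieHelper.validate_cookie_file(cookie_path):
    print("Cookie file is invalid and needs to be regenerated")
```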
- """ - if videos_per_day <= 0: - raise ValueError("videos_per_day should be a positive integer") - - if daily_times is None: - # Default times to publish videos if not provided - daily_times = [6, 11, 14, 16, 22] - - if videos_per_day > len(daily_times): - raise ValueError("videos_per_day should not exceed the length of daily_times") - - # Generate timestamps - schedule = [] - current_time = datetime.now() - - for video in range(total_videos): - day = video // videos_per_day + start_days + 1 # +1 to start from the next day - daily_video_index = video % videos_per_day - - # Calculate the time for the current video - hour = daily_times[daily_video_index] - time_offset = timedelta(days=day, hours=hour - current_time.hour, minutes=-current_time.minute, - seconds=-current_time.second, microseconds=-current_time.microsecond) - timestamp = current_time + time_offset - - schedule.append(timestamp) - - if timestamps: - schedule = [int(time.timestamp()) for time in schedule] - return schedule +from datetime import timedelta + +from datetime import datetime +from pathlib import Path + +from conf import BASE_DIR + + +def get_absolute_path(relative_path: str, base_dir: str = None) -> str: + # Convert the relative path to an absolute path + absolute_path = Path(BASE_DIR) / base_dir / relative_path + return str(absolute_path) + + +def get_title_and_hashtags(file_path: str) -> tuple[str, list, list]: + """ + 从文件名中提取标题和标签 + + Args: + file_path: 文件路径 + + Returns: + tuple: (标题, 标签列表, @提及列表) + """ + # 获取文件名(不含扩展名) + filename = Path(file_path).stem + + # 分离标题和标签 + parts = filename.split('#') + title = parts[0].strip() + + # 提取标签和@提及 + tags = [] + mentions = [] + + if len(parts) > 1: + # 处理标签部分 + tag_part = parts[1] + # 分离标签和@提及 + for item in tag_part.split(): + if item.startswith('@'): + mentions.append(item[1:]) # 去掉@符号 + else: + tags.append(item) + + return title, tags, mentions + + +def generate_schedule_time_next_day(total_videos, videos_per_day, daily_times=None, timestamps=False, start_days=0): + """ + 生成从第二天开始的视频上传时间表。 + + Args: + - total_videos: 需要上传的视频总数。 + - videos_per_day: 每天上传的视频数量。 + - daily_times: 可选的每天发布视频的具体时间列表。 + - timestamps: 布尔值,决定是返回时间戳还是 datetime 对象。 + - start_days: 从多少天后开始。 + + Returns: + - 视频的计划时间列表,可以是时间戳或 datetime 对象。 + """ + if videos_per_day <= 0: + raise ValueError("videos_per_day 应该是一个正整数") + + if daily_times is None: + # 如果未提供发布时间,使用默认时间 + daily_times = [6, 11, 14, 16, 22] + + if videos_per_day > len(daily_times): + raise ValueError("每天上传的视频数量不能超过每日发布时间点的数量") + + # 生成时间戳 + schedule = [] + current_time = datetime.now() + + for video in range(total_videos): + day = video // videos_per_day + start_days + 1 # +1 表示从第二天开始 + daily_video_index = video % videos_per_day + + # 计算当前视频的发布时间 + hour = daily_times[daily_video_index] + time_offset = timedelta(days=day, hours=hour - current_time.hour, minutes=-current_time.minute, + seconds=-current_time.second, microseconds=-current_time.microsecond) + timestamp = current_time + time_offset + + schedule.append(timestamp) + + if timestamps: + schedule = [int(time.timestamp()) for time in schedule] + return schedule diff --git a/utils/log.py b/utils/log.py index fa63a8ca..b6484ec9 100644 --- a/utils/log.py +++ b/utils/log.py @@ -1,51 +1,52 @@ -from pathlib import Path -from sys import stdout -from loguru import logger - -from conf import BASE_DIR - - -def log_formatter(record: dict) -> str: - """ - Formatter for log records. - :param dict record: Log object containing log metadata & message. 
- :returns: str - """ - colors = { - "TRACE": "#cfe2f3", - "INFO": "#9cbfdd", - "DEBUG": "#8598ea", - "WARNING": "#dcad5a", - "SUCCESS": "#3dd08d", - "ERROR": "#ae2c2c" - } - color = colors.get(record["level"].name, "#b3cfe7") - return f"{{time:YYYY-MM-DD HH:mm:ss}} | {{level}}: {{message}}\n" - - -def create_logger(log_name: str, file_path: str): - """ - Create custom logger for different business modules. - :param str log_name: name of log - :param str file_path: Optional path to log file - :returns: Configured logger - """ - def filter_record(record): - return record["extra"].get("business_name") == log_name - - Path(BASE_DIR / file_path).parent.mkdir(exist_ok=True) - logger.add(Path(BASE_DIR / file_path), filter=filter_record, level="INFO", rotation="10 MB", retention="10 days", backtrace=True, diagnose=True) - return logger.bind(business_name=log_name) - - -# Remove all existing handlers -logger.remove() -# Add a standard console handler -logger.add(stdout, colorize=True, format=log_formatter) - -douyin_logger = create_logger('douyin', 'logs/douyin.log') -tencent_logger = create_logger('tencent', 'logs/tencent.log') -xhs_logger = create_logger('xhs', 'logs/xhs.log') -tiktok_logger = create_logger('tiktok', 'logs/tiktok.log') -bilibili_logger = create_logger('bilibili', 'logs/bilibili.log') -kuaishou_logger = create_logger('kuaishou', 'logs/kuaishou.log') +from pathlib import Path +from sys import stdout +from loguru import logger + +from conf import BASE_DIR + + +def log_formatter(record: dict) -> str: + """ + Formatter for log records. + :param dict record: Log object containing log metadata & message. + :returns: str + """ + colors = { + "TRACE": "#cfe2f3", + "INFO": "#9cbfdd", + "DEBUG": "#8598ea", + "WARNING": "#dcad5a", + "SUCCESS": "#3dd08d", + "ERROR": "#ae2c2c" + } + color = colors.get(record["level"].name, "#b3cfe7") + return f"{{time:YYYY-MM-DD HH:mm:ss}} | {{level}}: {{message}}\n" + + +def create_logger(log_name: str, file_path: str): + """ + Create custom logger for different business modules. 
+ :param str log_name: name of log + :param str file_path: Optional path to log file + :returns: Configured logger + """ + def filter_record(record): + return record["extra"].get("business_name") == log_name + + Path(BASE_DIR / file_path).parent.mkdir(exist_ok=True) + logger.add(Path(BASE_DIR / file_path), filter=filter_record, level="INFO", rotation="10 MB", retention="10 days", backtrace=True, diagnose=True) + return logger.bind(business_name=log_name) + + +# Remove all existing handlers +logger.remove() +# Add a standard console handler +logger.add(stdout, colorize=True, format=log_formatter) + +douyin_logger = create_logger('douyin', 'logs/douyin.log') +tencent_logger = create_logger('tencent', 'logs/tencent.log') +xhs_logger = create_logger('xhs', 'logs/xhs.log') +tiktok_logger = create_logger('tiktok', 'logs/tiktok.log') +bilibili_logger = create_logger('bilibili', 'logs/bilibili.log') +kuaishou_logger = create_logger('kuaishou', 'logs/kuaishou.log') +sqlite_logger = create_logger('sqlite', 'logs/sqlite.log') \ No newline at end of file diff --git a/utils/playwright_helper.py b/utils/playwright_helper.py new file mode 100644 index 00000000..2362d41f --- /dev/null +++ b/utils/playwright_helper.py @@ -0,0 +1,217 @@ +# -*- coding: utf-8 -*- +import subprocess +from typing import Optional, Dict +from pathlib import Path +from playwright.async_api import async_playwright +import asyncio +from contextlib import asynccontextmanager + +from utils.log import logger + +class PlaywrightHelper: + """Playwright 工具类""" + + def __init__(self): + """初始化 PlaywrightHelper""" + self._context = None + self._browser = None + self._playwright = None + + @staticmethod + def install_browser(browser_type: str = "chromium") -> bool: + """安装 Playwright 浏览器 + + Args: + browser_type: 浏览器类型,默认为 chromium + + Returns: + bool: 是否安装成功 + """ + try: + logger.info(f"正在安装 Playwright {browser_type} 浏览器...") + subprocess.run(["playwright", "install", browser_type], check=True) + logger.success(f"Playwright {browser_type} 浏览器安装成功!") + return True + except subprocess.CalledProcessError as e: + logger.error(f"安装失败: {str(e)}") + return False + except Exception as e: + logger.error(f"安装过程发生异常: {str(e)}") + return False + + async def initialize(self, browser_type: str = "chromium", user_data_dir: Optional[str] = None, storage_state: Optional[Dict] = None): + """初始化浏览器资源 + + Args: + browser_type: 浏览器类型,默认为 chromium + user_data_dir: 用户数据目录路径,如果提供则使用持久化上下文 + storage_state: 浏览器状态,包含 cookies 和 localStorage + """ + try: + self._playwright = await async_playwright().start() + browser_args = [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-infobars', + '--window-position=0,0', + '--ignore-certifcate-errors', + '--ignore-certifcate-errors-spki-list', + '--disable-background-networking', + '--disable-background-timer-throttling', + '--disable-backgrounding-occluded-windows', + '--disable-breakpad', + '--disable-client-side-phishing-detection', + '--disable-component-update', + '--disable-default-apps', + '--disable-dev-shm-usage', + '--disable-domain-reliability', + '--disable-extensions', + '--disable-features=AudioServiceOutOfProcess', + '--disable-hang-monitor', + '--disable-ipc-flooding-protection', + '--disable-notifications', + '--disable-offer-store-unmasked-wallet-cards', + '--disable-popup-blocking', + '--disable-print-preview', + '--disable-prompt-on-repost', + '--disable-renderer-backgrounding', + '--disable-speech-api', + '--disable-sync', + '--disable-web-security', + '--disk-cache-size=33554432', + 
'--hide-scrollbars', + '--ignore-gpu-blacklist', + '--metrics-recording-only', + '--mute-audio', + '--no-default-browser-check', + '--no-first-run', + '--no-pings', + '--no-zygote', + '--password-store=basic', + '--use-gl=swiftshader', + '--use-mock-keychain', + '--window-size=1920,1080', + ] + + context_params = { + 'viewport': {'width': 1920, 'height': 1080}, + 'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', + 'locale': 'zh-CN', + 'timezone_id': 'Asia/Shanghai', + 'geolocation': {'latitude': 39.9042, 'longitude': 116.4074}, # 北京坐标 + 'permissions': ['geolocation'], + 'color_scheme': 'light', + 'device_scale_factor': 1, + 'is_mobile': False, + 'has_touch': False, + 'java_script_enabled': True, + 'bypass_csp': True, + 'proxy': None # 如果需要代理可以在这里设置 + } + + if user_data_dir: + logger.info(f"尝试使用持久化上下文目录: {user_data_dir}") + if not Path(user_data_dir).exists(): + logger.warning(f"持久化目录不存在: {user_data_dir}") + else: + logger.info(f"持久化目录存在,检查内容...") + # 列出目录内容 + files = list(Path(user_data_dir).glob('*')) + logger.info(f"目录内容: {[f.name for f in files]}") + + # 使用持久化上下文 + persistent_context_params = { + 'user_data_dir': user_data_dir, + 'headless': False, # 默认关闭无头模式 + 'args': browser_args, + 'ignore_default_args': ['--enable-automation'], # 禁用自动化标记 + 'accept_downloads': True, # 允许下载 + 'bypass_csp': True, # 绕过内容安全策略 + 'viewport': context_params['viewport'], + 'locale': context_params['locale'], + 'timezone_id': context_params['timezone_id'], + 'permissions': context_params['permissions'], + 'persistent': True, # 确保是持久化的 + } + + logger.info("创建持久化上下文...") + self._context = await self._playwright.chromium.launch_persistent_context(**persistent_context_params) + self._browser = self._context.browser + logger.success("持久化上下文创建成功") + else: + # 创建普通浏览器实例 + logger.info("创建普通浏览器实例...") + self._browser = await self._playwright.chromium.launch( + headless=False, # 默认关闭无头模式 + args=browser_args, + ignore_default_args=['--enable-automation'] # 禁用自动化标记 + ) + + if storage_state: + logger.info("使用提供的存储状态...") + context_params['storage_state'] = storage_state + + self._context = await self._browser.new_context(**context_params) + logger.success("普通浏览器上下文创建成功") + + # 注入 stealth.js 脚本 + try: + # utils/stealth.min.js打开失败, + with open('utils/stealth.min.js', 'r', encoding='utf-8') as f: + stealth_js = f.read() + await self._context.add_init_script(stealth_js) + logger.success("注入 stealth.js 成功") + except Exception as e: + logger.error(f"注入 stealth.js 失败: {str(e)}") + raise + + # 检查上下文状态 + try: + state = await self._context.storage_state() + cookies_count = len(state.get('cookies', [])) + origins_count = len(state.get('origins', [])) + logger.info(f"上下文状态: {cookies_count} cookies, {origins_count} origins") + except Exception as e: + logger.error(f"检查上下文状态失败: {str(e)}") + + logger.success("浏览器资源初始化成功") + except Exception as e: + logger.error(f"初始化浏览器资源失败: {str(e)}") + await self.cleanup() + raise + + async def cleanup(self): + """清理浏览器资源""" + try: + if self._context: + # 不要关闭持久化上下文,只关闭页面 + if not isinstance(self._context.browser, type(None)): + # 这是普通上下文,可以关闭 + await self._context.close() + if self._browser and not isinstance(self._context.browser, type(None)): + # 只在非持久化上下文时关闭浏览器 + await self._browser.close() + if self._playwright: + await self._playwright.stop() + logger.success("浏览器资源清理完成") + except Exception as e: + logger.error(f"清理浏览器资源失败: {str(e)}") + raise + + @asynccontextmanager + async def get_context(self, browser_type: str = 
"chromium", user_data_dir: Optional[str] = None, storage_state: Optional[Dict] = None): + """获取浏览器上下文的上下文管理器 + + Args: + browser_type: 浏览器类型,默认为 chromium + user_data_dir: 用户数据目录路径,如果提供则使用持久化上下文 + storage_state: 浏览器状态,包含 cookies 和 localStorage + + Yields: + browser_context: 浏览器上下文 + """ + try: + await self.initialize(browser_type, user_data_dir, storage_state) + yield self._context + finally: + await self.cleanup() \ No newline at end of file diff --git a/utils/social_media_db.py b/utils/social_media_db.py new file mode 100644 index 00000000..ee9cd3f7 --- /dev/null +++ b/utils/social_media_db.py @@ -0,0 +1,482 @@ +"""社交媒体账号数据库管理模块 + +此模块提供了社交媒体账号信息的数据库操作接口,支持: +1. 账号信息的CRUD操作 +2. 账号状态管理 +3. Cookie文件关联 +4. 数据统计分析 +""" + +import json +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional, Union, Any + +from utils.sqlite_helper import SQLiteHelper +from utils.log import logger +from conf import BASE_DIR + +# Cookie文件大小限制 (100KB) +MAX_COOKIE_FILE_SIZE = 100 * 1024 + +class CookieError(Exception): + """Cookie相关错误""" + pass + +class SocialMediaDB: + """社交媒体账号数据库管理类 + + 提供了一系列方法来管理社交媒体账号信息,包括: + - 账号信息的增删改查 + - 账号状态更新 + - Cookie文件管理 + - 数据统计 + + 典型用法: + db = SocialMediaDB() + # 使用add_or_update_account来添加或更新账号 + db.add_or_update_account("tencent", "12345", {"nickname": "测试账号"}) + db.add_cookie("tencent", "12345", "cookies/test.json") + """ + + def __init__(self, db_path: Union[str, Path] = None): + """初始化数据库管理类 + + Args: + db_path: 数据库文件路径,默认为 BASE_DIR/data/social_media.db + """ + if db_path is None: + db_path = BASE_DIR / "data" / "social_media.db" + + self.db = SQLiteHelper(db_path) + self._initialize_db() + + def _initialize_db(self): + """初始化数据库表结构""" + # 创建账号表 + create_accounts_table_sql = """ + CREATE TABLE IF NOT EXISTS social_media_accounts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + platform TEXT NOT NULL, + account_id TEXT NOT NULL, + nickname TEXT NOT NULL, + video_count INTEGER DEFAULT 0, + follower_count INTEGER DEFAULT 0, + last_update TIMESTAMP, + status INTEGER DEFAULT 1, + extra TEXT, + UNIQUE(platform, account_id) + ) + """ + + # 创建cookie表 + create_cookies_table_sql = """ + CREATE TABLE IF NOT EXISTS account_cookies ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + platform TEXT NOT NULL, + account_id TEXT NOT NULL, + cookie_path TEXT NOT NULL, + is_valid INTEGER DEFAULT 1, + created_at TIMESTAMP, + last_check TIMESTAMP, + UNIQUE(platform, account_id, cookie_path), + FOREIGN KEY (platform, account_id) + REFERENCES social_media_accounts(platform, account_id) + ON DELETE CASCADE + ) + """ + + self.db.create_table(create_accounts_table_sql) + self.db.create_table(create_cookies_table_sql) + + def _validate_cookie_file(self, cookie_path: str) -> bool: + """验证cookie文件 + + 检查: + 1. 文件是否存在 + 2. 文件大小是否合理 + 3. 
文件内容是否是有效的JSON + + Args: + cookie_path: cookie文件路径 + + Returns: + bool: 文件是否有效 + + Raises: + CookieError: 当cookie文件无效时 + """ + try: + cookie_file = Path(cookie_path) + if not cookie_file.exists(): + raise CookieError(f"Cookie文件不存在: {cookie_path}") + + # 检查文件大小 + if cookie_file.stat().st_size > MAX_COOKIE_FILE_SIZE: + raise CookieError(f"Cookie文件过大: {cookie_file.stat().st_size} bytes") + + # 验证JSON格式 + with open(cookie_file, 'r', encoding='utf-8') as f: + json.load(f) + + return True + except json.JSONDecodeError: + raise CookieError(f"Cookie文件不是有效的JSON格式: {cookie_path}") + except Exception as e: + raise CookieError(f"Cookie文件验证失败: {str(e)}") + + def add_cookie(self, platform: str, account_id: str, cookie_path: str) -> bool: + """添加账号的cookie信息 + + Args: + platform: 平台名称 + account_id: 平台账号ID + cookie_path: cookie文件路径 + + Returns: + bool: 是否添加成功 + """ + try: + # 检查账号是否存在 + if not self.get_account(platform, account_id): + logger.error(f"账号不存在: {platform}/{account_id}") + return False + + # 验证cookie文件 + try: + self._validate_cookie_file(cookie_path) + except CookieError as e: + logger.error(str(e)) + return False + + sql = """ + INSERT INTO account_cookies + (platform, account_id, cookie_path, created_at, last_check) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT(platform, account_id, cookie_path) + DO UPDATE SET + is_valid = 1, + last_check = excluded.last_check + """ + now = datetime.now() + self.db.execute(sql, (platform, account_id, cookie_path, now, now)) + return True + except Exception as e: + logger.error(f"添加cookie失败: {str(e)}") + return False + + def update_cookie_status(self, platform: str, account_id: str, + cookie_path: str, is_valid: bool) -> bool: + """更新cookie状态 + + Args: + platform: 平台名称 + account_id: 平台账号ID + cookie_path: cookie文件路径 + is_valid: 是否有效 + + Returns: + bool: 是否更新成功 + """ + try: + # 确保cookie_path是字符串 + cookie_path = str(cookie_path) + + sql = """ + UPDATE account_cookies + SET is_valid = ?, last_check = ? + WHERE platform = ? AND account_id = ? AND cookie_path = ? + """ + self.db.execute(sql, ( + 1 if is_valid else 0, + datetime.now(), + platform, account_id, cookie_path + )) + return True + except Exception as e: + logger.error(f"更新cookie状态失败: {str(e)}") + return False + + def get_valid_cookies(self, platform: str, account_id: str) -> List[str]: + """获取账号的有效cookie文件路径列表 + + Args: + platform: 平台名称 + account_id: 平台账号ID + + Returns: + List[str]: cookie文件路径列表 + """ + sql = """ + SELECT cookie_path + FROM account_cookies + WHERE platform = ? AND account_id = ? AND is_valid = 1 + ORDER BY last_check DESC + """ + results = self.db.query_all(sql, (platform, account_id)) + return [result['cookie_path'] for result in results] + + def update_account(self, platform: str, account_id: str, + updates: Dict[str, any]) -> bool: + """更新账号信息 + + Args: + platform: 平台名称 + account_id: 平台账号ID + updates: 需要更新的字段和值的字典 + + Returns: + bool: 是否更新成功 + """ + try: + # 构建UPDATE语句 + set_clause = ", ".join([f"{k} = ?" for k in updates.keys()]) + sql = f""" + UPDATE social_media_accounts + SET {set_clause}, last_update = ? + WHERE platform = ? AND account_id = ? 
+ """ + + # 准备参数 + params = list(updates.values()) + params.extend([datetime.now(), platform, account_id]) + + self.db.execute(sql, tuple(params)) + return True + except Exception as e: + logger.error(f"更新账号失败: {str(e)}") + return False + + def get_account(self, platform: str, account_id: str) -> Optional[Dict]: + """获取账号信息 + + Args: + platform: 平台名称 + account_id: 平台账号ID + + Returns: + Dict: 账号信息字典,未找到返回None + """ + sql = """ + SELECT a.*, GROUP_CONCAT(c.cookie_path) as cookie_paths + FROM social_media_accounts a + LEFT JOIN account_cookies c + ON a.platform = c.platform + AND a.account_id = c.account_id + AND c.is_valid = 1 + WHERE a.platform = ? AND a.account_id = ? + GROUP BY a.platform, a.account_id + """ + result = self.db.query_one(sql, (platform, account_id)) + + if result: + if result.get('extra'): + result['extra'] = json.loads(result['extra']) + if result.get('cookie_paths'): + result['cookie_paths'] = result['cookie_paths'].split(',') + else: + result['cookie_paths'] = [] + + return result + + def get_all_accounts(self, platform: str = None) -> List[Dict]: + """获取所有账号信息 + + Args: + platform: 平台名称,为None时获取所有平台 + + Returns: + List[Dict]: 账号信息列表 + """ + if platform: + where_clause = "WHERE a.platform = ?" + params = (platform,) + else: + where_clause = "" + params = () + + sql = f""" + SELECT a.*, GROUP_CONCAT(c.cookie_path) as cookie_paths + FROM social_media_accounts a + LEFT JOIN account_cookies c + ON a.platform = c.platform + AND a.account_id = c.account_id + AND c.is_valid = 1 + {where_clause} + GROUP BY a.platform, a.account_id + """ + + results = self.db.query_all(sql, params) + + # 解析extra字段和cookie_paths + for result in results: + if result.get('extra'): + result['extra'] = json.loads(result['extra']) + if result.get('cookie_paths'): + result['cookie_paths'] = result['cookie_paths'].split(',') + else: + result['cookie_paths'] = [] + + return results + + def delete_account(self, platform: str, account_id: str) -> bool: + """删除账号 + + Args: + platform: 平台名称 + account_id: 平台账号ID + + Returns: + bool: 是否删除成功 + """ + try: + sql = "DELETE FROM social_media_accounts WHERE platform = ? AND account_id = ?" + self.db.execute(sql, (platform, account_id)) + return True + except Exception as e: + logger.error(f"删除账号失败: {str(e)}") + return False + + def update_account_status(self, platform: str, account_id: str, + status: int) -> bool: + """更新账号状态 + + Args: + platform: 平台名称 + account_id: 平台账号ID + status: 状态值(1:正常, 0:异常) + + Returns: + bool: 是否更新成功 + """ + return self.update_account(platform, account_id, {'status': status}) + + def get_platform_statistics(self, platform: str = None) -> Dict: + """获取平台账号统计信息 + + Args: + platform: 平台名称,为None时统计所有平台 + + Returns: + Dict: 统计信息字典 + """ + if platform: + where_clause = "WHERE platform = ?" + params = (platform,) + else: + where_clause = "" + params = () + + sql = f""" + SELECT + COUNT(*) as total_accounts, + SUM(video_count) as total_videos, + SUM(follower_count) as total_followers, + SUM(CASE WHEN status = 1 THEN 1 ELSE 0 END) as active_accounts + FROM social_media_accounts + {where_clause} + """ + + return self.db.query_one(sql, params) or {} + + + def get_account_verification_time(self, platform: str, nickname: str) -> Optional[datetime]: + """获取账号的验证时间 + + Args: + platform: 平台名称 + nickname: 账号昵称 + + Returns: + Optional[datetime]: 验证时间,未找到返回None + """ + sql = """ + SELECT ac.last_check + FROM account_cookies ac + INNER JOIN social_media_accounts sma + ON ac.platform = sma.platform + AND ac.account_id = sma.account_id + WHERE ac.platform = ? 
AND sma.nickname = ? + ORDER BY ac.last_check DESC + LIMIT 1 + """ + result = self.db.query_one(sql, (platform, nickname)) + return result['last_check'] if result else None + + def add_or_update_account(self, platform: str, account_id: str, info: Dict[str, Any]) -> bool: + """添加或更新账号信息 + + 如果账号不存在则添加新账号,如果已存在则更新账号信息 + + Args: + platform: 平台名称 + account_id: 账号ID + info: 账号信息字典,包含nickname, video_count, follower_count等字段 + + Returns: + bool: 是否操作成功 + """ + try: + # 检查账号是否存在 + sql_check = """ + SELECT COUNT(*) as count + FROM social_media_accounts + WHERE platform = ? AND account_id = ? + """ + result = self.db.query_one(sql_check, (platform, account_id)) + + if result['count'] > 0: + # 如果存在,执行更新 + updates = { + 'nickname': info.get('nickname', ''), + 'video_count': info.get('video_count', 0), + 'follower_count': info.get('follower_count', 0) + } + + # 如果有extra字段,添加到更新数据中 + if 'extra' in info: + updates['extra'] = json.dumps(info['extra'], ensure_ascii=False) + + # 构建UPDATE语句 + set_clause = ", ".join([f"{k} = ?" for k in updates.keys()]) + sql = f""" + UPDATE social_media_accounts + SET {set_clause}, last_update = ? + WHERE platform = ? AND account_id = ? + """ + + # 准备参数 + params = list(updates.values()) + params.extend([datetime.now(), platform, account_id]) + + self.db.execute(sql, tuple(params)) + logger.info(f"更新账号信息: {platform}/{account_id}") + + else: + # 如果不存在,执行插入 + sql = """ + INSERT INTO social_media_accounts + (platform, account_id, nickname, video_count, follower_count, last_update, extra) + VALUES (?, ?, ?, ?, ?, ?, ?) + """ + + self.db.execute(sql, ( + platform, + account_id, + info.get('nickname', ''), + info.get('video_count', 0), + info.get('follower_count', 0), + datetime.now(), + json.dumps(info.get('extra', {}), ensure_ascii=False) + )) + logger.info(f"添加新账号: {platform}/{account_id}") + + return True + + except Exception as e: + logger.error(f"添加/更新账号失败: {str(e)}") + return False + + def close(self): + """关闭数据库连接""" + self.db.close() \ No newline at end of file diff --git a/utils/sqlite_helper.py b/utils/sqlite_helper.py new file mode 100644 index 00000000..28b4e710 --- /dev/null +++ b/utils/sqlite_helper.py @@ -0,0 +1,390 @@ +"""SQLite数据库操作帮助类 + +此模块提供了同步和异步两种SQLite数据库操作接口,支持: +1. 数据库连接管理(同步/异步) +2. 事务处理(同步/异步) +3. 参数化查询 +4. 批量操作 +5. 
线程安全操作 +""" + +import sqlite3 +import aiosqlite +from typing import Any, List, Dict, Optional, Union, Tuple +from contextlib import contextmanager, asynccontextmanager +import logging +from pathlib import Path +from threading import RLock +import asyncio + +from .log import sqlite_logger as logger + +class SQLiteHelper: + """同步SQLite数据库操作帮助类 + + 提供了一系列线程安全的同步SQLite数据库操作方法。 + 适用于同步代码环境或多线程场景。 + + 典型用法: + db = SQLiteHelper("example.db") + with db.connection() as conn: + db.execute("INSERT INTO users (name, age) VALUES (?, ?)", ("张三", 25)) + """ + + def __init__(self, db_path: Union[str, Path], check_same_thread: bool = True) -> None: + """初始化SQLite帮助类 + + Args: + db_path: 数据库文件路径 + check_same_thread: 是否检查同一线程 + """ + self.db_path = Path(db_path) + self.check_same_thread = check_same_thread + self._conn: Optional[sqlite3.Connection] = None + self._lock = RLock() # 使用重入锁,避免同一线程内重复加锁引起死锁 + self._initialize_db() + + def _initialize_db(self) -> None: + """初始化数据库连接""" + self.db_path.parent.mkdir(parents=True, exist_ok=True) + + @contextmanager + def connection(self): + """获取数据库连接的上下文管理器,线程安全 + + Returns: + sqlite3.Connection: 数据库连接对象 + + Example: + with db.connection() as conn: + conn.execute("SELECT * FROM users") + """ + with self._lock: # 使用线程锁保护连接操作 + if self._conn is None: + self._conn = sqlite3.connect( + self.db_path, + check_same_thread=self.check_same_thread + ) + self._conn.row_factory = sqlite3.Row + + try: + yield self._conn + except Exception as e: + self._conn.rollback() + logger.error(f"数据库操作错误: {str(e)}") + raise + + def execute(self, sql: str, parameters: Optional[Tuple[Any, ...]] = None) -> None: + """执行SQL语句,线程安全 + + Args: + sql: SQL语句 + parameters: SQL参数 + + Example: + db.execute("INSERT INTO users (name) VALUES (?)", ("张三",)) + """ + with self._lock: + with self.connection() as conn: + try: + if parameters: + conn.execute(sql, parameters) + else: + conn.execute(sql) + conn.commit() + except Exception as e: + logger.error(f"执行SQL错误: {sql}, 参数: {parameters}, 错误: {str(e)}") + raise + + def execute_many(self, sql: str, parameters: List[Tuple[Any, ...]]) -> None: + """批量执行SQL语句,线程安全 + + Args: + sql: SQL语句 + parameters: SQL参数列表 + + Example: + db.execute_many( + "INSERT INTO users (name, age) VALUES (?, ?)", + [("张三", 25), ("李四", 30)] + ) + """ + with self._lock: + with self.connection() as conn: + try: + conn.executemany(sql, parameters) + conn.commit() + except Exception as e: + logger.error(f"批量执行SQL错误: {sql}, 参数: {parameters}, 错误: {str(e)}") + raise + + def query_one(self, sql: str, parameters: Optional[Tuple[Any, ...]] = None) -> Optional[Dict[str, Any]]: + """查询单条记录,线程安全 + + Args: + sql: SQL查询语句 + parameters: SQL参数 + + Returns: + Dict[str, Any]: 查询结果字典,未找到返回None + + Example: + user = db.query_one("SELECT * FROM users WHERE id = ?", (1,)) + """ + with self._lock: + with self.connection() as conn: + try: + cursor = conn.execute(sql, parameters or ()) + row = cursor.fetchone() + return dict(row) if row else None + except Exception as e: + logger.error(f"查询单条记录错误: {sql}, 参数: {parameters}, 错误: {str(e)}") + raise + + def query_all(self, sql: str, parameters: Optional[Tuple[Any, ...]] = None) -> List[Dict[str, Any]]: + """查询多条记录,线程安全 + + Args: + sql: SQL查询语句 + parameters: SQL参数 + + Returns: + List[Dict[str, Any]]: 查询结果列表 + + Example: + users = db.query_all("SELECT * FROM users WHERE age > ?", (20,)) + """ + with self._lock: + with self.connection() as conn: + try: + cursor = conn.execute(sql, parameters or ()) + return [dict(row) for row in cursor.fetchall()] + except Exception as e: 
+ logger.error(f"查询多条记录错误: {sql}, 参数: {parameters}, 错误: {str(e)}") + raise + + def table_exists(self, table_name: str) -> bool: + """检查表是否存在,线程安全 + + Args: + table_name: 表名 + + Returns: + bool: 表是否存在 + """ + sql = """ + SELECT name FROM sqlite_master + WHERE type='table' AND name=? + """ + return bool(self.query_one(sql, (table_name,))) + + def create_table(self, sql: str) -> None: + """创建表,线程安全 + + Args: + sql: 建表SQL语句 + + Example: + db.create_table(''' + CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + age INTEGER + ) + ''') + """ + with self._lock: + with self.connection() as conn: + try: + conn.execute(sql) + conn.commit() + except Exception as e: + logger.error(f"创建表错误: {sql}, 错误: {str(e)}") + raise + + def close(self) -> None: + """关闭数据库连接,线程安全""" + with self._lock: + if self._conn: + self._conn.close() + self._conn = None + +class AsyncSQLiteHelper: + """异步SQLite数据库操作帮助类 + + 提供了一系列异步的SQLite数据库操作方法,支持异步上下文管理器和事务处理。 + + 典型用法: + db = AsyncSQLiteHelper("example.db") + async with db.connection() as conn: + await db.execute("INSERT INTO users (name, age) VALUES (?, ?)", ("张三", 25)) + """ + + def __init__(self, db_path: Union[str, Path], check_same_thread: bool = True) -> None: + """初始化SQLite帮助类 + + Args: + db_path: 数据库文件路径 + check_same_thread: 是否检查同一线程 + """ + self.db_path = Path(db_path) + self.check_same_thread = check_same_thread + self._conn: Optional[aiosqlite.Connection] = None + self._lock = asyncio.Lock() # 添加异步锁保护连接 + self._initialize_db() + + def _initialize_db(self) -> None: + """初始化数据库连接""" + self.db_path.parent.mkdir(parents=True, exist_ok=True) + + @asynccontextmanager + async def connection(self): + """获取数据库连接的异步上下文管理器 + + Returns: + aiosqlite.Connection: 数据库连接对象 + + Example: + async with db.connection() as conn: + await conn.execute("SELECT * FROM users") + """ + async with self._lock: + if self._conn is None: + self._conn = await aiosqlite.connect( + self.db_path, + check_same_thread=self.check_same_thread + ) + self._conn.row_factory = aiosqlite.Row + + try: + yield self._conn + except Exception as e: + await self._conn.rollback() + logger.error(f"数据库操作错误: {str(e)}") + raise + + async def execute(self, sql: str, parameters: Optional[Tuple[Any, ...]] = None) -> None: + """异步执行SQL语句""" + async with self._lock: # 锁定整个操作过程 + async with self.connection() as conn: + try: + if parameters: + await conn.execute(sql, parameters) + else: + await conn.execute(sql) + await conn.commit() + except Exception as e: + logger.error( + f"执行SQL错误: {sql}, 参数: {parameters}, 错误: {str(e)}" + ) + raise + + async def execute_many(self, sql: str, parameters: List[Tuple[Any, ...]]) -> None: + """异步批量执行SQL语句 + + Args: + sql: SQL语句 + parameters: SQL参数列表 + + Example: + await db.execute_many( + "INSERT INTO users (name, age) VALUES (?, ?)", + [("张三", 25), ("李四", 30)] + ) + """ + async with self.connection() as conn: + try: + await conn.executemany(sql, parameters) + await conn.commit() + except Exception as e: + logger.error(f"批量执行SQL错误: {sql}, 参数: {parameters}, 错误: {str(e)}") + raise + + async def query_one(self, sql: str, parameters: Optional[Tuple[Any, ...]] = None) -> Optional[Dict[str, Any]]: + """异步查询单条记录 + + Args: + sql: SQL查询语句 + parameters: SQL参数 + + Returns: + Dict[str, Any]: 查询结果字典,未找到返回None + + Example: + user = await db.query_one("SELECT * FROM users WHERE id = ?", (1,)) + """ + async with self.connection() as conn: + try: + cursor = await conn.execute(sql, parameters or ()) + row = await cursor.fetchone() + return dict(row) if row else None 
+ except Exception as e: + logger.error(f"查询单条记录错误: {sql}, 参数: {parameters}, 错误: {str(e)}") + raise + + async def query_all(self, sql: str, parameters: Optional[Tuple[Any, ...]] = None) -> List[Dict[str, Any]]: + """异步查询多条记录 + + Args: + sql: SQL查询语句 + parameters: SQL参数 + + Returns: + List[Dict[str, Any]]: 查询结果列表 + + Example: + users = await db.query_all("SELECT * FROM users WHERE age > ?", (20,)) + """ + async with self.connection() as conn: + try: + cursor = await conn.execute(sql, parameters or ()) + rows = await cursor.fetchall() + return [dict(row) for row in rows] + except Exception as e: + logger.error(f"查询多条记录错误: {sql}, 参数: {parameters}, 错误: {str(e)}") + raise + + async def table_exists(self, table_name: str) -> bool: + """异步检查表是否存在 + + Args: + table_name: 表名 + + Returns: + bool: 表是否存在 + """ + sql = """ + SELECT name FROM sqlite_master + WHERE type='table' AND name=? + """ + result = await self.query_one(sql, (table_name,)) + return bool(result) + + async def create_table(self, sql: str) -> None: + """异步创建表 + + Args: + sql: 建表SQL语句 + + Example: + await db.create_table(''' + CREATE TABLE IF NOT EXISTS users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + age INTEGER + ) + ''') + """ + async with self.connection() as conn: + try: + await conn.execute(sql) + await conn.commit() + except Exception as e: + logger.error(f"创建表错误: {sql}, 错误: {str(e)}") + raise + + async def close(self) -> None: + """异步关闭数据库连接""" + if self._conn: + await self._conn.close() + self._conn = None \ No newline at end of file diff --git a/utils/stealth.min.js b/utils/stealth.min.js index 166d1d74..3f114039 100644 --- a/utils/stealth.min.js +++ b/utils/stealth.min.js @@ -1,7 +1,7 @@ -/*! - * Note: Auto-generated, do not update manually. - * Generated by: https://github.com/berstend/puppeteer-extra/tree/master/packages/extract-stealth-evasions - * Generated on: Mon, 10 Jun 2024 06:21:08 GMT - * License: MIT - */ +/*! + * Note: Auto-generated, do not update manually. 
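Tying the new database modules together, a minimal end-to-end sketch of `SocialMediaDB` (which wraps the synchronous `SQLiteHelper` internally); the platform, account ID and cookie path below are illustrative only:

```python
from utils.social_media_db import SocialMediaDB

db = SocialMediaDB()  # defaults to BASE_DIR/data/social_media.db
try:
    # UPSERT: inserts the account on first sight, updates nickname/counters afterwards.
    db.add_or_update_account("tencent", "12345", {
        "nickname": "测试账号",
        "video_count": 3,
        "follower_count": 1200,
        "extra": {"remark": "示例"},
    })

    # The cookie file must already exist and be valid JSON under 100 KB,
    # otherwise add_cookie logs the problem and returns False.
    db.add_cookie("tencent", "12345", "cookies/tencent_12345.json")

    print(db.get_valid_cookies("tencent", "12345"))
    print(db.get_platform_statistics("tencent"))
finally:
    db.close()
```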
+ * Generated by: https://github.com/berstend/puppeteer-extra/tree/master/packages/extract-stealth-evasions + * Generated on: Mon, 17 Feb 2025 06:23:51 GMT + * License: MIT + */ (({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n 
return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:'utils => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, \'chrome\')`\n Object.defineProperty(window, \'chrome\', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We\'ll extend that later\n })\n }\n\n // That means we\'re running headful and don\'t need to mock anything\n if (\'app\' in window.chrome) {\n return // Nothing to do here\n }\n\n const makeError = {\n ErrorInInvocation: fn => {\n const err = new TypeError(`Error in invocation of app.${fn}()`)\n return utils.stripErrorWithAnchor(\n err,\n `at ${fn} (eval at `\n )\n }\n }\n\n // There\'s a some static data in that property which doesn\'t seem to change,\n // we should periodically check for updates: `JSON.stringify(window.app, null, 2)`\n const STATIC_DATA = JSON.parse(\n `\n{\n "isInstalled": false,\n "InstallState": {\n "DISABLED": "disabled",\n "INSTALLED": "installed",\n "NOT_INSTALLED": "not_installed"\n },\n "RunningState": {\n "CANNOT_RUN": "cannot_run",\n "READY_TO_RUN": "ready_to_run",\n "RUNNING": "running"\n }\n}\n `.trim()\n )\n\n window.chrome.app = {\n ...STATIC_DATA,\n\n get isInstalled() {\n return false\n },\n\n getDetails: function getDetails() {\n if (arguments.length) {\n throw 
makeError.ErrorInInvocation(`getDetails`)\n }\n return null\n },\n getIsInstalled: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`getIsInstalled`)\n }\n return false\n },\n runningState: function getDetails() {\n if (arguments.length) {\n throw makeError.ErrorInInvocation(`runningState`)\n }\n return \'cannot_run\'\n }\n }\n utils.patchToStringNested(window.chrome.app)\n }',_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n 
return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"utils => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n if ('csi' in window.chrome) {\n return // Nothing to do here\n }\n\n // Check that the Navigation Timing API v1 is available, we need that\n if (!window.performance || !window.performance.timing) {\n return\n }\n\n const { timing } = window.performance\n\n window.chrome.csi = function() {\n return {\n onloadT: timing.domContentLoadedEventEnd,\n startE: timing.navigationStart,\n pageT: Date.now() - timing.navigationStart,\n tran: 15 // Transition type or something\n }\n }\n utils.patchToString(window.chrome.csi)\n }",_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) 
{\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n 
}\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { opts }) => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n if ('loadTimes' in window.chrome) {\n return // Nothing to do here\n }\n\n // Check that the Navigation Timing API v1 + v2 is available, we need that\n if (\n !window.performance ||\n !window.performance.timing ||\n !window.PerformancePaintTiming\n ) {\n return\n }\n\n const { performance } = window\n\n // Some stuff is not available on about:blank as it requires a navigation to occur,\n // let's harden the code to not fail then:\n const ntEntryFallback = {\n nextHopProtocol: 'h2',\n type: 'other'\n }\n\n // The API exposes some funky info regarding the connection\n const protocolInfo = {\n get connectionInfo() {\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ntEntry.nextHopProtocol\n },\n get npnNegotiatedProtocol() {\n // NPN is deprecated in favor of ALPN, but this implementation returns the\n // HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)\n ? 
ntEntry.nextHopProtocol\n : 'unknown'\n },\n get navigationType() {\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ntEntry.type\n },\n get wasAlternateProtocolAvailable() {\n // The Alternate-Protocol header is deprecated in favor of Alt-Svc\n // (https://www.mnot.net/blog/2016/03/09/alt-svc), so technically this\n // should always return false.\n return false\n },\n get wasFetchedViaSpdy() {\n // SPDY is deprecated in favor of HTTP/2, but this implementation returns\n // true for HTTP/2 or HTTP2+QUIC/39 as well.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)\n },\n get wasNpnNegotiated() {\n // NPN is deprecated in favor of ALPN, but this implementation returns true\n // for HTTP/2 or HTTP2+QUIC/39 requests negotiated via ALPN.\n const ntEntry =\n performance.getEntriesByType('navigation')[0] || ntEntryFallback\n return ['h2', 'hq'].includes(ntEntry.nextHopProtocol)\n }\n }\n\n const { timing } = window.performance\n\n // Truncate number to specific number of decimals, most of the `loadTimes` stuff has 3\n function toFixed(num, fixed) {\n var re = new RegExp('^-?\\\\d+(?:.\\\\d{0,' + (fixed || -1) + '})?')\n return num.toString().match(re)[0]\n }\n\n const timingInfo = {\n get firstPaintAfterLoadTime() {\n // This was never actually implemented and always returns 0.\n return 0\n },\n get requestTime() {\n return timing.navigationStart / 1000\n },\n get startLoadTime() {\n return timing.navigationStart / 1000\n },\n get commitLoadTime() {\n return timing.responseStart / 1000\n },\n get finishDocumentLoadTime() {\n return timing.domContentLoadedEventEnd / 1000\n },\n get finishLoadTime() {\n return timing.loadEventEnd / 1000\n },\n get firstPaintTime() {\n const fpEntry = performance.getEntriesByType('paint')[0] || {\n startTime: timing.loadEventEnd / 1000 // Fallback if no navigation occured (`about:blank`)\n }\n return toFixed(\n (fpEntry.startTime + performance.timeOrigin) / 1000,\n 3\n )\n }\n }\n\n window.chrome.loadTimes = function() {\n return {\n ...protocolInfo,\n ...timingInfo\n }\n }\n utils.patchToString(window.chrome.loadTimes)\n }",_args:[{opts:{}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a 
blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n 
}\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { opts, STATIC_DATA }) => {\n if (!window.chrome) {\n // Use the exact property descriptor found in headful Chrome\n // fetch it via `Object.getOwnPropertyDescriptor(window, 'chrome')`\n Object.defineProperty(window, 'chrome', {\n writable: true,\n enumerable: true,\n configurable: false, // note!\n value: {} // We'll extend that later\n })\n }\n\n // That means we're running headful and don't need to mock anything\n const existsAlready = 'runtime' in window.chrome\n // `chrome.runtime` is only exposed on secure origins\n const isNotSecure = !window.location.protocol.startsWith('https')\n if (existsAlready || (isNotSecure && !opts.runOnInsecureOrigins)) {\n return // Nothing to do here\n }\n\n window.chrome.runtime = {\n // There's a bunch of static data in that property which doesn't seem to change,\n // we should periodically check for updates: `JSON.stringify(window.chrome.runtime, null, 2)`\n ...STATIC_DATA,\n // `chrome.runtime.id` is extension related and returns undefined in Chrome\n get id() {\n return undefined\n },\n // These two require more sophisticated mocks\n connect: null,\n sendMessage: null\n }\n\n const makeCustomRuntimeErrors = (preamble, method, extensionId) => ({\n NoMatchingSignature: new TypeError(\n preamble + `No matching signature.`\n ),\n MustSpecifyExtensionID: new TypeError(\n preamble +\n `${method} called from a webpage must specify an Extension ID (string) for its first argument.`\n ),\n InvalidExtensionID: new TypeError(\n preamble + `Invalid extension id: '${extensionId}'`\n )\n })\n\n // Valid Extension IDs are 32 characters in length and use the letter `a` to `p`:\n // https://source.chromium.org/chromium/chromium/src/+/master:components/crx_file/id_util.cc;drc=14a055ccb17e8c8d5d437fe080faba4c6f07beac;l=90\n const isValidExtensionID = str =>\n str.length === 32 && str.toLowerCase().match(/^[a-p]+$/)\n\n /** Mock `chrome.runtime.sendMessage` */\n const sendMessageHandler = {\n apply: function(target, ctx, args) {\n const [extensionId, options, responseCallback] = args || []\n\n // Define 
custom errors\n const errorPreamble = `Error in invocation of runtime.sendMessage(optional string extensionId, any message, optional object options, optional function responseCallback): `\n const Errors = makeCustomRuntimeErrors(\n errorPreamble,\n `chrome.runtime.sendMessage()`,\n extensionId\n )\n\n // Check if the call signature looks ok\n const noArguments = args.length === 0\n const tooManyArguments = args.length > 4\n const incorrectOptions = options && typeof options !== 'object'\n const incorrectResponseCallback =\n responseCallback && typeof responseCallback !== 'function'\n if (\n noArguments ||\n tooManyArguments ||\n incorrectOptions ||\n incorrectResponseCallback\n ) {\n throw Errors.NoMatchingSignature\n }\n\n // At least 2 arguments are required before we even validate the extension ID\n if (args.length < 2) {\n throw Errors.MustSpecifyExtensionID\n }\n\n // Now let's make sure we got a string as extension ID\n if (typeof extensionId !== 'string') {\n throw Errors.NoMatchingSignature\n }\n\n if (!isValidExtensionID(extensionId)) {\n throw Errors.InvalidExtensionID\n }\n\n return undefined // Normal behavior\n }\n }\n utils.mockWithProxy(\n window.chrome.runtime,\n 'sendMessage',\n function sendMessage() {},\n sendMessageHandler\n )\n\n /**\n * Mock `chrome.runtime.connect`\n *\n * @see https://developer.chrome.com/apps/runtime#method-connect\n */\n const connectHandler = {\n apply: function(target, ctx, args) {\n const [extensionId, connectInfo] = args || []\n\n // Define custom errors\n const errorPreamble = `Error in invocation of runtime.connect(optional string extensionId, optional object connectInfo): `\n const Errors = makeCustomRuntimeErrors(\n errorPreamble,\n `chrome.runtime.connect()`,\n extensionId\n )\n\n // Behavior differs a bit from sendMessage:\n const noArguments = args.length === 0\n const emptyStringArgument = args.length === 1 && extensionId === ''\n if (noArguments || emptyStringArgument) {\n throw Errors.MustSpecifyExtensionID\n }\n\n const tooManyArguments = args.length > 2\n const incorrectConnectInfoType =\n connectInfo && typeof connectInfo !== 'object'\n\n if (tooManyArguments || incorrectConnectInfoType) {\n throw Errors.NoMatchingSignature\n }\n\n const extensionIdIsString = typeof extensionId === 'string'\n if (extensionIdIsString && extensionId === '') {\n throw Errors.MustSpecifyExtensionID\n }\n if (extensionIdIsString && !isValidExtensionID(extensionId)) {\n throw Errors.InvalidExtensionID\n }\n\n // There's another edge-case here: extensionId is optional so we might find a connectInfo object as first param, which we need to validate\n const validateConnectInfo = ci => {\n // More than a first param connectInfo as been provided\n if (args.length > 1) {\n throw Errors.NoMatchingSignature\n }\n // An empty connectInfo has been provided\n if (Object.keys(ci).length === 0) {\n throw Errors.MustSpecifyExtensionID\n }\n // Loop over all connectInfo props an check them\n Object.entries(ci).forEach(([k, v]) => {\n const isExpected = ['name', 'includeTlsChannelId'].includes(k)\n if (!isExpected) {\n throw new TypeError(\n errorPreamble + `Unexpected property: '${k}'.`\n )\n }\n const MismatchError = (propName, expected, found) =>\n TypeError(\n errorPreamble +\n `Error at property '${propName}': Invalid type: expected ${expected}, found ${found}.`\n )\n if (k === 'name' && typeof v !== 'string') {\n throw MismatchError(k, 'string', typeof v)\n }\n if (k === 'includeTlsChannelId' && typeof v !== 'boolean') {\n throw MismatchError(k, 'boolean', typeof 
v)\n }\n })\n }\n if (typeof extensionId === 'object') {\n validateConnectInfo(extensionId)\n throw Errors.MustSpecifyExtensionID\n }\n\n // Unfortunately even when the connect fails Chrome will return an object with methods we need to mock as well\n return utils.patchToStringNested(makeConnectResponse())\n }\n }\n utils.mockWithProxy(\n window.chrome.runtime,\n 'connect',\n function connect() {},\n connectHandler\n )\n\n function makeConnectResponse() {\n const onSomething = () => ({\n addListener: function addListener() {},\n dispatch: function dispatch() {},\n hasListener: function hasListener() {},\n hasListeners: function hasListeners() {\n return false\n },\n removeListener: function removeListener() {}\n })\n\n const response = {\n name: '',\n sender: undefined,\n disconnect: function disconnect() {},\n onDisconnect: onSomething(),\n onMessage: onSomething(),\n postMessage: function postMessage() {\n if (!arguments.length) {\n throw new TypeError(`Insufficient number of arguments.`)\n }\n throw new Error(`Attempting to use a disconnected port object`)\n }\n }\n return response\n }\n }",_args:[{opts:{runOnInsecureOrigins:!1},STATIC_DATA:{OnInstalledReason:{CHROME_UPDATE:"chrome_update",INSTALL:"install",SHARED_MODULE_UPDATE:"shared_module_update",UPDATE:"update"},OnRestartRequiredReason:{APP_UPDATE:"app_update",OS_UPDATE:"os_update",PERIODIC:"periodic"},PlatformArch:{ARM:"arm",ARM64:"arm64",MIPS:"mips",MIPS64:"mips64",X86_32:"x86-32",X86_64:"x86-64"},PlatformNaclArch:{ARM:"arm",MIPS:"mips",MIPS64:"mips64",X86_32:"x86-32",X86_64:"x86-64"},PlatformOs:{ANDROID:"android",CROS:"cros",LINUX:"linux",MAC:"mac",OPENBSD:"openbsd",WIN:"win"},RequestUpdateCheckStatus:{NO_UPDATE:"no_update",THROTTLED:"throttled",UPDATE_AVAILABLE:"update_available"}}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. 
Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const 
proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"utils => {\n /**\n * Input might look funky, we need to normalize it so e.g. 
whitespace isn't an issue for our spoofing.\n *\n * @example\n * video/webm; codecs=\"vp8, vorbis\"\n * video/mp4; codecs=\"avc1.42E01E\"\n * audio/x-m4a;\n * audio/ogg; codecs=\"vorbis\"\n * @param {String} arg\n */\n const parseInput = arg => {\n const [mime, codecStr] = arg.trim().split(';')\n let codecs = []\n if (codecStr && codecStr.includes('codecs=\"')) {\n codecs = codecStr\n .trim()\n .replace(`codecs=\"`, '')\n .replace(`\"`, '')\n .trim()\n .split(',')\n .filter(x => !!x)\n .map(x => x.trim())\n }\n return {\n mime,\n codecStr,\n codecs\n }\n }\n\n const canPlayType = {\n // Intercept certain requests\n apply: function(target, ctx, args) {\n if (!args || !args.length) {\n return target.apply(ctx, args)\n }\n const { mime, codecs } = parseInput(args[0])\n // This specific mp4 codec is missing in Chromium\n if (mime === 'video/mp4') {\n if (codecs.includes('avc1.42E01E')) {\n return 'probably'\n }\n }\n // This mimetype is only supported if no codecs are specified\n if (mime === 'audio/x-m4a' && !codecs.length) {\n return 'maybe'\n }\n\n // This mimetype is only supported if no codecs are specified\n if (mime === 'audio/aac' && !codecs.length) {\n return 'probably'\n }\n // Everything else as usual\n return target.apply(ctx, args)\n }\n }\n\n /* global HTMLMediaElement */\n utils.replaceWithProxy(\n HTMLMediaElement.prototype,\n 'canPlayType',\n canPlayType\n )\n }",_args:[]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. 
[as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const 
proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { opts }) => {\n utils.replaceGetterWithProxy(\n Object.getPrototypeOf(navigator),\n 'hardwareConcurrency',\n utils.makeHandler().getterValue(opts.hardwareConcurrency)\n )\n }",_args:[{opts:{hardwareConcurrency:4}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) 
=> {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n 
}\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { opts }) => {\n const languages = opts.languages.length\n ? opts.languages\n : ['en-US', 'en']\n utils.replaceGetterWithProxy(\n Object.getPrototypeOf(navigator),\n 'languages',\n utils.makeHandler().getterValue(Object.freeze([...languages]))\n )\n }",_args:[{opts:{languages:[]}}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. 
Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const 
proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, opts) => {\n const isSecure = document.location.protocol.startsWith('https')\n\n // In headful on secure origins the permission should be \"default\", not \"denied\"\n if (isSecure) {\n utils.replaceGetterWithProxy(Notification, 'permission', {\n apply() {\n return 'default'\n }\n })\n }\n\n // Another weird behavior:\n // On insecure origins in headful the state is \"denied\",\n // whereas in headless it's \"prompt\"\n if (!isSecure) {\n const handler = {\n apply(target, ctx, args) {\n const param = (args || 
[])[0]\n\n const isNotifications =\n param && param.name && param.name === 'notifications'\n if (!isNotifications) {\n return utils.cache.Reflect.apply(...arguments)\n }\n\n return Promise.resolve(\n Object.setPrototypeOf(\n {\n state: 'denied',\n onchange: null\n },\n PermissionStatus.prototype\n )\n )\n }\n }\n // Note: Don't use `Object.getPrototypeOf` here\n utils.replaceWithProxy(Permissions.prototype, 'query', handler)\n }\n }",_args:[{}]}),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. 
`TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. 
`HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n 
return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, { fns, data }) => {\n fns = utils.materializeFns(fns)\n\n // That means we're running headful\n const hasPlugins = 'plugins' in navigator && navigator.plugins.length\n if (hasPlugins) {\n return // nothing to do here\n }\n\n const mimeTypes = fns.generateMimeTypeArray(utils, fns)(data.mimeTypes)\n const plugins = fns.generatePluginArray(utils, fns)(data.plugins)\n\n // Plugin and MimeType cross-reference each other, let's do that now\n // Note: We're looping through `data.plugins` here, not the generated `plugins`\n for (const pluginData of data.plugins) {\n pluginData.__mimeTypes.forEach((type, index) => {\n plugins[pluginData.name][index] = mimeTypes[type]\n\n Object.defineProperty(plugins[pluginData.name], type, {\n value: mimeTypes[type],\n writable: false,\n enumerable: false, // Not enumerable\n configurable: true\n })\n Object.defineProperty(mimeTypes[type], 'enabledPlugin', {\n value:\n type === 'application/x-pnacl'\n ? 
mimeTypes['application/x-nacl'].enabledPlugin // these reference the same plugin, so we need to re-use the Proxy in order to avoid leaks\n : new Proxy(plugins[pluginData.name], {}), // Prevent circular references\n writable: false,\n enumerable: false, // Important: `JSON.stringify(navigator.plugins)`\n configurable: true\n })\n })\n }\n\n const patchNavigator = (name, value) =>\n utils.replaceProperty(Object.getPrototypeOf(navigator), name, {\n get() {\n return value\n }\n })\n\n patchNavigator('mimeTypes', mimeTypes)\n patchNavigator('plugins', plugins)\n\n // All done\n }",_args:[{fns:{generateMimeTypeArray:"(utils, fns) => mimeTypesData => {\n return fns.generateMagicArray(utils, fns)(\n mimeTypesData,\n MimeTypeArray.prototype,\n MimeType.prototype,\n 'type'\n )\n}",generatePluginArray:"(utils, fns) => pluginsData => {\n return fns.generateMagicArray(utils, fns)(\n pluginsData,\n PluginArray.prototype,\n Plugin.prototype,\n 'name'\n )\n}",generateMagicArray:"(utils, fns) =>\n function(\n dataArray = [],\n proto = MimeTypeArray.prototype,\n itemProto = MimeType.prototype,\n itemMainProp = 'type'\n ) {\n // Quick helper to set props with the same descriptors vanilla is using\n const defineProp = (obj, prop, value) =>\n Object.defineProperty(obj, prop, {\n value,\n writable: false,\n enumerable: false, // Important for mimeTypes & plugins: `JSON.stringify(navigator.mimeTypes)`\n configurable: true\n })\n\n // Loop over our fake data and construct items\n const makeItem = data => {\n const item = {}\n for (const prop of Object.keys(data)) {\n if (prop.startsWith('__')) {\n continue\n }\n defineProp(item, prop, data[prop])\n }\n return patchItem(item, data)\n }\n\n const patchItem = (item, data) => {\n let descriptor = Object.getOwnPropertyDescriptors(item)\n\n // Special case: Plugins have a magic length property which is not enumerable\n // e.g. `navigator.plugins[i].length` should always be the length of the assigned mimeTypes\n if (itemProto === Plugin.prototype) {\n descriptor = {\n ...descriptor,\n length: {\n value: data.__mimeTypes.length,\n writable: false,\n enumerable: false,\n configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`\n }\n }\n }\n\n // We need to spoof a specific `MimeType` or `Plugin` object\n const obj = Object.create(itemProto, descriptor)\n\n // Virtually all property keys are not enumerable in vanilla\n const blacklist = [...Object.keys(data), 'length', 'enabledPlugin']\n return new Proxy(obj, {\n ownKeys(target) {\n return Reflect.ownKeys(target).filter(k => !blacklist.includes(k))\n },\n getOwnPropertyDescriptor(target, prop) {\n if (blacklist.includes(prop)) {\n return undefined\n }\n return Reflect.getOwnPropertyDescriptor(target, prop)\n }\n })\n }\n\n const magicArray = []\n\n // Loop through our fake data and use that to create convincing entities\n dataArray.forEach(data => {\n magicArray.push(makeItem(data))\n })\n\n // Add direct property access based on types (e.g. 
`obj['application/pdf']`) afterwards\n magicArray.forEach(entry => {\n defineProp(magicArray, entry[itemMainProp], entry)\n })\n\n // This is the best way to fake the type to make sure this is false: `Array.isArray(navigator.mimeTypes)`\n const magicArrayObj = Object.create(proto, {\n ...Object.getOwnPropertyDescriptors(magicArray),\n\n // There's one ugly quirk we unfortunately need to take care of:\n // The `MimeTypeArray` prototype has an enumerable `length` property,\n // but headful Chrome will still skip it when running `Object.getOwnPropertyNames(navigator.mimeTypes)`.\n // To strip it we need to make it first `configurable` and can then overlay a Proxy with an `ownKeys` trap.\n length: {\n value: magicArray.length,\n writable: false,\n enumerable: false,\n configurable: true // Important to be able to use the ownKeys trap in a Proxy to strip `length`\n }\n })\n\n // Generate our functional function mocks :-)\n const functionMocks = fns.generateFunctionMocks(utils)(\n proto,\n itemMainProp,\n magicArray\n )\n\n // We need to overlay our custom object with a JS Proxy\n const magicArrayObjProxy = new Proxy(magicArrayObj, {\n get(target, key = '') {\n // Redirect function calls to our custom proxied versions mocking the vanilla behavior\n if (key === 'item') {\n return functionMocks.item\n }\n if (key === 'namedItem') {\n return functionMocks.namedItem\n }\n if (proto === PluginArray.prototype && key === 'refresh') {\n return functionMocks.refresh\n }\n // Everything else can pass through as normal\n return utils.cache.Reflect.get(...arguments)\n },\n ownKeys(target) {\n // There are a couple of quirks where the original property demonstrates \"magical\" behavior that makes no sense\n // This can be witnessed when calling `Object.getOwnPropertyNames(navigator.mimeTypes)` and the absense of `length`\n // My guess is that it has to do with the recent change of not allowing data enumeration and this being implemented weirdly\n // For that reason we just completely fake the available property names based on our data to match what regular Chrome is doing\n // Specific issues when not patching this: `length` property is available, direct `types` props (e.g. `obj['application/pdf']`) are missing\n const keys = []\n const typeProps = magicArray.map(mt => mt[itemMainProp])\n typeProps.forEach((_, i) => keys.push(`${i}`))\n typeProps.forEach(propName => keys.push(propName))\n return keys\n },\n getOwnPropertyDescriptor(target, prop) {\n if (prop === 'length') {\n return undefined\n }\n return Reflect.getOwnPropertyDescriptor(target, prop)\n }\n })\n\n return magicArrayObjProxy\n }",generateFunctionMocks:"utils => (\n proto,\n itemMainProp,\n dataArray\n) => ({\n /** Returns the MimeType object with the specified index. */\n item: utils.createProxy(proto.item, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'item' on '${\n proto[Symbol.toStringTag]\n }': 1 argument required, but only 0 present.`\n )\n }\n // Special behavior alert:\n // - Vanilla tries to cast strings to Numbers (only integers!) and use them as property index lookup\n // - If anything else than an integer (including as string) is provided it will return the first entry\n const isInteger = args[0] && Number.isInteger(Number(args[0])) // Cast potential string to number first, then check for integer\n // Note: Vanilla never returns `undefined`\n return (isInteger ? dataArray[Number(args[0])] : dataArray[0]) || null\n }\n }),\n /** Returns the MimeType object with the specified name. 
*/\n namedItem: utils.createProxy(proto.namedItem, {\n apply(target, ctx, args) {\n if (!args.length) {\n throw new TypeError(\n `Failed to execute 'namedItem' on '${\n proto[Symbol.toStringTag]\n }': 1 argument required, but only 0 present.`\n )\n }\n return dataArray.find(mt => mt[itemMainProp] === args[0]) || null // Not `undefined`!\n }\n }),\n /** Does nothing and shall return nothing */\n refresh: proto.refresh\n ? utils.createProxy(proto.refresh, {\n apply(target, ctx, args) {\n return undefined\n }\n })\n : undefined\n})"},data:{mimeTypes:[{type:"application/pdf",suffixes:"pdf",description:"",__pluginName:"Chrome PDF Viewer"},{type:"application/x-google-chrome-pdf",suffixes:"pdf",description:"Portable Document Format",__pluginName:"Chrome PDF Plugin"},{type:"application/x-nacl",suffixes:"",description:"Native Client Executable",__pluginName:"Native Client"},{type:"application/x-pnacl",suffixes:"",description:"Portable Native Client Executable",__pluginName:"Native Client"}],plugins:[{name:"Chrome PDF Plugin",filename:"internal-pdf-viewer",description:"Portable Document Format",__mimeTypes:["application/x-google-chrome-pdf"]},{name:"Chrome PDF Viewer",filename:"mhjfbmdgcfjbbpaeojofohoefgiehjai",description:"",__mimeTypes:["application/pdf"]},{name:"Native Client",filename:"internal-nacl-plugin",description:"",__mimeTypes:["application/x-nacl","application/x-pnacl"]}]}}]}),!1===navigator.webdriver||void 0===navigator.webdriver||delete Object.getPrototypeOf(navigator).webdriver,(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. 
[as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. [as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const 
proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, opts) => {\n const getParameterProxyHandler = {\n apply: function(target, ctx, args) {\n const param = (args || [])[0]\n const result = utils.cache.Reflect.apply(target, ctx, args)\n // UNMASKED_VENDOR_WEBGL\n if (param === 37445) {\n return opts.vendor || 'Intel Inc.' 
// default in headless: Google Inc.\n }\n // UNMASKED_RENDERER_WEBGL\n if (param === 37446) {\n return opts.renderer || 'Intel Iris OpenGL Engine' // default in headless: Google SwiftShader\n }\n return result\n }\n }\n\n // There's more than one WebGL rendering context\n // https://developer.mozilla.org/en-US/docs/Web/API/WebGL2RenderingContext#Browser_compatibility\n // To find out the original values here: Object.getOwnPropertyDescriptors(WebGLRenderingContext.prototype.getParameter)\n const addProxy = (obj, propName) => {\n utils.replaceWithProxy(obj, propName, getParameterProxyHandler)\n }\n // For whatever weird reason loops don't play nice with Object.defineProperty, here's the next best thing:\n addProxy(WebGLRenderingContext.prototype, 'getParameter')\n addProxy(WebGL2RenderingContext.prototype, 'getParameter')\n }",_args:[{}]}),(()=>{try{if(window.outerWidth&&window.outerHeight)return;const n=85;window.outerWidth=window.innerWidth,window.outerHeight=window.innerHeight+n}catch(n){}})(),(({_utilsFns:_utilsFns,_mainFunction:_mainFunction,_args:_args})=>{const utils=Object.fromEntries(Object.entries(_utilsFns).map((([key,value])=>[key,eval(value)])));utils.init(),eval(_mainFunction)(utils,..._args)})({_utilsFns:{init:"() => {\n utils.preloadCache()\n}",stripProxyFromErrors:"(handler = {}) => {\n const newHandler = {\n setPrototypeOf: function (target, proto) {\n if (proto === null)\n throw new TypeError('Cannot convert object to primitive value')\n if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {\n throw new TypeError('Cyclic __proto__ value')\n }\n return Reflect.setPrototypeOf(target, proto)\n }\n }\n // We wrap each trap in the handler in a try/catch and modify the error stack if they throw\n const traps = Object.getOwnPropertyNames(handler)\n traps.forEach(trap => {\n newHandler[trap] = function () {\n try {\n // Forward the call to the defined proxy handler\n return handler[trap].apply(this, arguments || [])\n } catch (err) {\n // Stack traces differ per browser, we only support chromium based ones currently\n if (!err || !err.stack || !err.stack.includes(`at `)) {\n throw err\n }\n\n // When something throws within one of our traps the Proxy will show up in error stacks\n // An earlier implementation of this code would simply strip lines with a blacklist,\n // but it makes sense to be more surgical here and only remove lines related to our Proxy.\n // We try to use a known \"anchor\" line for that and strip it with everything above it.\n // If the anchor line cannot be found for some reason we fall back to our blacklist approach.\n\n const stripWithBlacklist = (stack, stripFirstLine = true) => {\n const blacklist = [\n `at Reflect.${trap} `, // e.g. Reflect.get or Reflect.apply\n `at Object.${trap} `, // e.g. Object.get or Object.apply\n `at Object.newHandler. [as ${trap}] ` // caused by this very wrapper :-)\n ]\n return (\n err.stack\n .split('\\n')\n // Always remove the first (file) line in the stack (guaranteed to be our proxy)\n .filter((line, index) => !(index === 1 && stripFirstLine))\n // Check if the line starts with one of our blacklisted strings\n .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))\n .join('\\n')\n )\n }\n\n const stripWithAnchor = (stack, anchor) => {\n const stackArr = stack.split('\\n')\n anchor = anchor || `at Object.newHandler. 
[as ${trap}] ` // Known first Proxy line in chromium\n const anchorIndex = stackArr.findIndex(line =>\n line.trim().startsWith(anchor)\n )\n if (anchorIndex === -1) {\n return false // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n return stackArr.join('\\n')\n }\n\n // Special cases due to our nested toString proxies\n err.stack = err.stack.replace(\n 'at Object.toString (',\n 'at Function.toString ('\n )\n if ((err.stack || '').includes('at Function.toString (')) {\n err.stack = stripWithBlacklist(err.stack, false)\n throw err\n }\n\n // Try using the anchor method, fallback to blacklist if necessary\n err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack)\n\n throw err // Re-throw our now sanitized error\n }\n }\n })\n return newHandler\n}",stripErrorWithAnchor:"(err, anchor) => {\n const stackArr = err.stack.split('\\n')\n const anchorIndex = stackArr.findIndex(line => line.trim().startsWith(anchor))\n if (anchorIndex === -1) {\n return err // 404, anchor not found\n }\n // Strip everything from the top until we reach the anchor line (remove anchor line as well)\n // Note: We're keeping the 1st line (zero index) as it's unrelated (e.g. `TypeError`)\n stackArr.splice(1, anchorIndex)\n err.stack = stackArr.join('\\n')\n return err\n}",replaceProperty:"(obj, propName, descriptorOverrides = {}) => {\n return Object.defineProperty(obj, propName, {\n // Copy over the existing descriptors (writable, enumerable, configurable, etc)\n ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),\n // Add our overrides (e.g. value, get())\n ...descriptorOverrides\n })\n}",preloadCache:"() => {\n if (utils.cache) {\n return\n }\n utils.cache = {\n // Used in our proxies\n Reflect: {\n get: Reflect.get.bind(Reflect),\n apply: Reflect.apply.bind(Reflect)\n },\n // Used in `makeNativeString`\n nativeToStringStr: Function.toString + '' // => `function toString() { [native code] }`\n }\n}",makeNativeString:"(name = '') => {\n return utils.cache.nativeToStringStr.replace('toString', name || '')\n}",patchToString:"(obj, str = '') => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n // `toString` targeted at our proxied Object detected\n if (ctx === obj) {\n // We either return the optional string verbatim or derive the most desired result automatically\n return str || utils.makeNativeString(obj.name)\n }\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. 
the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",patchToStringNested:"(obj = {}) => {\n return utils.execRecursively(obj, ['function'], utils.patchToString)\n}",redirectToString:"(proxyObj, originalObj) => {\n const handler = {\n apply: function (target, ctx) {\n // This fixes e.g. `HTMLMediaElement.prototype.canPlayType.toString + \"\"`\n if (ctx === Function.prototype.toString) {\n return utils.makeNativeString('toString')\n }\n\n // `toString` targeted at our proxied Object detected\n if (ctx === proxyObj) {\n const fallback = () =>\n originalObj && originalObj.name\n ? utils.makeNativeString(originalObj.name)\n : utils.makeNativeString(proxyObj.name)\n\n // Return the toString representation of our original object if possible\n return originalObj + '' || fallback()\n }\n\n if (typeof ctx === 'undefined' || ctx === null) {\n return target.call(ctx)\n }\n\n // Check if the toString protype of the context is the same as the global prototype,\n // if not indicates that we are doing a check across different windows., e.g. the iframeWithdirect` test case\n const hasSameProto = Object.getPrototypeOf(\n Function.prototype.toString\n ).isPrototypeOf(ctx.toString) // eslint-disable-line no-prototype-builtins\n if (!hasSameProto) {\n // Pass the call on to the local Function.prototype.toString instead\n return ctx.toString()\n }\n\n return target.call(ctx)\n }\n }\n\n const toStringProxy = new Proxy(\n Function.prototype.toString,\n utils.stripProxyFromErrors(handler)\n )\n utils.replaceProperty(Function.prototype, 'toString', {\n value: toStringProxy\n })\n}",replaceWithProxy:"(obj, propName, handler) => {\n const originalObj = obj[propName]\n const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.redirectToString(proxyObj, originalObj)\n\n return true\n}",replaceGetterWithProxy:"(obj, propName, handler) => {\n const fn = Object.getOwnPropertyDescriptor(obj, propName).get\n const fnStr = fn.toString() // special getter function string\n const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { get: proxyObj })\n utils.patchToString(proxyObj, fnStr)\n\n return true\n}",replaceGetterSetter:"(obj, propName, handlerGetterSetter) => {\n const ownPropertyDescriptor = Object.getOwnPropertyDescriptor(obj, propName)\n const handler = { ...ownPropertyDescriptor }\n\n if (handlerGetterSetter.get !== undefined) {\n const nativeFn = ownPropertyDescriptor.get\n handler.get = function() {\n return handlerGetterSetter.get.call(this, nativeFn.bind(this))\n }\n utils.redirectToString(handler.get, nativeFn)\n }\n\n if (handlerGetterSetter.set !== undefined) {\n const nativeFn = ownPropertyDescriptor.set\n handler.set = function(newValue) {\n handlerGetterSetter.set.call(this, newValue, nativeFn.bind(this))\n }\n utils.redirectToString(handler.set, nativeFn)\n }\n\n Object.defineProperty(obj, propName, handler)\n}",mockWithProxy:"(obj, propName, pseudoTarget, handler) => {\n const 
proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n\n utils.replaceProperty(obj, propName, { value: proxyObj })\n utils.patchToString(proxyObj)\n\n return true\n}",createProxy:"(pseudoTarget, handler) => {\n const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler))\n utils.patchToString(proxyObj)\n\n return proxyObj\n}",splitObjPath:"objPath => ({\n // Remove last dot entry (property) ==> `HTMLMediaElement.prototype`\n objName: objPath.split('.').slice(0, -1).join('.'),\n // Extract last dot entry ==> `canPlayType`\n propName: objPath.split('.').slice(-1)[0]\n})",replaceObjPathWithProxy:"(objPath, handler) => {\n const { objName, propName } = utils.splitObjPath(objPath)\n const obj = eval(objName) // eslint-disable-line no-eval\n return utils.replaceWithProxy(obj, propName, handler)\n}",execRecursively:"(obj = {}, typeFilter = [], fn) => {\n function recurse(obj) {\n for (const key in obj) {\n if (obj[key] === undefined) {\n continue\n }\n if (obj[key] && typeof obj[key] === 'object') {\n recurse(obj[key])\n } else {\n if (obj[key] && typeFilter.includes(typeof obj[key])) {\n fn.call(this, obj[key])\n }\n }\n }\n }\n recurse(obj)\n return obj\n}",stringifyFns:"(fnObj = { hello: () => 'world' }) => {\n // Object.fromEntries() ponyfill (in 6 lines) - supported only in Node v12+, modern browsers are fine\n // https://github.com/feross/fromentries\n function fromEntries(iterable) {\n return [...iterable].reduce((obj, [key, val]) => {\n obj[key] = val\n return obj\n }, {})\n }\n return (Object.fromEntries || fromEntries)(\n Object.entries(fnObj)\n .filter(([key, value]) => typeof value === 'function')\n .map(([key, value]) => [key, value.toString()]) // eslint-disable-line no-eval\n )\n}",materializeFns:"(fnStrObj = { hello: \"() => 'world'\" }) => {\n return Object.fromEntries(\n Object.entries(fnStrObj).map(([key, value]) => {\n if (value.startsWith('function')) {\n // some trickery is needed to make oldschool functions work :-)\n return [key, eval(`() => ${value}`)()] // eslint-disable-line no-eval\n } else {\n // arrow functions just work\n return [key, eval(value)] // eslint-disable-line no-eval\n }\n })\n )\n}",makeHandler:"() => ({\n // Used by simple `navigator` getter evasions\n getterValue: value => ({\n apply(target, ctx, args) {\n // Let's fetch the value first, to trigger and escalate potential errors\n // Illegal invocations like `navigator.__proto__.vendor` will throw here\n utils.cache.Reflect.apply(...arguments)\n return value\n }\n })\n})",arrayEquals:"(array1, array2) => {\n if (array1.length !== array2.length) {\n return false\n }\n for (let i = 0; i < array1.length; ++i) {\n if (array1[i] !== array2[i]) {\n return false\n }\n }\n return true\n}",memoize:"fn => {\n const cache = []\n return function(...args) {\n if (!cache.some(c => utils.arrayEquals(c.key, args))) {\n cache.push({ key: args, value: fn.apply(this, args) })\n }\n return cache.find(c => utils.arrayEquals(c.key, args)).value\n }\n}"},_mainFunction:"(utils, opts) => {\n try {\n // Adds a contentWindow proxy to the provided iframe element\n const addContentWindowProxy = iframe => {\n const contentWindowProxy = {\n get(target, key) {\n // Now to the interesting part:\n // We actually make this thing behave like a regular iframe window,\n // by intercepting calls to e.g. `.self` and redirect it to the correct thing. 
:)\n // That makes it possible for these assertions to be correct:\n // iframe.contentWindow.self === window.top // must be false\n if (key === 'self') {\n return this\n }\n // iframe.contentWindow.frameElement === iframe // must be true\n if (key === 'frameElement') {\n return iframe\n }\n // Intercept iframe.contentWindow[0] to hide the property 0 added by the proxy.\n if (key === '0') {\n return undefined\n }\n return Reflect.get(target, key)\n }\n }\n\n if (!iframe.contentWindow) {\n const proxy = new Proxy(window, contentWindowProxy)\n Object.defineProperty(iframe, 'contentWindow', {\n get() {\n return proxy\n },\n set(newValue) {\n return newValue // contentWindow is immutable\n },\n enumerable: true,\n configurable: false\n })\n }\n }\n\n // Handles iframe element creation, augments `srcdoc` property so we can intercept further\n const handleIframeCreation = (target, thisArg, args) => {\n const iframe = target.apply(thisArg, args)\n\n // We need to keep the originals around\n const _iframe = iframe\n const _srcdoc = _iframe.srcdoc\n\n // Add hook for the srcdoc property\n // We need to be very surgical here to not break other iframes by accident\n Object.defineProperty(iframe, 'srcdoc', {\n configurable: true, // Important, so we can reset this later\n get: function() {\n return _srcdoc\n },\n set: function(newValue) {\n addContentWindowProxy(this)\n // Reset property, the hook is only needed once\n Object.defineProperty(iframe, 'srcdoc', {\n configurable: false,\n writable: false,\n value: _srcdoc\n })\n _iframe.srcdoc = newValue\n }\n })\n return iframe\n }\n\n // Adds a hook to intercept iframe creation events\n const addIframeCreationSniffer = () => {\n /* global document */\n const createElementHandler = {\n // Make toString() native\n get(target, key) {\n return Reflect.get(target, key)\n },\n apply: function(target, thisArg, args) {\n const isIframe =\n args && args.length && `${args[0]}`.toLowerCase() === 'iframe'\n if (!isIframe) {\n // Everything as usual\n return target.apply(thisArg, args)\n } else {\n return handleIframeCreation(target, thisArg, args)\n }\n }\n }\n // All this just due to iframes with srcdoc bug\n utils.replaceWithProxy(\n document,\n 'createElement',\n createElementHandler\n )\n }\n\n // Let's go\n addIframeCreationSniffer()\n } catch (err) {\n // console.warn(err)\n }\n }",_args:[]}); \ No newline at end of file diff --git a/utils/stealth_helper.py b/utils/stealth_helper.py new file mode 100644 index 00000000..65192bf7 --- /dev/null +++ b/utils/stealth_helper.py @@ -0,0 +1,36 @@ +import requests +import os + +class StealthHelper: + @staticmethod + def check_and_download(): + local_file_path = 'utils/stealth.min.js' + remote_url = 'https://cdn.jsdelivr.net/gh/requireCool/stealth.min.js/stealth.min.js' + + # 读取本地文件的生成日期 + with open(local_file_path, 'r') as file: + local_content = file.readlines() + local_generated_on = [line for line in local_content if 'Generated on:' in line] + local_date = local_generated_on[0].split(': ')[1].strip() if local_generated_on else None + + # 获取远程文件内容 + response = requests.get(remote_url) + if response.status_code == 200: + remote_content = response.text + remote_generated_on = [line for line in remote_content.split('\n') if 'Generated on:' in line] + remote_date = remote_generated_on[0].split(': ')[1].strip() if remote_generated_on else None + + # 比较日期 + if local_date != remote_date: + # 下载并保存文件 + with open(local_file_path, 'w') as local_file: + local_file.write(response.text) + print('文件已更新。') + else: + print('文件是最新的。') 
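+ # 远程请求失败(非 200)时不修改本地文件,仅给出提示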
+ else: + print('无法访问远程文件。') + +if __name__ == "__main__": + StealthHelper.check_and_download() + diff --git a/utils/video_content.sql b/utils/video_content.sql new file mode 100644 index 00000000..78bf87ba --- /dev/null +++ b/utils/video_content.sql @@ -0,0 +1,20 @@ +-- Active: 1739798225423@@127.0.0.1@3306 +-- 视频内容主表 +CREATE TABLE IF NOT EXISTS video_contents ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + account_id INTEGER NOT NULL, -- 关联social_media_accounts表 + title TEXT NOT NULL, -- 视频标题 + thumb_base64 TEXT, -- 封面图片base64数据 + publish_time DATETIME, -- 发布时间 + status VARCHAR(50), -- 视频状态 + plays INTEGER DEFAULT 0, -- 播放数 + likes INTEGER DEFAULT 0, -- 点赞数 + comments INTEGER DEFAULT 0, -- 评论数 + shares INTEGER DEFAULT 0, -- 分享数 + tags TEXT, -- 标签列表,JSON格式 + mentions TEXT, -- 提及用户列表,JSON格式 + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + updated_at DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (account_id) REFERENCES social_media_accounts(id), + UNIQUE(account_id, title, publish_time) +); \ No newline at end of file diff --git a/utils/video_content_db.py b/utils/video_content_db.py new file mode 100644 index 00000000..3a528f0f --- /dev/null +++ b/utils/video_content_db.py @@ -0,0 +1,321 @@ +""" +视频内容数据库操作类 +用于管理视频内容数据 +""" +import sqlite3 +from pathlib import Path +from typing import List, Dict, Any, Optional +import logging +import json +from datetime import datetime + +logger = logging.getLogger(__name__) + +class VideoContentDB: + def __init__(self, db_path: str = "data/social_media.db"): + """初始化数据库连接""" + self.db_path = db_path + self.conn = None + self.cursor = None + self._connect() + self._init_tables() + + def _connect(self): + """连接数据库""" + try: + self.conn = sqlite3.connect(self.db_path) + self.cursor = self.conn.cursor() + except Exception as e: + logger.error(f"连接数据库失败: {str(e)}") + raise + + def _init_tables(self): + """初始化数据表""" + try: + # 读取SQL文件 + sql_path = Path(__file__).parent / "video_content.sql" + with open(sql_path, "r", encoding="utf-8") as f: + sql = f.read() + + # 执行建表语句 + self.cursor.executescript(sql) + self.conn.commit() + except Exception as e: + logger.error(f"初始化数据表失败: {str(e)}") + raise + + def close(self): + """关闭数据库连接""" + if self.cursor: + self.cursor.close() + if self.conn: + self.conn.close() + + def add_video_content( + self, + account_id: int, + title: str, + thumb_base64: Optional[str] = None, + publish_time: Optional[str] = None, + status: Optional[str] = None, + plays: int = 0, + likes: int = 0, + comments: int = 0, + shares: int = 0, + tags: Optional[List[str]] = None, + mentions: Optional[List[str]] = None + ) -> int: + """ + 添加视频内容 + + Args: + account_id: 账号ID + title: 视频标题 + thumb_base64: 封面图片base64数据 + publish_time: 发布时间 + status: 视频状态 + plays: 播放数 + likes: 点赞数 + comments: 评论数 + shares: 分享数 + tags: 标签列表 + mentions: 提及用户列表 + + Returns: + int: 视频内容ID + """ + try: + # 将标签和提及用户列表转换为JSON字符串 + tags_json = json.dumps(tags or [], ensure_ascii=False) + mentions_json = json.dumps(mentions or [], ensure_ascii=False) + + # 插入视频内容 + self.cursor.execute( + """ + INSERT INTO video_contents ( + account_id, title, thumb_base64, publish_time, + status, plays, likes, comments, shares, tags, mentions + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
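+ -- 11 个占位符与上方列出的 11 个列一一对应,顺序需与 execute() 传入的参数元组保持一致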
+ """, + (account_id, title, thumb_base64, publish_time, + status, plays, likes, comments, shares, tags_json, mentions_json) + ) + + video_id = self.cursor.lastrowid + self.conn.commit() + return video_id + + except Exception as e: + self.conn.rollback() + logger.error(f"添加视频内容失败: {str(e)}") + raise + + def get_video_content(self, video_id: int) -> Optional[Dict[str, Any]]: + """ + 获取视频内容 + + Args: + video_id: 视频ID + + Returns: + Dict: 视频内容信息 + """ + try: + # 获取视频基本信息 + self.cursor.execute( + """ + SELECT v.*, a.platform, a.nickname + FROM video_contents v + JOIN social_media_accounts a ON v.account_id = a.id + WHERE v.id = ? + """, + (video_id,) + ) + row = self.cursor.fetchone() + if not row: + return None + + # 解析JSON数据 + tags = json.loads(row[10] or '[]') # tags列 + mentions = json.loads(row[11] or '[]') # mentions列 + + # 构建返回数据 + return { + "id": row[0], + "account_id": row[1], + "title": row[2], + "thumb_base64": row[3], + "publish_time": row[4], + "status": row[5], + "stats": { + "plays": row[6], + "likes": row[7], + "comments": row[8], + "shares": row[9] + }, + "tags": tags, + "mentions": mentions, + "created_at": row[12], + "updated_at": row[13], + "platform": row[14], + "nickname": row[15] + } + + except Exception as e: + logger.error(f"获取视频内容失败: {str(e)}") + raise + + def get_video_count(self, account_id: int) -> int: + """ + 获取指定账号的视频数量 + + Args: + account_id: 账号ID + + Returns: + int: 视频数量 + """ + try: + self.cursor.execute( + "SELECT COUNT(*) FROM video_contents WHERE account_id = ?", + (account_id,) + ) + return self.cursor.fetchone()[0] + except Exception as e: + logger.error(f"获取视频数量失败: {str(e)}") + return 0 + + def get_video_content_by_title( + self, + account_id: int, + title: str + ) -> Optional[Dict[str, Any]]: + """ + 根据账号ID和标题获取视频内容 + + Args: + account_id: 账号ID + title: 视频标题 + + Returns: + Optional[Dict[str, Any]]: 视频内容信息,如果不存在则返回None + """ + try: + # 获取视频基本信息 + self.cursor.execute( + """ + SELECT v.*, a.platform, a.nickname + FROM video_contents v + JOIN social_media_accounts a ON v.account_id = a.id + WHERE v.account_id = ? AND v.title = ? 
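+ -- 按账号 ID 与标题精确匹配,标题需与入库时保存的值完全一致才能命中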
+ """, + (account_id, title) + ) + row = self.cursor.fetchone() + if not row: + return None + + # 解析JSON数据 + tags = json.loads(row[10] or '[]') # tags列 + mentions = json.loads(row[11] or '[]') # mentions列 + + # 构建返回数据 + return { + "id": row[0], + "account_id": row[1], + "title": row[2], + "thumb_base64": row[3], + "publish_time": row[4], + "status": row[5], + "stats": { + "plays": row[6], + "likes": row[7], + "comments": row[8], + "shares": row[9] + }, + "tags": tags, + "mentions": mentions, + "created_at": row[12], + "updated_at": row[13], + "platform": row[14], + "nickname": row[15] + } + + except Exception as e: + logger.error(f"根据标题获取视频内容失败: {str(e)}") + return None + + def update_video_content( + self, + video_id: int, + account_id: int, + title: str, + thumb_base64: Optional[str] = None, + publish_time: Optional[str] = None, + status: Optional[str] = None, + stats: Optional[Dict[str, int]] = None, + tags: Optional[List[str]] = None, + mentions: Optional[List[str]] = None + ) -> bool: + """ + 更新视频内容 + + Args: + video_id: 视频ID + account_id: 账号ID + title: 视频标题 + thumb_base64: 封面图片base64数据 + publish_time: 发布时间 + status: 视频状态 + stats: 统计数据字典,包含 plays, likes, comments, shares + tags: 标签列表 + mentions: 提及用户列表 + + Returns: + bool: 更新是否成功 + """ + try: + # 将标签和提及用户列表转换为JSON字符串 + tags_json = json.dumps(tags or [], ensure_ascii=False) + mentions_json = json.dumps(mentions or [], ensure_ascii=False) + + # 准备统计数据 + stats = stats or {} + plays = stats.get('plays', 0) + likes = stats.get('likes', 0) + comments = stats.get('comments', 0) + shares = stats.get('shares', 0) + + # 更新时间 + updated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S.%f") + + # 更新视频内容 + self.cursor.execute( + """ + UPDATE video_contents + SET account_id = ?, + title = ?, + thumb_base64 = ?, + publish_time = ?, + status = ?, + plays = ?, + likes = ?, + comments = ?, + shares = ?, + tags = ?, + mentions = ?, + updated_at = ? + WHERE id = ? + """, + (account_id, title, thumb_base64, publish_time, + status, plays, likes, comments, shares, + tags_json, mentions_json, updated_at, video_id) + ) + + self.conn.commit() + return True + + except Exception as e: + self.conn.rollback() + logger.error(f"更新视频内容失败: {str(e)}") + return False \ No newline at end of file diff --git a/video_file_manager/.env.example b/video_file_manager/.env.example new file mode 100644 index 00000000..b9f7577c --- /dev/null +++ b/video_file_manager/.env.example @@ -0,0 +1,12 @@ +# 调试模式 +DEBUG=false + +# 视频目录路径 +VIDEO_DIR=F:\向阳也有米\24版本\12月 + +# 日志目录 +LOG_DIR=logs + +# 服务配置 +HOST=0.0.0.0 +PORT=7860 \ No newline at end of file diff --git a/video_file_manager/README.md b/video_file_manager/README.md new file mode 100644 index 00000000..d7152d47 --- /dev/null +++ b/video_file_manager/README.md @@ -0,0 +1,67 @@ +# 视频文件管理器 + +一个基于 Gradio 的视频文件管理工具,帮助你管理本地视频文件和元数据。 + +## 功能特点 + +- 📁 文件树浏览 +- 📊 视频信息显示 +- 📝 元数据管理 +- 🔄 实时更新 + +## 快速开始 + +1. 安装依赖: +```bash +pip install -r requirements.txt +``` + +2. 配置环境变量: +```bash +cp .env.example .env +# 编辑 .env 文件,设置你的配置 +``` + +3. 运行应用: +```bash +python run.py +``` + +4. 打开浏览器访问: +``` +http://localhost:7860 +``` + +## 目录结构 + +``` +video_file_manager/ +├── core/ # 核心功能模块 +├── ui/ # 用户界面模块 +├── utils/ # 工具函数 +├── .env.example # 环境变量示例 +├── config.py # 配置文件 +├── requirements.txt # 依赖清单 +└── run.py # 启动脚本 +``` + +## 配置说明 + +- `DEBUG`: 调试模式开关 +- `VIDEO_DIR`: 视频目录路径 +- `LOG_DIR`: 日志目录路径 +- `HOST`: 服务器地址 +- `PORT`: 服务器端口 + +## 使用说明 + +1. 左侧面板显示文件树,可以浏览和选择视频文件 +2. 右侧面板显示选中视频的详细信息 +3. 可以查看和编辑视频的元数据 +4. 
支持实时刷新目录结构 + +## 开发说明 + +- 使用 Python 3.10+ +- 基于 Gradio 5.15+ + \ No newline at end of file diff --git a/video_file_manager/__init__.py b/video_file_manager/__init__.py new file mode 100644 index 00000000..cc89ace7 --- /dev/null +++ b/video_file_manager/__init__.py @@ -0,0 +1,5 @@ +""" +视频文件管理器包 +""" + +__version__ = "0.1.0" \ No newline at end of file diff --git a/video_file_manager/config.py b/video_file_manager/config.py new file mode 100644 index 00000000..2ea42755 --- /dev/null +++ b/video_file_manager/config.py @@ -0,0 +1,30 @@ +""" +配置文件 +""" +from pathlib import Path +from pydantic_settings import BaseSettings, SettingsConfigDict + +class Settings(BaseSettings): + """应用配置""" + # 基础配置 + DEBUG: bool = False + + # 路径配置 + VIDEO_DIR: Path = Path("videos") + LOG_DIR: Path = Path("logs") + + # 服务配置 + HOST: str = "0.0.0.0" + PORT: int = 7860 + + # 视频文件类型 + VIDEO_EXTENSIONS: set = {'.mp4', '.mov', '.avi', '.mkv'} + + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + extra="ignore" + ) + +# 创建全局设置实例 +settings = Settings() \ No newline at end of file diff --git a/video_file_manager/core/__init__.py b/video_file_manager/core/__init__.py new file mode 100644 index 00000000..a754afbf --- /dev/null +++ b/video_file_manager/core/__init__.py @@ -0,0 +1,3 @@ +""" +核心功能模块 +""" \ No newline at end of file diff --git a/video_file_manager/core/file_manager.py b/video_file_manager/core/file_manager.py new file mode 100644 index 00000000..1bd57b3b --- /dev/null +++ b/video_file_manager/core/file_manager.py @@ -0,0 +1,100 @@ +""" +文件管理核心类 +""" +from pathlib import Path +from typing import Dict, Optional, List +from datetime import datetime +import logging + +from video_file_manager.config import settings + +logger = logging.getLogger(__name__) + +class FileManager: + """文件管理器""" + + VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv'} + + def __init__(self, base_dir: Optional[Path] = None): + """ + 初始化文件管理器 + + Args: + base_dir: 基础目录路径,如果不指定则使用配置中的 VIDEO_DIR + """ + self.base_dir = Path(base_dir) if base_dir else settings.VIDEO_DIR + if not self.base_dir.exists(): + raise ValueError(f"目录不存在: {self.base_dir}") + + def scan_videos(self) -> List[Dict]: + """ + 扫描所有视频文件 + + Returns: + List[Dict]: 视频文件信息列表 + """ + videos = [] + try: + logger.debug(f"开始扫描目录: {self.base_dir.resolve()}") + # 递归扫描目录 + for file_path in self.base_dir.rglob('*'): + if file_path.is_file() and file_path.suffix.lower() in self.VIDEO_EXTENSIONS: + try: + logger.debug(f"处理文件: {file_path}") + file_info = self.get_file_info(file_path) + if file_info: # 只添加有效的文件信息 + logger.debug(f"文件信息: {file_info}") + videos.append(file_info) + except Exception as e: + logger.error(f"处理文件 {file_path} 失败: {str(e)}") + continue + + # 按修改时间排序 + videos.sort(key=lambda x: x.get('modified', datetime.min), reverse=True) + logger.info(f"找到 {len(videos)} 个视频文件") + return videos + except Exception as e: + logger.error(f"扫描视频目录失败: {str(e)}") + return [] + + def get_file_info(self, file_path: Path) -> Dict: + """ + 获取文件信息 + + Args: + file_path: 文件路径 + + Returns: + Dict: 文件信息 + """ + try: + stat = file_path.stat() + # 确保路径都是绝对路径后再计算相对路径 + abs_base_dir = self.base_dir.resolve() + abs_file_path = file_path.resolve() + relative_path = abs_file_path.relative_to(abs_base_dir) + + logger.debug(f"基础目录: {abs_base_dir}") + logger.debug(f"文件路径: {abs_file_path}") + logger.debug(f"相对路径: {relative_path}") + + return { + "name": file_path.name, + "path": str(abs_file_path), # 使用绝对路径 + "relative_path": str(relative_path), # 保存相对路径 + "size": 
self._format_size(stat.st_size), + "created": datetime.fromtimestamp(stat.st_ctime), + "modified": datetime.fromtimestamp(stat.st_mtime), + "type": file_path.suffix.lower()[1:] + } + except Exception as e: + logger.error(f"获取文件信息失败: {str(e)}") + return {} + + def _format_size(self, size: int) -> str: + """格式化文件大小""" + for unit in ['B', 'KB', 'MB', 'GB']: + if size < 1024: + return f"{size:.1f} {unit}" + size /= 1024 + return f"{size:.1f} TB" \ No newline at end of file diff --git a/video_file_manager/core/metadata_manager.py b/video_file_manager/core/metadata_manager.py new file mode 100644 index 00000000..eeff8eaf --- /dev/null +++ b/video_file_manager/core/metadata_manager.py @@ -0,0 +1,82 @@ +""" +元数据管理类 +""" +from pathlib import Path +from typing import Dict, Optional +import json +from datetime import datetime +import logging + +logger = logging.getLogger(__name__) + +class MetadataManager: + """元数据管理器""" + + def __init__(self): + """初始化元数据管理器""" + pass + + def read_metadata(self, video_dir: Path) -> Optional[Dict]: + """ + 读取视频元数据 + + Args: + video_dir: 视频目录路径 + + Returns: + Optional[Dict]: 元数据信息 + """ + info_path = video_dir / "info.json" + if not info_path.exists(): + return self._create_default_metadata(video_dir) + + try: + with info_path.open('r', encoding='utf-8') as f: + return json.load(f) + except Exception as e: + logger.error(f"读取元数据失败: {str(e)}") + return self._create_default_metadata(video_dir) + + def write_metadata(self, video_dir: Path, metadata: Dict) -> bool: + """ + 写入视频元数据 + + Args: + video_dir: 视频目录路径 + metadata: 元数据信息 + + Returns: + bool: 是否写入成功 + """ + info_path = video_dir / "info.json" + try: + with info_path.open('w', encoding='utf-8') as f: + json.dump(metadata, f, ensure_ascii=False, indent=4) + return True + except Exception as e: + logger.error(f"写入元数据失败: {str(e)}") + return False + + def _create_default_metadata(self, video_dir: Path) -> Dict: + """ + 创建默认元数据 + + Args: + video_dir: 视频目录路径 + + Returns: + Dict: 默认元数据 + """ + video_files = list(video_dir.glob("*.mp4")) + if not video_files: + return {} + + video_file = video_files[0] + return { + "video_info": { + "file_name": video_file.name, + "create_time": datetime.fromtimestamp(video_file.stat().st_mtime).strftime("%Y-%m-%d %H:%M:%S"), + "size": video_file.stat().st_size + }, + "platforms": {} + } \ No newline at end of file diff --git a/video_file_manager/extract_text.py b/video_file_manager/extract_text.py new file mode 100644 index 00000000..ed089a68 --- /dev/null +++ b/video_file_manager/extract_text.py @@ -0,0 +1,36 @@ +""" +提取视频文件中的文字信息 +""" +import sys +from pathlib import Path +import logging + +# 添加项目根目录到 Python 路径 +root_dir = Path(__file__).resolve().parent.parent +sys.path.append(str(root_dir)) + +from video_file_manager.utils.helpers import extract_text_from_video, setup_logging + +if __name__ == "__main__": + # 设置日志 + log_dir = Path("logs") + log_dir.mkdir(exist_ok=True) + setup_logging( + log_file=log_dir / "extract_text.log", + level="DEBUG", + console_level="DEBUG" + ) + + logger = logging.getLogger(__name__) + + video_path = "F:/向阳也有米/24版本/12月/112914-7-定义30/1014-7-定义.mp4" + try: + logger.info(f"开始处理视频: {video_path}") + text = extract_text_from_video(video_path) + logger.info("提取的文字信息:") + logger.info(text) + print("提取的文字信息:") + print(text) + except Exception as e: + logger.error(f"提取失败: {str(e)}") + print(f"提取失败: {str(e)}") \ No newline at end of file diff --git a/video_file_manager/run.py b/video_file_manager/run.py new file mode 100644 index 00000000..62209499 --- /dev/null +++ 
b/video_file_manager/run.py @@ -0,0 +1,60 @@ +""" +启动视频文件管理器 +""" +import os +from pathlib import Path +import sys +import logging + +# 添加项目根目录到 Python 路径 +root_dir = Path(__file__).parent.parent +sys.path.append(str(root_dir)) + +from video_file_manager.ui.app import create_app +from video_file_manager.utils.helpers import setup_logging +from video_file_manager.config import settings + +def main(): + """主函数""" + # 设置日志 + log_dir = settings.LOG_DIR + log_dir.mkdir(exist_ok=True) + + # 配置日志 + setup_logging( + log_file=log_dir / "app.log", + level="DEBUG", # 强制使用 DEBUG 级别 + console_level="DEBUG" # 控制台也输出 DEBUG 信息 + ) + + logger = logging.getLogger(__name__) + logger.info(f"视频目录: {settings.VIDEO_DIR}") + logger.info(f"日志目录: {settings.LOG_DIR}") + + # 导入demo实例 + from video_file_manager.ui.app import demo + + # 设置允许访问的路径 + allowed_paths = [ + "F:/向阳也有米/24版本/12月", # 视频根目录 + "F:/向阳也有米/24版本/12月/1125-13-回家1", # 子目录 + str(root_dir), # 项目根目录 + str(settings.VIDEO_DIR), # 配置的视频目录 + ] + + # 使用标准配置启动 + demo.queue().launch( + server_name=settings.HOST, + server_port=settings.PORT, + share=False, # 不创建公共链接 + show_api=False, # 不显示API文档 + show_error=True, # 显示错误信息 + debug=True, # 启用调试模式 + prevent_thread_lock=True, # 防止线程锁 + ssl_verify=False, # 禁用SSL验证 + allowed_paths=allowed_paths, # 允许访问的路径列表 + _frontend=False # 禁用前端自动打开 + ) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/video_file_manager/ui/__init__.py b/video_file_manager/ui/__init__.py new file mode 100644 index 00000000..8a883236 --- /dev/null +++ b/video_file_manager/ui/__init__.py @@ -0,0 +1,3 @@ +""" +UI界面模块 +""" \ No newline at end of file diff --git a/video_file_manager/ui/app.py b/video_file_manager/ui/app.py new file mode 100644 index 00000000..1f64ed6c --- /dev/null +++ b/video_file_manager/ui/app.py @@ -0,0 +1,288 @@ +""" +主应用界面 +""" +import gradio as gr +from pathlib import Path +from typing import Dict, Optional, Tuple +import logging +import tkinter as tk +from tkinter import filedialog + +from video_file_manager.core.file_manager import FileManager +from video_file_manager.core.metadata_manager import MetadataManager +from video_file_manager.ui.components.file_tree import create_file_tree +from video_file_manager.ui.components.video_info import create_video_info +from video_file_manager.utils.helpers import extract_text_from_video + +logger = logging.getLogger(__name__) + +class VideoManagerApp: + def __init__(self): + """初始化应用""" + self.default_dir = Path("F:/向阳也有米/24版本/12月") + self.file_manager = FileManager(self.default_dir) if self.default_dir.exists() else FileManager() + self.metadata_manager = MetadataManager() + self.selected_file: Optional[Path] = None + logger.info(f"初始化应用完成,默认目录:{self.default_dir}") + + def on_directory_select(self, directory: str) -> Tuple[str, list]: + """ + 目录选择回调 + + Args: + directory: 选中的目录路径 + + Returns: + Tuple[str, list]: (目录路径, 文件列表) + """ + try: + logger.info(f"选择目录: {directory}") + if directory: + self.file_manager = FileManager(Path(directory)) + return directory, self.refresh_files() + return "", [] + except Exception as e: + logger.exception(f"切换目录失败: {str(e)}") + return "", [] + + def on_file_select(self, evt: gr.SelectData) -> tuple: + """ + 文件选择回调 + + Args: + evt: 选择事件数据 + + Returns: + tuple: (视频预览, 文本内容, 提取按钮状态, 文件名, 完整路径, 标题, 标签, 描述, 平台信息) + """ + try: + logger.debug(f"选择事件数据: {evt}") + logger.debug(f"选择事件索引: {evt.index}") + logger.debug(f"选择事件值: {evt.value}") + + # 获取选中行的数据 + row_index = evt.index[0] # 获取选中的行索引 + videos = self.file_manager.scan_videos() # 
获取当前的视频列表 + if not videos or row_index >= len(videos): + logger.error(f"无效的行索引: {row_index}") + return None, "", False, "", "", "", "", "", {} # 注意这里返回9个值 + + video = videos[row_index] # 获取选中的视频信息 + logger.debug(f"选中的视频信息: {video}") + + # 使用文件管理器中的完整信息 + file_name = video["name"] + relative_path = video["relative_path"] + + logger.debug(f"文件名: {file_name}") + logger.debug(f"相对路径: {relative_path}") + + # 确保使用绝对路径 + base_dir = self.file_manager.base_dir.resolve() + full_path = base_dir / relative_path + + logger.debug(f"基础目录: {base_dir}") + logger.debug(f"相对路径: {relative_path}") + logger.info(f"选中文件: {full_path}") + + if full_path.is_file(): + self.selected_file = full_path + metadata = self.metadata_manager.read_metadata(full_path.parent) + logger.debug(f"元数据信息: {metadata}") + + # 返回文件信息和元数据 + return ( + str(full_path), # 视频预览路径 + "", # 文本内容(初始为空) + True, # 提取按钮状态(可用) + file_name, # 文件名 + str(full_path), # 完整路径 + metadata.get("video_info", {}).get("title", ""), # 标题 + ", ".join(metadata.get("video_info", {}).get("tags", [])), # 标签 + metadata.get("video_info", {}).get("description", ""), # 描述 + metadata.get("platforms", {}) # 平台信息 + ) + except Exception as e: + logger.exception(f"处理文件选择失败: {str(e)}") + + # 如果出现错误,返回空值 + return None, "", False, "", "", "", "", "", {} # 注意这里返回9个值 + + def refresh_files(self) -> list: + """ + 刷新文件列表 + + Returns: + list: 更新后的文件列表数据 + """ + logger.info("开始刷新文件列表") + videos = self.file_manager.scan_videos() + logger.info(f"找到 {len(videos)} 个视频文件") + logger.debug(f"视频列表: {videos}") + + rows = [ + [ + video["name"], + video["relative_path"], + video["size"], + video["modified"].strftime("%Y-%m-%d %H:%M:%S") + ] + for video in videos + ] + return rows + + def select_directory(self) -> str: + """ + 打开目录选择对话框 + + Returns: + str: 选中的目录路径 + """ + try: + root = tk.Tk() + root.withdraw() # 隐藏主窗口 + root.attributes('-topmost', True) # 确保对话框在最前面 + + directory = filedialog.askdirectory( + title="选择视频目录", + initialdir=str(self.file_manager.base_dir), + parent=root + ) + + logger.debug(f"选择的目录: {directory}") + + if directory: + return directory + return str(self.file_manager.base_dir) + except Exception as e: + logger.exception(f"打开目录选择对话框失败: {str(e)}") + return str(self.file_manager.base_dir) + finally: + try: + root.destroy() # 确保窗口被销毁 + except Exception: + pass + + def extract_text(self, video_path: str) -> str: + """ + 提取视频文字 + + Args: + video_path: 视频文件路径 + + Returns: + str: 提取的文字内容 + """ + try: + if not video_path: + logger.warning("未提供视频路径") + return "请先选择视频文件" + + logger.info(f"开始提取视频文字,输入路径: {video_path}") + logger.debug(f"路径类型: {type(video_path)}") + + # 确保路径是字符串类型 + if not isinstance(video_path, str): + video_path = str(video_path) + + # 检查路径是否存在 + video_file = Path(video_path) + logger.debug(f"转换后的路径: {video_file.resolve()}") + logger.debug(f"文件是否存在: {video_file.exists()}") + + text = extract_text_from_video(video_path) + logger.info("文字提取完成") + return text + except Exception as e: + logger.exception(f"提取文字失败: {str(e)}") + return f"提取失败: {str(e)}" + + def create_ui(self) -> gr.Blocks: + """ + 创建用户界面 + + Returns: + gr.Blocks: Gradio界面 + """ + logger.info("开始创建用户界面") + with gr.Blocks(title="视频文件管理器") as app: + gr.Markdown("# 视频文件管理器") + + with gr.Row(): + # 左侧面板 + with gr.Column(scale=2): + gr.Markdown("## 文件目录") + with gr.Row(): + dir_input = gr.Textbox( + label="视频目录", + placeholder="请选择或输入视频目录路径", + value=str(self.default_dir), + show_label=True + ) + dir_button = gr.Button("选择目录", variant="secondary") + refresh_btn = gr.Button("刷新目录", variant="primary") + file_list 
= create_file_tree({}) + logger.debug("文件列表组件创建完成") + + # 右侧面板 + with gr.Column(scale=3): + info_components = create_video_info() + logger.debug("信息显示组件创建完成") + + # 事件处理 + dir_button.click( + fn=self.select_directory, + outputs=dir_input, + api_name="select_directory" + ).then( + fn=self.on_directory_select, + inputs=dir_input, + outputs=[dir_input, file_list], + api_name="update_directory" + ) + + refresh_btn.click( + fn=self.refresh_files, + outputs=[file_list] + ) + + file_list.select( + fn=self.on_file_select, + outputs=info_components + ) + + # 提取文字按钮事件 + info_components[2].click( # extract_btn + fn=self.extract_text, + inputs=[info_components[4]], # file_path + outputs=[info_components[1]] # transcript_text + ) + + logger.info("用户界面创建完成") + return app + +def create_app() -> gr.Blocks: + """ + 创建应用实例 + + Returns: + gr.Blocks: Gradio应用界面 + """ + app = VideoManagerApp() + return app.create_ui() + +# 创建全局demo实例 +demo = create_app() + +# 仅在直接运行时启动服务器 +if __name__ == "__main__": + # 使用标准配置启动 + demo.queue().launch( + server_name="0.0.0.0", + server_port=7860, + share=False, + show_api=False, + show_error=True, + debug=True, + prevent_thread_lock=True + ) diff --git a/video_file_manager/ui/components/__init__.py b/video_file_manager/ui/components/__init__.py new file mode 100644 index 00000000..2bc423cf --- /dev/null +++ b/video_file_manager/ui/components/__init__.py @@ -0,0 +1,3 @@ +""" +UI组件模块 +""" \ No newline at end of file diff --git a/video_file_manager/ui/components/file_tree.py b/video_file_manager/ui/components/file_tree.py new file mode 100644 index 00000000..6746e84a --- /dev/null +++ b/video_file_manager/ui/components/file_tree.py @@ -0,0 +1,67 @@ +""" +文件树组件 +""" +import gradio as gr +from pathlib import Path +from typing import Dict, Callable, Optional +from datetime import datetime +import logging + +logger = logging.getLogger(__name__) + +def create_file_tree( + directory_data: Dict, + on_select: Optional[Callable] = None +) -> gr.Dataframe: + """ + 创建文件列表组件 + + Args: + directory_data: 目录数据(未使用) + on_select: 选择回调函数 + + Returns: + gr.Dataframe: 文件列表组件 + """ + from video_file_manager.config import settings + from video_file_manager.core.file_manager import FileManager + + # 扫描视频文件 + file_manager = FileManager() + videos = file_manager.scan_videos() + + # 准备表格数据 + headers = ["名称", "相对路径", "大小", "修改时间"] + rows = [ + [ + video["name"], + video["relative_path"], # 使用从 file_manager 获取的相对路径 + video["size"], + video["modified"].strftime("%Y-%m-%d %H:%M:%S") + ] + for video in videos + if video # 只处理有效的文件信息 + ] + + logger.debug(f"表格数据: {rows}") + + return gr.Dataframe( + headers=headers, + datatype=["str", "str", "str", "str"], + value=rows, + interactive=False, # 设置为只读 + wrap=True, # 允许文本换行 + row_count=len(rows) # 显示所有行并启用选择功能 + ) + +def _format_size(size: int) -> str: + """格式化文件大小""" + for unit in ['B', 'KB', 'MB', 'GB']: + if size < 1024: + return f"{size:.1f} {unit}" + size /= 1024 + return f"{size:.1f} TB" + +def _format_time(timestamp: float) -> str: + """格式化时间戳""" + return datetime.fromtimestamp(timestamp).strftime("%Y-%m-%d %H:%M:%S") \ No newline at end of file diff --git a/video_file_manager/ui/components/video_info.py b/video_file_manager/ui/components/video_info.py new file mode 100644 index 00000000..3d5b2a16 --- /dev/null +++ b/video_file_manager/ui/components/video_info.py @@ -0,0 +1,99 @@ +""" +视频信息组件 +""" +import gradio as gr +from pathlib import Path +from typing import Dict, Optional, List + +def create_video_info() -> List[gr.Component]: + """ + 创建视频信息组件 + + 
Returns: + List[gr.Component]: 视频信息组件列表 + """ + components = [] + + # 基本信息 + with gr.Group(): + gr.Markdown("### 基本信息") + + # 视频预览和文本显示 + with gr.Row(): + with gr.Column(scale=1): + video_preview = gr.Video( + label="视频预览", + interactive=False, + height=400, + width=600 + ) + components.append(video_preview) + + with gr.Column(scale=1): + transcript_text = gr.Textbox( + label="视频文字", + value="", + interactive=True, + show_copy_button=True, + lines=25, # 增加行数以匹配视频预览高度 + container=True, # 使用容器样式 + scale=1 # 填充整个列宽 + ) + extract_btn = gr.Button( + "提取文字", + variant="primary", + scale=1 # 填充整个列宽 + ) + components.extend([transcript_text, extract_btn]) # 添加到组件列表 + + # 文件信息 + with gr.Row(): + file_name = gr.Textbox( + label="文件名", + value="", + interactive=False, + show_copy_button=True + ) + file_path = gr.Textbox( + label="完整路径", + value="", + interactive=False, + show_copy_button=True + ) + components.extend([file_name, file_path]) + + # 元数据信息 + with gr.Group(): + gr.Markdown("### 元数据信息") + title = gr.Textbox( + label="标题", + value="", + interactive=True, + placeholder="请输入视频标题..." + ) + tags = gr.Textbox( + label="标签", + value="", + interactive=True, + placeholder="请输入标签,用逗号分隔..." + ) + description = gr.Textbox( + label="描述", + value="", + interactive=True, + placeholder="请输入视频描述...", + lines=3 + ) + components.extend([title, tags, description]) + + # 平台发布状态 + with gr.Group(): + gr.Markdown("### 平台发布状态") + platforms = gr.JSON( + label="平台状态", + value={}, + show_label=True + ) + components.append(platforms) + + return components \ No newline at end of file diff --git a/video_file_manager/utils/__init__.py b/video_file_manager/utils/__init__.py new file mode 100644 index 00000000..61b7bbdc --- /dev/null +++ b/video_file_manager/utils/__init__.py @@ -0,0 +1,3 @@ +""" +工具函数模块 +""" \ No newline at end of file diff --git a/video_file_manager/utils/helpers.py b/video_file_manager/utils/helpers.py new file mode 100644 index 00000000..8526eabb --- /dev/null +++ b/video_file_manager/utils/helpers.py @@ -0,0 +1,241 @@ +""" +工具函数 +""" +from pathlib import Path +from typing import Union +import logging +import whisper +import subprocess +import librosa +import numpy as np +from opencc import OpenCC # 添加OpenCC导入 + +logger = logging.getLogger(__name__) + +def format_size(size_in_bytes: Union[int, float]) -> str: + """ + 格式化文件大小 + + Args: + size_in_bytes: 文件大小(字节) + + Returns: + str: 格式化后的大小 + """ + for unit in ['B', 'KB', 'MB', 'GB']: + if size_in_bytes < 1024: + return f"{size_in_bytes:.1f} {unit}" + size_in_bytes /= 1024 + return f"{size_in_bytes:.1f} TB" + +def setup_logging( + log_file: Union[str, Path], + level: str = "INFO", + console_level: str = "INFO", + format_str: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) -> None: + """ + 配置日志 + + Args: + log_file: 日志文件路径 + level: 文件日志级别 + console_level: 控制台日志级别 + format_str: 日志格式 + """ + # 创建根日志记录器 + root_logger = logging.getLogger() + root_logger.setLevel(logging.DEBUG) # 设置为最低级别,让处理器决定要显示的级别 + + # 清除现有的处理器 + root_logger.handlers.clear() + + # 创建格式化器 + formatter = logging.Formatter(format_str) + + # 文件处理器 + file_handler = logging.FileHandler(log_file, encoding='utf-8') + file_handler.setLevel(getattr(logging, level.upper())) + file_handler.setFormatter(formatter) + root_logger.addHandler(file_handler) + + # 控制台处理器 + console_handler = logging.StreamHandler() + console_handler.setLevel(getattr(logging, console_level.upper())) + console_handler.setFormatter(formatter) + root_logger.addHandler(console_handler) + + # 设置第三方库的日志级别 + 
logging.getLogger("gradio").setLevel(logging.WARNING) + logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("uvicorn").setLevel(logging.WARNING) + +def is_video_file(file_path: Path) -> bool: + """ + 检查是否是视频文件 + + Args: + file_path: 文件路径 + + Returns: + bool: 是否是视频文件 + """ + video_extensions = {'.mp4', '.mov', '.avi', '.mkv'} + return file_path.is_file() and file_path.suffix.lower() in video_extensions + +def extract_text_from_video(video_path: str) -> str: + """ + 从视频中提取文本 + + Args: + video_path: 视频文件路径 + + Returns: + str: 提取的文本内容(带时间戳的分段文本) + """ + # 初始化繁体转简体转换器 + converter = OpenCC('t2s') # 繁体到简体 + + # 确保视频文件存在 + video_file = Path(video_path).resolve() # 转换为绝对路径 + logger.info(f"检查视频文件: {video_file}") + if not video_file.is_file(): + raise FileNotFoundError(f"视频文件不存在: {video_file}") + logger.info(f"视频文件存在: {video_file}") + + # 获取项目根目录 + current_file = Path(__file__).resolve() + project_root = current_file.parent.parent.parent + logger.info(f"当前文件: {current_file}") + logger.info(f"项目根目录: {project_root}") + + # 提取音频 + audio_path = video_file.parent / f"{video_file.stem}_audio.wav" # 使用视频所在目录 + audio_path = audio_path.resolve() # 转换为绝对路径 + # 设置文本输出路径 + text_path = video_file.parent / f"{video_file.stem}_transcript.txt" + text_path = text_path.resolve() + + ffmpeg_path = project_root / "video_file_manager" / "ffmpeg" / "bin" / "ffmpeg.exe" + ffmpeg_path = ffmpeg_path.resolve() # 转换为绝对路径 + + # 检查并创建音频文件的父目录 + audio_path.parent.mkdir(parents=True, exist_ok=True) + + logger.info(f"音频输出路径: {audio_path}") + logger.info(f"文本输出路径: {text_path}") + logger.info(f"音频目录是否存在: {audio_path.parent.exists()}") + logger.info(f"使用的FFmpeg路径: {ffmpeg_path}") + logger.info(f"FFmpeg是否存在: {ffmpeg_path.is_file()}") + + if not ffmpeg_path.is_file(): + raise FileNotFoundError(f"找不到FFmpeg: {ffmpeg_path}") + + segments_text = [] # 存储带时间戳的分段文本 + + try: + # 检查音频文件是否已存在 + if audio_path.is_file(): + audio_size = audio_path.stat().st_size + if audio_size > 0: + logger.info(f"音频文件已存在且大小正常({audio_size}字节),跳过转换步骤") + else: + logger.warning(f"已存在的音频文件大小为0,将重新转换") + audio_path.unlink() # 删除大小为0的文件 + + # 如果音频文件不存在或已被删除,则执行转换 + if not audio_path.is_file(): + # 执行FFmpeg命令 + logger.info("开始执行FFmpeg命令...") + result = subprocess.run( + [str(ffmpeg_path), '-i', str(video_file), str(audio_path)], + check=True, + capture_output=True, + text=True + ) + logger.info("音频提取完成") + logger.debug(f"FFmpeg输出: {result.stdout}") + if result.stderr: + logger.debug(f"FFmpeg错误输出: {result.stderr}") + + # 确认音频文件存在 + logger.info(f"检查生成的音频文件: {audio_path}") + if not audio_path.is_file(): + raise FileNotFoundError(f"音频文件未生成: {audio_path}") + logger.info(f"音频文件生成成功,大小: {audio_path.stat().st_size} 字节") + + # 加载 Whisper 模型 + logger.info("开始加载Whisper模型") + model = whisper.load_model("base", device="cpu") # 显式指定使用CPU + logger.info("Whisper模型加载完成") + + # 转录音频 + logger.info(f"开始转录音频: {audio_path}") + # 再次确认音频文件存在 + if not audio_path.is_file(): + raise FileNotFoundError(f"转录前音频文件丢失: {audio_path}") + + # 检查音频文件大小 + audio_size = audio_path.stat().st_size + logger.info(f"转录前音频文件大小: {audio_size} 字节") + if audio_size == 0: + raise ValueError(f"音频文件大小为0: {audio_path}") + + # 使用librosa加载音频文件 + logger.info("使用librosa加载音频文件...") + try: + audio_array, sampling_rate = librosa.load(str(audio_path), sr=16000) + logger.info(f"音频加载成功: 采样率={sampling_rate}Hz, 长度={len(audio_array)}采样点") + + # 使用加载的音频数据进行转录 + logger.info("开始转录音频数据...") + result = model.transcribe( + audio_array, # 直接使用音频数据 + fp16=False, + language="zh" + ) + logger.info("转录完成") + + # 保存带时间戳的文本 + 
segments = result["segments"] + + logger.info("保存转录文本...") + with open(text_path, "w", encoding="utf-8") as f: + # 只写入带时间戳的分段文本 + for segment in segments: + start = segment["start"] + end = segment["end"] + text = segment["text"] + # 将文本转换为简体中文 + simplified_text = converter.convert(text) + segment_line = f"[{start:.2f}s -> {end:.2f}s] {simplified_text}" + segments_text.append(segment_line) + f.write(segment_line + "\n") + + logger.info(f"转录文本已保存到: {text_path}") + # 返回带时间戳的分段文本 + return "\n".join(segments_text) + + except Exception as e: + logger.error(f"音频加载失败: {e}") + raise + + except subprocess.CalledProcessError as e: + logger.error(f"FFmpeg处理失败: {e.stderr}") + raise + except Exception as e: + logger.error(f"处理过程中出错: {str(e)}") + logger.error(f"错误类型: {type(e)}") + logger.error(f"错误详情: {str(e)}") + raise + finally: + # 只有在成功获取文本后才清理临时文件 + if segments_text: + try: + if audio_path.exists(): + audio_path.unlink() + logger.info("清理临时音频文件") + except Exception as e: + logger.warning(f"清理临时文件失败: {str(e)}") + else: + logger.info("保留临时音频文件以供调试") \ No newline at end of file diff --git a/video_file_manager/utils/image_text_detector.py b/video_file_manager/utils/image_text_detector.py new file mode 100644 index 00000000..c683896c --- /dev/null +++ b/video_file_manager/utils/image_text_detector.py @@ -0,0 +1,229 @@ +""" +图片文字识别器 +用于识别图片中最大的文字,基于DeepSeek VL2模型实现 +""" +from pathlib import Path +from typing import Union, Optional, Dict, Any, List +import logging +import aiohttp +import asyncio +import os +from dotenv import load_dotenv +from urllib.parse import urlparse + +# 加载环境变量 +load_dotenv() + +logger = logging.getLogger(__name__) + +class APIError(Exception): + """API调用相关错误""" + pass + +class ImageTextDetector: + """ + 图片文字识别器 - 专门用于识别图片中最大的文字 + 基于DeepSeek VL2模型实现 + """ + def __init__( + self, + api_key: Optional[str] = None, + api_base: Optional[str] = None, + model: str = "Qwen/Qwen2-VL-72B-Instruct", + timeout: int = 30, + max_retries: int = 3, + max_concurrent: int = 5 # 最大并发数 + ): + """ + 初始化图片文字识别器 + + Args: + api_key: SiliconFlow API密钥(可选,默认从环境变量获取) + api_base: API基础URL(可选,默认从环境变量获取) + model: 使用的模型名称 + timeout: API调用超时时间(秒) + max_retries: 最大重试次数 + max_concurrent: 最大并发请求数 + """ + self.api_key = api_key or os.getenv('SILICONFLOW_API_KEY') + if not self.api_key: + raise ValueError("未设置API密钥,请在.env文件中设置SILICONFLOW_API_KEY") + + self.api_base = (api_base or os.getenv('SILICONFLOW_API_BASE', 'https://api.siliconflow.cn/v1')).rstrip('/') + self.model = model + self.timeout = timeout + self.max_retries = max_retries + self.max_concurrent = max_concurrent + self._session: Optional[aiohttp.ClientSession] = None + self._semaphore: Optional[asyncio.Semaphore] = None + + async def _ensure_session(self) -> aiohttp.ClientSession: + """确保aiohttp会话存在""" + if self._session is None or self._session.closed: + self._session = aiohttp.ClientSession( + headers={ + "Authorization": f"Bearer {self.api_key}", + "Content-Type": "application/json" + } + ) + return self._session + + async def _ensure_semaphore(self) -> asyncio.Semaphore: + """确保信号量存在""" + if self._semaphore is None: + self._semaphore = asyncio.Semaphore(self.max_concurrent) + return self._semaphore + + async def detect_text( + self, + image_url: str, + prompt: str = "请识别这张图片中最大的文字是什么?只需要返回文字内容,不需要其他解释。", + stop: list = [] + ) -> str: + """ + 识别单个图片中的文字 + + Args: + image_url: 图片URL + prompt: 提示词 + stop: 停止词列表 + + Returns: + str: 识别出的文字 + + Raises: + ValueError: 图片URL无效 + APIError: API调用失败 + """ + try: + # 验证URL格式 + parsed_url = urlparse(image_url) + if 
+                raise ValueError(f"Invalid image URL: {image_url}")
+
+            # Build the API request payload
+            payload = {
+                "model": self.model,
+                "stop": stop or [],
+                "messages": [
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "image_url",
+                                "image_url": {"url": image_url}
+                            },
+                            {
+                                "type": "text",
+                                "text": prompt
+                            }
+                        ]
+                    }
+                ]
+            }
+
+            # Get the concurrency semaphore
+            semaphore = await self._ensure_semaphore()
+
+            # Send the API request
+            session = await self._ensure_session()
+            async with semaphore:  # limit concurrency with the semaphore
+                for attempt in range(self.max_retries):
+                    try:
+                        async with session.post(
+                            f"{self.api_base}/chat/completions",
+                            json=payload,
+                            timeout=self.timeout
+                        ) as response:
+                            if response.status == 200:
+                                result = await response.json()
+                                text = result["choices"][0]["message"]["content"].strip()
+
+                                # Strip any formatting artifacts and extra whitespace
+                                text = text.replace("{", "").replace("}", "").replace("标题简称:", "").strip()
+                                return text
+
+                            else:
+                                error_text = await response.text()
+                                raise APIError(f"API call failed (HTTP {response.status}): {error_text}")
+
+                    except asyncio.TimeoutError:
+                        if attempt == self.max_retries - 1:
+                            raise APIError(f"API call timed out (after {attempt + 1} attempts)")
+                        logger.warning(f"API call timed out, retrying (attempt {attempt + 1})...")
+                        await asyncio.sleep(1 * (attempt + 1))  # linear backoff between retries
+
+                    except Exception as e:
+                        if attempt == self.max_retries - 1:
+                            raise APIError(f"API call failed: {str(e)}")
+                        logger.warning(f"API call failed, retrying (attempt {attempt + 1}): {str(e)}")
+                        await asyncio.sleep(1 * (attempt + 1))
+
+        except Exception as e:
+            logger.error(f"Recognition failed: {str(e)}")
+            raise
+
+    async def detect_batch(
+        self,
+        image_urls: List[str],
+        prompt: str = "请识别这张图片中最大的文字是什么?只需要返回文字内容,不需要其他解释。",
+        stop: Optional[List[str]] = None
+    ) -> List[Dict[str, str]]:
+        """
+        Recognize text in multiple images in one batch.
+
+        Args:
+            image_urls: List of image URLs
+            prompt: Prompt text
+            stop: Optional list of stop sequences
+
+        Returns:
+            List[Dict[str, str]]: Result list; each element is {"url": "image URL", "text": "recognized text", "error": "error message"}
+        """
+        tasks = []
+        for url in image_urls:
+            task = asyncio.create_task(self._process_single_url(url, prompt, stop))
+            tasks.append(task)
+
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        # Collect the results
+        processed_results = []
+        for url, result in zip(image_urls, results):
+            if isinstance(result, Exception):
+                processed_results.append({
+                    "url": url,
+                    "text": None,
+                    "error": str(result)
+                })
+            else:
+                processed_results.append({
+                    "url": url,
+                    "text": result,
+                    "error": None
+                })
+
+        return processed_results
+
+    async def _process_single_url(self, url: str, prompt: str, stop: Optional[List[str]]) -> str:
+        """Thin wrapper around detect_text; exceptions propagate so gather() can record them per URL."""
+        try:
+            return await self.detect_text(url, prompt, stop)
+        except Exception:
+            raise
+
+    async def close(self):
+        """Release resources."""
+        if self._session and not self._session.closed:
+            await self._session.close()
+
+    async def __aenter__(self):
+        """Async context manager entry."""
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit."""
+        await self.close()
+
+# Create a default shared instance (raises at import time if SILICONFLOW_API_KEY is not set)
+detector = ImageTextDetector()
\ No newline at end of file
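The detector above is most safely driven through its async context manager, which guarantees the underlying aiohttp session is closed. Below is a minimal usage sketch, assuming the module is imported the same way t1.py imports it and that SILICONFLOW_API_KEY is set; the image URLs are hypothetical placeholders.

```python
import asyncio

from image_text_detector import ImageTextDetector

async def main() -> None:
    # Placeholder URLs - substitute real, publicly reachable image links.
    urls = [
        "https://example.com/cover-1.jpg",
        "https://example.com/cover-2.jpg",
    ]
    # __aenter__/__aexit__ ensure the aiohttp session is closed on exit.
    async with ImageTextDetector(max_concurrent=2) as detector:
        results = await detector.detect_batch(urls)
    for item in results:
        if item["error"]:
            print(f"{item['url']} -> failed: {item['error']}")
        else:
            print(f"{item['url']} -> {item['text']}")

asyncio.run(main())
```

Because detect_batch already caps concurrency through the internal semaphore, callers can hand over the full URL list at once.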
"https://finder.video.qq.com/251/20304/stodownload?encfilekey=rjD5jyTuFrIpZ2ibE8T7Ym3K77SEULgkia1FnbTHE3r1VZic5icWRSrQsJrqLF51L0yF0w6GQLnbzws0Kksz7GeRicqlMKW6x360siazAqJ3UBmich8IiaMpISydsQ&token=o3K9JoTic9IjjCEWI9eqVYutCnic9Vicyv6EOjgqokibHH8nJeMVgpPLsHrPg2gcALYCLvxdNKEicjqTlPkeBnl4RcJCtj2QCiahexwsBJ3SnNplemTcUlZIUC7uMicGQfutkibh4Jf6UxNEnot8AR9TCXQrgjOnicul3Qib7KeicYtVqyYa59MJyNXkkzKJw&idx=1&hy=SZ&m=&scene=2&uzid=2", + "https://finder.video.qq.com/251/20304/stodownload?encfilekey=rjD5jyTuFrIpZ2ibE8T7Ym3K77SEULgkiaTYSYnPjAH4KPOXjbdQc6AsNuUllx8XlM3uuNvrJLsEuASXzdx2Tmicgo3ibNazHThgNHVwjeyRPU4lEDR5YAZVXg&token=Cvvj5Ix3eexZiajDdmtxmMH0leeRT01p1MGfQAsibwhlbOwVzzwxyrntImicjSYoHxTyZk5ZqfedumJNXmCia4eIeEje0y5WU9Elk58r0qiaiaZVtX4GsxOEnYeQW5FOfvSyboVpX0pRLL1LPwesibSbvoYGoiaaHr4sfU09mW4exjMibAkNY0TO8WgzS9kOgibDRQich0r&idx=1&hy=SZ&m=&scene=2&uzid=2", + "https://finder.video.qq.com/251/20304/stodownload?encfilekey=rjD5jyTuFrIpZ2ibE8T7Ym3K77SEULgkiaoCo8lL58OmloAN31zWRTClrlX0xHLGdZWuPgG5wuM78P4WTECa8VAeSiacE8gyFF9CyyrzgibyvQiawjiaPKp0h7qQ&token=Cvvj5Ix3eexZiajDdmtxmMH0leeRT01p1buiaW9rw9hFqMbHLWJ9M0vCJN4vJuEnFMDjQB8piaYsvedRwI1MkVo2Vv5ibmGvonSib1Wp0PM8icxM9Vqa9YMYUndKBCeJiakern4WYJ3aLt9libib71fZ4QaKtr2o9M3U6nBHxYvgnR98RfSMIykQX9tbIwFxF1ndNmvdh&idx=1&hy=SZ&m=&scene=2&uzid=2" +] + +async def process_single_image(url: str, index: int) -> dict: + """处理单个图片并记录时间""" + start_time = time.time() + try: + text = await detector.detect_text(url) + end_time = time.time() + return { + "index": index, + "url": url, + "text": text, + "error": None, + "time_taken": end_time - start_time + } + except Exception as e: + end_time = time.time() + return { + "index": index, + "url": url, + "text": None, + "error": str(e), + "time_taken": end_time - start_time + } + +async def main(): + print(f"开始处理 {len(image_urls)} 张图片...") + total_start_time = time.time() + + # 创建任务列表 + tasks = [process_single_image(url, i) for i, url in enumerate(image_urls, 1)] + + # 并发执行所有任务 + results = await asyncio.gather(*tasks) + + # 计算总耗时 + total_time = time.time() - total_start_time + + # 输出详细结果 + print("\n处理结果:") + print("-" * 50) + + success_count = 0 + error_count = 0 + + for result in sorted(results, key=lambda x: x["index"]): + print(f"\n图片 {result['index']}:") + if result["error"]: + print(f" 状态: 失败") + print(f" 错误: {result['error']}") + error_count += 1 + else: + print(f" 状态: 成功") + print(f" 识别结果: {result['text']}") + success_count += 1 + print(f" 处理时间: {result['time_taken']:.2f}秒") + + # 输出统计信息 + print("\n统计信息:") + print("-" * 50) + print(f"总处理时间: {total_time:.2f}秒") + print(f"平均每张耗时: {total_time/len(image_urls):.2f}秒") + print(f"成功数量: {success_count}") + print(f"失败数量: {error_count}") + print(f"成功率: {(success_count/len(image_urls))*100:.1f}%") + +# 运行示例 +asyncio.run(main()) \ No newline at end of file diff --git a/videos/demo.txt b/videos/demo.txt index 94b58ea3..4320e927 100644 --- a/videos/demo.txt +++ b/videos/demo.txt @@ -1,2 +1,3 @@ -这位勇敢的男子为了心爱之人每天坚守 🥺❤️‍🩹 -#坚持不懈 #爱情执着 #奋斗使者 #短视频 \ No newline at end of file +这位勇敢的男子为了心爱之人每天坚守 🥺❤️‍🩹 +#坚持不懈 #爱情执着 #奋斗使者 #短视频 +@微信创作者 @微信创作者助手 @向阳也有米 \ No newline at end of file diff --git "a/\345\276\205\345\256\214\346\210\220\345\212\237\350\203\275.md" "b/\345\276\205\345\256\214\346\210\220\345\212\237\350\203\275.md" new file mode 100644 index 00000000..bf382f3c --- /dev/null +++ "b/\345\276\205\345\256\214\346\210\220\345\212\237\350\203\275.md" @@ -0,0 +1,25 @@ +## 视频号 + - 仅自己可见标签没有提取 + + + + + +## 快手 +- 实际发布后验证 标签和好友是否能正常跳转 + + + +## 抖音 +- 视频上传模块没有调试,接下来应该主要调试视频上传发布模块。 + + + +## 小红书 + + + + +## 总结 +- 
+- After locating an element, always log the HTML of the matched element; if it cannot be found, log the full page HTML, feed it to the LLM, and update the selector used for locating it.
+- Where possible, grab the page HTML directly, clean it with clean_html.py, and have the LLM read the relevant tag hierarchy and attributes to derive the best selector.
\ No newline at end of file
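The selector-debugging routine described in the summary can be wrapped in a small helper. Below is a rough sketch, assuming Playwright's sync API as the browser driver (the automation library actually used by the uploaders is not shown in this diff); the helper name and logging setup are illustrative only.

```python
import logging

from playwright.sync_api import Page

logger = logging.getLogger(__name__)

def locate_with_debug(page: Page, selector: str):
    """Return the element for `selector`, logging HTML evidence either way."""
    element = page.query_selector(selector)
    if element:
        # Log the matched element's outer HTML so the selector can be verified.
        logger.info("Matched %s: %s", selector, element.evaluate("el => el.outerHTML"))
        return element
    # Selector missed: dump the full page HTML so it can be cleaned
    # (for example with clean_html.py) and fed to an LLM to derive a better selector.
    logger.warning("Selector %s not found, dumping page HTML for analysis", selector)
    logger.warning(page.content())
    return None
```

The dumped page HTML can then be run through clean_html.py before being handed to the model, as the second bullet suggests.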