xhs_crawler/config/base_config.py
Relakkes aba9f14f50 refactor: 规范日志打印
feat: B站指定视频ID爬取(bvid)
2023-12-23 01:04:08 +08:00

66 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Base configuration
PLATFORM = "xhs"
KEYWORDS = "python,golang"
# Login method: qrcode or phone or cookie
LOGIN_TYPE = "qrcode"
COOKIES = ""
CRAWLER_TYPE = "search"
# Whether to enable the IP proxy
ENABLE_IP_PROXY = False
# Size of the proxy IP pool
IP_PROXY_POOL_COUNT = 2
# Retry interval: 30 minutes
RETRY_INTERVAL = 30 * 60
# Headless-browser flag. True: headless mode on; False: off (a browser
# window will be opened).
HEADLESS = False
# Whether to save the login state
SAVE_LOGIN_STATE = True
# Setting for the browser files cached for the user's browser.
# The %s placeholder will be replaced by the platform name.
USER_DATA_DIR = "%s_user_data_dir"
# Limit on the number of videos/posts to crawl
CRAWLER_MAX_NOTES_COUNT = 20
# Limit on the number of concurrent crawlers
MAX_CONCURRENCY_NUM = 10
# Maximum number of comments fetched per video/post (0 means no limit)
MAX_COMMENTS_PER_POST = 10
# Comment keyword filter: only comments containing one of these keywords are
# kept; leave the list empty for no restriction.
COMMENT_KEYWORDS = [
    # "真棒",  # example keyword (means "awesome")
    # ... add more keywords here
]
# Note IDs to crawl on Xiaohongshu (xhs)
XHS_SPECIFIED_ID_LIST = [
    "6422c2750000000027000d88",
    "64ca1b73000000000b028dd2",
    "630d5b85000000001203ab41",
    # ... add more note IDs here
]
# IDs to crawl on Douyin (dy)
# Every entry ends with a trailing comma, matching the other ID lists in this
# file. Without it, appending a new entry after the last one would silently
# merge the two adjacent string literals into a single bogus ID.
DY_SPECIFIED_ID_LIST = [
    "7280854932641664319",
    "7202432992642387233",
    # ... add more IDs here
]
# IDs to crawl on the Kuaishou (ks) platform; currently none are specified
KS_SPECIFIED_ID_LIST = []
# Video bvids to crawl on the Bilibili platform
BILI_SPECIFIED_ID_LIST = [
    "BV1d54y1g7db",
    "BV1Sz4y1U77N",
    "BV14Q4y1n7jz",
    # ... add more bvids here
]