抖音二级评论

This commit is contained in:
ZuWard 2024-05-29 06:35:37 +08:00
parent 5e145c31b9
commit 0ba68809a5
3 changed files with 45 additions and 8 deletions

View File

@ -18,12 +18,12 @@
> 下面不支持的项目，相关的代码架构已经搭建好，只需要实现对应的方法即可，欢迎大家提交PR > 下面不支持的项目，相关的代码架构已经搭建好，只需要实现对应的方法即可，欢迎大家提交PR
| 平台 | 关键词搜索 | 指定帖子ID爬取 | 二级评论 | 指定创作者主页 | 登录态缓存 | IP代理池 | | 平台 | 关键词搜索 | 指定帖子ID爬取 | 二级评论 | 指定创作者主页 | 登录态缓存 | IP代理池 |
|-----|-------|----------|------|--------|-------|-------| |-----|-------|----------|-----|--------|-------|-------|
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| 抖音 | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | | 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | | 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
| B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | | B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
| 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | | 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
## 使用方法 ## 使用方法

View File

@ -165,6 +165,23 @@ class DOUYINClient(AbstractApiClient):
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/') headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
return await self.get(uri, params) return await self.get(uri, params)
async def get_sub_comments(self, comment_id: str, cursor: int = 0):
    """Request one page of replies (sub-comments) for a comment.

    Args:
        comment_id: Sent as the ``comment_id`` query parameter.
        cursor: Sent as the ``cursor`` query parameter; defaults to 0.

    Returns:
        Whatever ``self.get`` returns for the reply-list endpoint.
    """
    # Build a Referer that points at a search page for the value held in
    # request_keyword_var, percent-encoding everything except ':' and '/'.
    search_keyword = request_keyword_var.get()
    referer = (
        "https://www.douyin.com/search/"
        + search_keyword
        + '?aid=3a3cec5a-9e27-4040-b6aa-ef548c2c1138&publish_time=0&sort_type=0&source=search_history&type=general'
    )
    request_headers = copy.copy(self.headers)
    request_headers["Referer"] = urllib.parse.quote(referer, safe=':/')
    # NOTE(review): request_headers is assembled but never handed to
    # self.get below, so the Referer is not sent by this call -- confirm
    # whether self.get should receive it.

    # The page size is fixed at 20 entries per request.
    query = dict(comment_id=comment_id, cursor=cursor, count=20, item_type=0)
    return await self.get("/aweme/v1/web/comment/list/reply/", query)
async def get_aweme_all_comments( async def get_aweme_all_comments(
self, self,
aweme_id: str, aweme_id: str,
@ -197,7 +214,27 @@ class DOUYINClient(AbstractApiClient):
await asyncio.sleep(crawl_interval) await asyncio.sleep(crawl_interval)
if not is_fetch_sub_comments: if not is_fetch_sub_comments:
continue continue
# todo fetch sub comments # 获取二级评论
for comment in comments:
reply_comment_total = comment.get("reply_comment_total")
if reply_comment_total > 0:
comment_id = comment.get("cid")
sub_comments_has_more = 1
sub_comments_cursor = 0
while sub_comments_has_more:
sub_comments_res = await self.get_sub_comments(comment_id, sub_comments_cursor)
sub_comments_has_more = sub_comments_res.get("has_more", 0)
sub_comments_cursor = sub_comments_res.get("cursor", 0)
sub_comments = sub_comments_res.get("comments", [])
if not sub_comments:
continue
result.extend(sub_comments)
if callback: # 如果有回调函数,就执行回调函数
await callback(aweme_id, sub_comments)
await asyncio.sleep(crawl_interval)
return result return result
async def get_user_info(self, sec_user_id: str): async def get_user_info(self, sec_user_id: str):

View File

@ -177,8 +177,8 @@ class DouYinCrawler(AbstractCrawler):
await self.dy_client.get_aweme_all_comments( await self.dy_client.get_aweme_all_comments(
aweme_id=aweme_id, aweme_id=aweme_id,
crawl_interval=random.random(), crawl_interval=random.random(),
is_fetch_sub_comments=config.ENABLE_GET_SUB_COMMENTS,
callback=douyin_store.batch_update_dy_aweme_comments callback=douyin_store.batch_update_dy_aweme_comments
) )
utils.logger.info( utils.logger.info(
f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} comments have all been obtained and filtered ...") f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} comments have all been obtained and filtered ...")