抖音二级评论
This commit is contained in:
parent
5e145c31b9
commit
0ba68809a5
12
README.md
12
README.md
@ -18,12 +18,12 @@
|
|||||||
> 下面不支持的项目,相关的代码架构已经搭建好,只需要实现对应的方法即可,欢迎大家提交PR
|
> 下面不支持的项目,相关的代码架构已经搭建好,只需要实现对应的方法即可,欢迎大家提交PR
|
||||||
|
|
||||||
| 平台 | 关键词搜索 | 指定帖子ID爬取 | 二级评论 | 指定创作者主页 | 登录态缓存 | IP代理池 |
|
| 平台 | 关键词搜索 | 指定帖子ID爬取 | 二级评论 | 指定创作者主页 | 登录态缓存 | IP代理池 |
|
||||||
|-----|-------|----------|------|--------|-------|-------|
|
|-----|-------|----------|-----|--------|-------|-------|
|
||||||
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| 抖音 | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ |
|
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
|
||||||
| 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
| 快手 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
||||||
| B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
| B 站 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ |
|
||||||
| 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
| 微博 | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ |
|
||||||
|
|
||||||
|
|
||||||
## 使用方法
|
## 使用方法
|
||||||
|
@ -165,6 +165,23 @@ class DOUYINClient(AbstractApiClient):
|
|||||||
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
|
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
|
||||||
return await self.get(uri, params)
|
return await self.get(uri, params)
|
||||||
|
|
||||||
|
async def get_sub_comments(self, comment_id: str, cursor: int = 0):
    """
    Fetch replies (sub-comments) for a single comment.

    Args:
        comment_id: Sent as the ``comment_id`` query parameter.
        cursor: Sent as the ``cursor`` query parameter; defaults to 0.

    Returns:
        Whatever ``self.get`` returns for the reply-list endpoint.
    """
    # Build a Referer that points at the search page for the current keyword,
    # which is read from request_keyword_var.
    search_keyword = request_keyword_var.get()
    referer = "https://www.douyin.com/search/" + search_keyword + '?aid=3a3cec5a-9e27-4040-b6aa-ef548c2c1138&publish_time=0&sort_type=0&source=search_history&type=general'
    request_headers = copy.copy(self.headers)  # copy so self.headers is not mutated
    request_headers["Referer"] = urllib.parse.quote(referer, safe=':/')
    # NOTE(review): request_headers is prepared but not handed to self.get();
    # confirm whether self.get accepts a headers argument and should receive it.

    query = dict(comment_id=comment_id, cursor=cursor, count=20, item_type=0)
    return await self.get("/aweme/v1/web/comment/list/reply/", query)
|
||||||
|
|
||||||
async def get_aweme_all_comments(
|
async def get_aweme_all_comments(
|
||||||
self,
|
self,
|
||||||
aweme_id: str,
|
aweme_id: str,
|
||||||
@ -197,7 +214,27 @@ class DOUYINClient(AbstractApiClient):
|
|||||||
await asyncio.sleep(crawl_interval)
|
await asyncio.sleep(crawl_interval)
|
||||||
if not is_fetch_sub_comments:
|
if not is_fetch_sub_comments:
|
||||||
continue
|
continue
|
||||||
# todo fetch sub comments
|
# 获取二级评论
|
||||||
|
for comment in comments:
|
||||||
|
reply_comment_total = comment.get("reply_comment_total")
|
||||||
|
|
||||||
|
if reply_comment_total > 0:
|
||||||
|
comment_id = comment.get("cid")
|
||||||
|
sub_comments_has_more = 1
|
||||||
|
sub_comments_cursor = 0
|
||||||
|
|
||||||
|
while sub_comments_has_more:
|
||||||
|
sub_comments_res = await self.get_sub_comments(comment_id, sub_comments_cursor)
|
||||||
|
sub_comments_has_more = sub_comments_res.get("has_more", 0)
|
||||||
|
sub_comments_cursor = sub_comments_res.get("cursor", 0)
|
||||||
|
sub_comments = sub_comments_res.get("comments", [])
|
||||||
|
|
||||||
|
if not sub_comments:
|
||||||
|
continue
|
||||||
|
result.extend(sub_comments)
|
||||||
|
if callback: # 如果有回调函数,就执行回调函数
|
||||||
|
await callback(aweme_id, sub_comments)
|
||||||
|
await asyncio.sleep(crawl_interval)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
async def get_user_info(self, sec_user_id: str):
|
async def get_user_info(self, sec_user_id: str):
|
||||||
|
@ -177,8 +177,8 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
await self.dy_client.get_aweme_all_comments(
|
await self.dy_client.get_aweme_all_comments(
|
||||||
aweme_id=aweme_id,
|
aweme_id=aweme_id,
|
||||||
crawl_interval=random.random(),
|
crawl_interval=random.random(),
|
||||||
|
is_fetch_sub_comments=config.ENABLE_GET_SUB_COMMENTS,
|
||||||
callback=douyin_store.batch_update_dy_aweme_comments
|
callback=douyin_store.batch_update_dy_aweme_comments
|
||||||
|
|
||||||
)
|
)
|
||||||
utils.logger.info(
|
utils.logger.info(
|
||||||
f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} comments have all been obtained and filtered ...")
|
f"[DouYinCrawler.get_comments] aweme_id: {aweme_id} comments have all been obtained and filtered ...")
|
||||||
|
Loading…
Reference in New Issue
Block a user