fix: issue #32
This commit is contained in:
parent
99812b4669
commit
c1a3f06c7a
@ -53,8 +53,8 @@ class DOUYINClient:
|
|||||||
"platform": "PC",
|
"platform": "PC",
|
||||||
"screen_width": "1920",
|
"screen_width": "1920",
|
||||||
"screen_height": "1200",
|
"screen_height": "1200",
|
||||||
"webid": douyin_js_obj.call("get_web_id"),
|
#" webid": douyin_js_obj.call("get_web_id"),
|
||||||
"msToken": local_storage.get("xmst"),
|
# "msToken": local_storage.get("xmst"),
|
||||||
# "msToken": "abL8SeUTPa9-EToD8qfC7toScSADxpg6yLh2dbNcpWHzE0bT04txM_4UwquIcRvkRb9IU8sifwgM1Kwf1Lsld81o9Irt2_yNyUbbQPSUO8EfVlZJ_78FckDFnwVBVUVK",
|
# "msToken": "abL8SeUTPa9-EToD8qfC7toScSADxpg6yLh2dbNcpWHzE0bT04txM_4UwquIcRvkRb9IU8sifwgM1Kwf1Lsld81o9Irt2_yNyUbbQPSUO8EfVlZJ_78FckDFnwVBVUVK",
|
||||||
}
|
}
|
||||||
params.update(common_params)
|
params.update(common_params)
|
||||||
@ -142,7 +142,7 @@ class DOUYINClient:
|
|||||||
del headers["Origin"]
|
del headers["Origin"]
|
||||||
return await self.get("/aweme/v1/web/aweme/detail/", params, headers)
|
return await self.get("/aweme/v1/web/aweme/detail/", params, headers)
|
||||||
|
|
||||||
async def get_aweme_comments(self, aweme_id: str, cursor: int = 0):
|
async def get_aweme_comments(self, aweme_id: str, cursor: int = 0, keywords: str = ""):
|
||||||
"""get note comments
|
"""get note comments
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@ -153,6 +153,9 @@ class DOUYINClient:
|
|||||||
"count": 20,
|
"count": 20,
|
||||||
"item_type": 0
|
"item_type": 0
|
||||||
}
|
}
|
||||||
|
referer_url = "https://www.douyin.com/search/" + keywords + '?aid=3a3cec5a-9e27-4040-b6aa-ef548c2c1138&publish_time=0&sort_type=0&source=search_history&type=general'
|
||||||
|
headers = copy.copy(self.headers)
|
||||||
|
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
|
||||||
return await self.get(uri, params)
|
return await self.get(uri, params)
|
||||||
|
|
||||||
async def get_aweme_all_comments(
|
async def get_aweme_all_comments(
|
||||||
@ -160,7 +163,8 @@ class DOUYINClient:
|
|||||||
aweme_id: str,
|
aweme_id: str,
|
||||||
crawl_interval: float = 1.0,
|
crawl_interval: float = 1.0,
|
||||||
is_fetch_sub_comments=False,
|
is_fetch_sub_comments=False,
|
||||||
callback: Optional[Callable] = None
|
callback: Optional[Callable] = None,
|
||||||
|
keywords: str = ""
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
get note all comments include sub comments
|
get note all comments include sub comments
|
||||||
@ -168,13 +172,14 @@ class DOUYINClient:
|
|||||||
:param crawl_interval:
|
:param crawl_interval:
|
||||||
:param is_fetch_sub_comments:
|
:param is_fetch_sub_comments:
|
||||||
:param callback:
|
:param callback:
|
||||||
|
:param keywords:
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
result = []
|
result = []
|
||||||
comments_has_more = 1
|
comments_has_more = 1
|
||||||
comments_cursor = 0
|
comments_cursor = 0
|
||||||
while comments_has_more:
|
while comments_has_more:
|
||||||
comments_res = await self.get_aweme_comments(aweme_id, comments_cursor)
|
comments_res = await self.get_aweme_comments(aweme_id, comments_cursor, keywords)
|
||||||
comments_has_more = comments_res.get("has_more", 0)
|
comments_has_more = comments_res.get("has_more", 0)
|
||||||
comments_cursor = comments_res.get("cursor", comments_cursor + 20)
|
comments_cursor = comments_res.get("cursor", comments_cursor + 20)
|
||||||
comments = comments_res.get("comments")
|
comments = comments_res.get("comments")
|
||||||
|
@ -91,22 +91,23 @@ class DouYinCrawler(AbstractCrawler):
|
|||||||
aweme_list.append(aweme_info.get("aweme_id", ""))
|
aweme_list.append(aweme_info.get("aweme_id", ""))
|
||||||
await douyin.update_douyin_aweme(aweme_item=aweme_info)
|
await douyin.update_douyin_aweme(aweme_item=aweme_info)
|
||||||
utils.logger.info(f"keyword:{keyword}, aweme_list:{aweme_list}")
|
utils.logger.info(f"keyword:{keyword}, aweme_list:{aweme_list}")
|
||||||
await self.batch_get_note_comments(aweme_list)
|
await self.batch_get_note_comments(aweme_list, keyword)
|
||||||
|
|
||||||
async def batch_get_note_comments(self, aweme_list: List[str]) -> None:
|
async def batch_get_note_comments(self, aweme_list: List[str], keywords: str) -> None:
|
||||||
task_list: List[Task] = []
|
task_list: List[Task] = []
|
||||||
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
semaphore = asyncio.Semaphore(config.MAX_CONCURRENCY_NUM)
|
||||||
for aweme_id in aweme_list:
|
for aweme_id in aweme_list:
|
||||||
task = asyncio.create_task(self.get_comments(aweme_id, semaphore), name=aweme_id)
|
task = asyncio.create_task(self.get_comments(aweme_id, semaphore, keywords), name=aweme_id)
|
||||||
task_list.append(task)
|
task_list.append(task)
|
||||||
await asyncio.wait(task_list)
|
await asyncio.wait(task_list)
|
||||||
|
|
||||||
async def get_comments(self, aweme_id: str, semaphore: asyncio.Semaphore) -> None:
|
async def get_comments(self, aweme_id: str, semaphore: asyncio.Semaphore, keywords: str) -> None:
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
try:
|
try:
|
||||||
await self.dy_client.get_aweme_all_comments(
|
await self.dy_client.get_aweme_all_comments(
|
||||||
aweme_id=aweme_id,
|
aweme_id=aweme_id,
|
||||||
callback=douyin.batch_update_dy_aweme_comments
|
callback=douyin.batch_update_dy_aweme_comments,
|
||||||
|
keywords=keywords
|
||||||
)
|
)
|
||||||
utils.logger.info(f"aweme_id: {aweme_id} comments have all been obtained completed ...")
|
utils.logger.info(f"aweme_id: {aweme_id} comments have all been obtained completed ...")
|
||||||
except DataFetchError as e:
|
except DataFetchError as e:
|
||||||
|
@ -93,7 +93,8 @@ async def update_douyin_aweme(aweme_item: Dict):
|
|||||||
douyin_aweme_pydantic.validate(douyin_data)
|
douyin_aweme_pydantic.validate(douyin_data)
|
||||||
await DouyinAweme.create(**douyin_data.dict())
|
await DouyinAweme.create(**douyin_data.dict())
|
||||||
else:
|
else:
|
||||||
douyin_aweme_pydantic = pydantic_model_creator(DouyinAweme, name='DouyinAwemeUpdate', exclude=('id','add_ts'))
|
douyin_aweme_pydantic = pydantic_model_creator(DouyinAweme, name='DouyinAwemeUpdate',
|
||||||
|
exclude=('id', 'add_ts'))
|
||||||
douyin_data = douyin_aweme_pydantic(**local_db_item)
|
douyin_data = douyin_aweme_pydantic(**local_db_item)
|
||||||
douyin_aweme_pydantic.validate(douyin_data)
|
douyin_aweme_pydantic.validate(douyin_data)
|
||||||
await DouyinAweme.filter(aweme_id=aweme_id).update(**douyin_data.dict())
|
await DouyinAweme.filter(aweme_id=aweme_id).update(**douyin_data.dict())
|
||||||
@ -121,7 +122,6 @@ async def update_dy_aweme_comment(aweme_id: str, comment_item: Dict):
|
|||||||
"ip_location": comment_item.get("ip_label", ""),
|
"ip_location": comment_item.get("ip_label", ""),
|
||||||
"aweme_id": aweme_id,
|
"aweme_id": aweme_id,
|
||||||
"content": comment_item.get("text"),
|
"content": comment_item.get("text"),
|
||||||
"content_extra": json.dumps(comment_item.get("text_extra", [])),
|
|
||||||
"user_id": user_info.get("uid"),
|
"user_id": user_info.get("uid"),
|
||||||
"sec_uid": user_info.get("sec_uid"),
|
"sec_uid": user_info.get("sec_uid"),
|
||||||
"short_user_id": user_info.get("short_id"),
|
"short_user_id": user_info.get("short_id"),
|
||||||
@ -136,12 +136,14 @@ async def update_dy_aweme_comment(aweme_id: str, comment_item: Dict):
|
|||||||
if config.IS_SAVED_DATABASED:
|
if config.IS_SAVED_DATABASED:
|
||||||
if not await DouyinAwemeComment.filter(comment_id=comment_id).exists():
|
if not await DouyinAwemeComment.filter(comment_id=comment_id).exists():
|
||||||
local_db_item["add_ts"] = utils.get_current_timestamp()
|
local_db_item["add_ts"] = utils.get_current_timestamp()
|
||||||
comment_pydantic = pydantic_model_creator(DouyinAwemeComment, name='DouyinAwemeCommentCreate', exclude=('id',))
|
comment_pydantic = pydantic_model_creator(DouyinAwemeComment, name='DouyinAwemeCommentCreate',
|
||||||
|
exclude=('id',))
|
||||||
comment_data = comment_pydantic(**local_db_item)
|
comment_data = comment_pydantic(**local_db_item)
|
||||||
comment_pydantic.validate(comment_data)
|
comment_pydantic.validate(comment_data)
|
||||||
await DouyinAwemeComment.create(**comment_data.dict())
|
await DouyinAwemeComment.create(**comment_data.dict())
|
||||||
else:
|
else:
|
||||||
comment_pydantic = pydantic_model_creator(DouyinAwemeComment, name='DouyinAwemeCommentUpdate', exclude=('id','add_ts'))
|
comment_pydantic = pydantic_model_creator(DouyinAwemeComment, name='DouyinAwemeCommentUpdate',
|
||||||
|
exclude=('id', 'add_ts'))
|
||||||
comment_data = comment_pydantic(**local_db_item)
|
comment_data = comment_pydantic(**local_db_item)
|
||||||
comment_pydantic.validate(comment_data)
|
comment_pydantic.validate(comment_data)
|
||||||
await DouyinAwemeComment.filter(comment_id=comment_id).update(**comment_data.dict())
|
await DouyinAwemeComment.filter(comment_id=comment_id).update(**comment_data.dict())
|
||||||
|
@ -120,11 +120,8 @@ async def update_xhs_note_comment(note_id: str, comment_item: Dict):
|
|||||||
comment_pydantic.validate(comment_data)
|
comment_pydantic.validate(comment_data)
|
||||||
await XHSNoteComment.create(**comment_data.dict())
|
await XHSNoteComment.create(**comment_data.dict())
|
||||||
else:
|
else:
|
||||||
comment_pydantic = pydantic_model_creator(XHSNoteComment, name="CommentPydanticUpdate", exclude=('id','add_ts',))
|
comment_pydantic = pydantic_model_creator(XHSNoteComment, name="CommentPydanticUpdate",
|
||||||
|
exclude=('id', 'add_ts',))
|
||||||
comment_data = comment_pydantic(**local_db_item)
|
comment_data = comment_pydantic(**local_db_item)
|
||||||
comment_pydantic.validate(comment_data)
|
comment_pydantic.validate(comment_data)
|
||||||
await XHSNoteComment.filter(comment_id=comment_id).update(**comment_data.dict())
|
await XHSNoteComment.filter(comment_id=comment_id).update(**comment_data.dict())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user