Merge branch 'NanmiCoder:main' into main

This commit is contained in:
jayeeliu 2024-03-02 01:51:02 +08:00 committed by GitHub
commit 9a5460ccca
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 5 additions and 1 deletion

View File

@@ -125,6 +125,7 @@ class DOUYINClient:
"count": 10 # must be set to 10 "count": 10 # must be set to 10
} }
referer_url = "https://www.douyin.com/search/" + keyword referer_url = "https://www.douyin.com/search/" + keyword
referer_url += f"?publish_time={publish_time.value}&sort_type={sort_type.value}&type=general"
headers = copy.copy(self.headers) headers = copy.copy(self.headers)
headers["Referer"] = urllib.parse.quote(referer_url, safe=':/') headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
return await self.get("/aweme/v1/web/general/search/single/", params, headers=headers) return await self.get("/aweme/v1/web/general/search/single/", params, headers=headers)

View File

@@ -17,6 +17,7 @@ from var import crawler_type_var
from .client import DOUYINClient from .client import DOUYINClient
from .exception import DataFetchError from .exception import DataFetchError
from .login import DouYinLogin from .login import DouYinLogin
from .field import PublishTimeType
class DouYinCrawler(AbstractCrawler): class DouYinCrawler(AbstractCrawler):
@@ -88,7 +89,9 @@ class DouYinCrawler(AbstractCrawler):
while (page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT: while (page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
try: try:
posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword, posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword,
offset=page * dy_limit_count) offset=page * dy_limit_count,
publish_time=PublishTimeType.UNLIMITED
)
except DataFetchError: except DataFetchError:
utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed") utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
break break