This commit is contained in:
Relakkes 2024-02-26 23:47:02 +08:00
parent 67d2b7cff8
commit 384c8f9f7e
2 changed files with 5 additions and 1 deletion

View File

@@ -125,6 +125,7 @@ class DOUYINClient:
            "count": 10  # must be set to 10
        }
        referer_url = "https://www.douyin.com/search/" + keyword
+       referer_url += f"?publish_time={publish_time.value}&sort_type={sort_type.value}&type=general"
        headers = copy.copy(self.headers)
        headers["Referer"] = urllib.parse.quote(referer_url, safe=':/')
        return await self.get("/aweme/v1/web/general/search/single/", params, headers=headers)

View File

@@ -17,6 +17,7 @@ from var import crawler_type_var
from .client import DOUYINClient
from .exception import DataFetchError
from .login import DouYinLogin
+from .field import PublishTimeType


class DouYinCrawler(AbstractCrawler):
@@ -88,7 +89,9 @@ class DouYinCrawler(AbstractCrawler):
        while (page + 1) * dy_limit_count <= config.CRAWLER_MAX_NOTES_COUNT:
            try:
                posts_res = await self.dy_client.search_info_by_keyword(keyword=keyword,
-                                                                       offset=page * dy_limit_count)
+                                                                       offset=page * dy_limit_count,
+                                                                       publish_time=PublishTimeType.UNLIMITED
+                                                                       )
            except DataFetchError:
                utils.logger.error(f"[DouYinCrawler.search] search douyin keyword: {keyword} failed")
                break