diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index fc1936b..eb4ab1e 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -48,6 +48,13 @@ class XiaoHongShuCrawler(AbstractCrawler): ) # stealth.min.js is a js script to prevent the website from detecting the crawler. await self.browser_context.add_init_script(path="libs/stealth.min.js") + # add a cookie attribute webId to avoid the appearance of a sliding captcha on the webpage + await self.browser_context.add_cookies([{ + 'name': "webId", + 'value': "xxx123", # any value + 'domain': ".xiaohongshu.com", + 'path': "/" + }]) self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url)