From 03565d61c6e2ac653ec3e0a10b15e85e9edc0f4f Mon Sep 17 00:00:00 2001 From: Relakkes Date: Tue, 25 Jul 2023 20:22:22 +0800 Subject: [PATCH] fix: issue #19 --- media_platform/xhs/core.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index fc1936b..eb4ab1e 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -48,6 +48,13 @@ class XiaoHongShuCrawler(AbstractCrawler): ) # stealth.min.js is a js script to prevent the website from detecting the crawler. await self.browser_context.add_init_script(path="libs/stealth.min.js") + # add a cookie attribute webId to avoid the appearance of a sliding captcha on the webpage + await self.browser_context.add_cookies([{ + 'name': "webId", + 'value': "xxx123", # any value + 'domain': ".xiaohongshu.com", + 'path': "/" + }]) self.context_page = await self.browser_context.new_page() await self.context_page.goto(self.index_url)