fix: 修复微博PC端登录后COOKIE在手机端无法使用的bug

2024-01-06 19:18:07 +08:00 · 2024-01-06 19:18:07 +08:00 · 4de14ad6a8
commit 4de14ad6a8
parent fe073801f8
3 changed files with 20 additions and 8 deletions
--- a/media_platform/weibo/client.py
+++ b/media_platform/weibo/client.py
@ -7,7 +7,7 @@ import asyncio
 import copy
 import json
 import re
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional
 from urllib.parse import urlencode
 import httpx
@ -73,6 +73,8 @@ class WeiboClient:
            resp_data: Dict = await self.request(method="GET", url=f"{self._host}{uri}", headers=self.headers)
            if resp_data.get("login"):
                ping_flag = True
            else:
                utils.logger.error(f"[WeiboClient.pong] cookie may be invalid and again login...")
        except Exception as e:
            utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...")
            ping_flag = False
--- a/media_platform/weibo/core.py
+++ b/media_platform/weibo/core.py
@ -37,7 +37,9 @@ class WeiboCrawler(AbstractCrawler):
    def __init__(self):
        self.index_url = "https://www.weibo.com"
        self.mobile_index_url = "https://m.weibo.cn"
        self.user_agent = utils.get_user_agent()
        self.mobile_user_agent = utils.get_mobile_user_agent()
    def init_config(self, platform: str, login_type: str, crawler_type: str):
        self.platform = platform
@ -57,13 +59,13 @@ class WeiboCrawler(AbstractCrawler):
            self.browser_context = await self.launch_browser(
                chromium,
                None,
-                self.user_agent,
+                self.mobile_user_agent,
                headless=config.HEADLESS
            )
            # stealth.min.js is a js script to prevent the website from detecting the crawler.
            await self.browser_context.add_init_script(path="libs/stealth.min.js")
            self.context_page = await self.browser_context.new_page()
-            await self.context_page.goto(self.index_url)
+            await self.context_page.goto(self.mobile_index_url)
            # Create a client to interact with the weibo website.
            self.wb_client = await self.create_weibo_client(httpx_proxy_format)
@ -75,7 +77,14 @@ class WeiboCrawler(AbstractCrawler):
                    context_page=self.context_page,
                    cookie_str=config.COOKIES
                )
                await self.context_page.goto(self.index_url)
                await asyncio.sleep(1)
                await login_obj.begin()
                # 登录成功后重定向到手机端的网站，再更新手机端登录成功的cookie
                utils.logger.info("[WeiboCrawler.start] redirect weibo mobile homepage and update cookies on mobile platform")
                await self.context_page.goto(self.mobile_index_url)
                await asyncio.sleep(2)
                await self.wb_client.update_cookies(browser_context=self.browser_context)
            crawler_type_var.set(self.crawler_type)
@ -183,7 +192,7 @@ class WeiboCrawler(AbstractCrawler):
                # Download comments
                all_comments = await self.wb_client.get_note_all_comments(
                    note_id=note_id,
-                    crawl_interval=random.random(),
+                    crawl_interval=random.randint(1,10), # 微博对API的限流比较严重，所以延时提高一些
                )
                # Filter comments by keyword
--- a/media_platform/weibo/login.py
+++ b/media_platform/weibo/login.py
@ -43,6 +43,7 @@ class WeiboLogin(AbstractLogin):
            raise ValueError(
                "[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
    @retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
    async def check_login_state(self, no_logged_in_session: str) -> bool:
        """
@ -61,8 +62,8 @@ class WeiboLogin(AbstractLogin):
        """If the login dialog box does not pop up automatically, we will manually click the login button"""
        dialog_selector = "xpath=//div[@class='woo-modal-main']"
        try:
-            # check dialog box is auto popup and wait for 10 seconds
+            # wait up to 4 seconds for the login dialog box to pop up automatically
-            await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 10)
+            await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 4)
        except Exception as e:
            utils.logger.error(
                f"[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, error: {e}")
@ -71,12 +72,12 @@ class WeiboLogin(AbstractLogin):
            # 向下滚动500像素
            await self.context_page.mouse.wheel(0,500)
-            await asyncio.sleep(2)
+            await asyncio.sleep(0.5)
            try:
                # click login button
                login_button_ele = self.context_page.locator(
-                    "xpath=//a[text()='登录']"
+                    "xpath=//a[text()='登录']",
                )
                await login_button_ele.click()
                await asyncio.sleep(0.5)