fix: 修复微博PC端登录后COOKIE在手机端无法使用的bug

This commit is contained in:
Relakkes 2024-01-06 19:18:07 +08:00
parent fe073801f8
commit 4de14ad6a8
3 changed files with 20 additions and 8 deletions

View File

@ -7,7 +7,7 @@ import asyncio
import copy import copy
import json import json
import re import re
from typing import Any, Callable, Dict, List, Optional, Tuple, Union from typing import Any, Callable, Dict, List, Optional
from urllib.parse import urlencode from urllib.parse import urlencode
import httpx import httpx
@ -73,6 +73,8 @@ class WeiboClient:
resp_data: Dict = await self.request(method="GET", url=f"{self._host}{uri}", headers=self.headers) resp_data: Dict = await self.request(method="GET", url=f"{self._host}{uri}", headers=self.headers)
if resp_data.get("login"): if resp_data.get("login"):
ping_flag = True ping_flag = True
else:
utils.logger.error(f"[WeiboClient.pong] cookie may be invalid and again login...")
except Exception as e: except Exception as e:
utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...") utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...")
ping_flag = False ping_flag = False

View File

@ -37,7 +37,9 @@ class WeiboCrawler(AbstractCrawler):
def __init__(self): def __init__(self):
self.index_url = "https://www.weibo.com" self.index_url = "https://www.weibo.com"
self.mobile_index_url = "https://m.weibo.cn"
self.user_agent = utils.get_user_agent() self.user_agent = utils.get_user_agent()
self.mobile_user_agent = utils.get_mobile_user_agent()
def init_config(self, platform: str, login_type: str, crawler_type: str): def init_config(self, platform: str, login_type: str, crawler_type: str):
self.platform = platform self.platform = platform
@ -57,13 +59,13 @@ class WeiboCrawler(AbstractCrawler):
self.browser_context = await self.launch_browser( self.browser_context = await self.launch_browser(
chromium, chromium,
None, None,
self.user_agent, self.mobile_user_agent,
headless=config.HEADLESS headless=config.HEADLESS
) )
# stealth.min.js is a js script to prevent the website from detecting the crawler. # stealth.min.js is a js script to prevent the website from detecting the crawler.
await self.browser_context.add_init_script(path="libs/stealth.min.js") await self.browser_context.add_init_script(path="libs/stealth.min.js")
self.context_page = await self.browser_context.new_page() self.context_page = await self.browser_context.new_page()
await self.context_page.goto(self.index_url) await self.context_page.goto(self.mobile_index_url)
# Create a client to interact with the weibo website. # Create a client to interact with the weibo website.
self.wb_client = await self.create_weibo_client(httpx_proxy_format) self.wb_client = await self.create_weibo_client(httpx_proxy_format)
@ -75,7 +77,14 @@ class WeiboCrawler(AbstractCrawler):
context_page=self.context_page, context_page=self.context_page,
cookie_str=config.COOKIES cookie_str=config.COOKIES
) )
await self.context_page.goto(self.index_url)
await asyncio.sleep(1)
await login_obj.begin() await login_obj.begin()
# 登录成功后重定向到手机端的网站再更新手机端登录成功的cookie
utils.logger.info("[WeiboCrawler.start] redirect weibo mobile homepage and update cookies on mobile platform")
await self.context_page.goto(self.mobile_index_url)
await asyncio.sleep(2)
await self.wb_client.update_cookies(browser_context=self.browser_context) await self.wb_client.update_cookies(browser_context=self.browser_context)
crawler_type_var.set(self.crawler_type) crawler_type_var.set(self.crawler_type)
@ -183,7 +192,7 @@ class WeiboCrawler(AbstractCrawler):
# Download comments # Download comments
all_comments = await self.wb_client.get_note_all_comments( all_comments = await self.wb_client.get_note_all_comments(
note_id=note_id, note_id=note_id,
crawl_interval=random.random(), crawl_interval=random.randint(1,10), # 微博对API的限流比较严重所以延时提高一些
) )
# Filter comments by keyword # Filter comments by keyword

View File

@ -43,6 +43,7 @@ class WeiboLogin(AbstractLogin):
raise ValueError( raise ValueError(
"[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...") "[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False)) @retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
async def check_login_state(self, no_logged_in_session: str) -> bool: async def check_login_state(self, no_logged_in_session: str) -> bool:
""" """
@ -61,8 +62,8 @@ class WeiboLogin(AbstractLogin):
"""If the login dialog box does not pop up automatically, we will manually click the login button""" """If the login dialog box does not pop up automatically, we will manually click the login button"""
dialog_selector = "xpath=//div[@class='woo-modal-main']" dialog_selector = "xpath=//div[@class='woo-modal-main']"
try: try:
# check dialog box is auto popup and wait for 10 seconds # check dialog box is auto popup and wait for 4 seconds
await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 10) await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 4)
except Exception as e: except Exception as e:
utils.logger.error( utils.logger.error(
f"[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, error: {e}") f"[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, error: {e}")
@ -71,12 +72,12 @@ class WeiboLogin(AbstractLogin):
# Scroll down 500 pixels
await self.context_page.mouse.wheel(0,500) await self.context_page.mouse.wheel(0,500)
await asyncio.sleep(2) await asyncio.sleep(0.5)
try: try:
# click login button # click login button
login_button_ele = self.context_page.locator( login_button_ele = self.context_page.locator(
"xpath=//a[text()='登录']" "xpath=//a[text()='登录']",
) )
await login_button_ele.click() await login_button_ele.click()
await asyncio.sleep(0.5) await asyncio.sleep(0.5)