fix: 修复微博PC端登录后COOKIE在手机端无法使用的bug
This commit is contained in:
parent
fe073801f8
commit
4de14ad6a8
@ -7,7 +7,7 @@ import asyncio
|
|||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
from typing import Any, Callable, Dict, List, Optional
|
||||||
from urllib.parse import urlencode
|
from urllib.parse import urlencode
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
@ -73,6 +73,8 @@ class WeiboClient:
|
|||||||
resp_data: Dict = await self.request(method="GET", url=f"{self._host}{uri}", headers=self.headers)
|
resp_data: Dict = await self.request(method="GET", url=f"{self._host}{uri}", headers=self.headers)
|
||||||
if resp_data.get("login"):
|
if resp_data.get("login"):
|
||||||
ping_flag = True
|
ping_flag = True
|
||||||
|
else:
|
||||||
|
utils.logger.error(f"[WeiboClient.pong] cookie may be invalid and again login...")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...")
|
utils.logger.error(f"[WeiboClient.pong] Pong weibo failed: {e}, and try to login again...")
|
||||||
ping_flag = False
|
ping_flag = False
|
||||||
|
@ -37,7 +37,9 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.index_url = "https://www.weibo.com"
|
self.index_url = "https://www.weibo.com"
|
||||||
|
self.mobile_index_url = "https://m.weibo.cn"
|
||||||
self.user_agent = utils.get_user_agent()
|
self.user_agent = utils.get_user_agent()
|
||||||
|
self.mobile_user_agent = utils.get_mobile_user_agent()
|
||||||
|
|
||||||
def init_config(self, platform: str, login_type: str, crawler_type: str):
|
def init_config(self, platform: str, login_type: str, crawler_type: str):
|
||||||
self.platform = platform
|
self.platform = platform
|
||||||
@ -57,13 +59,13 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
self.browser_context = await self.launch_browser(
|
self.browser_context = await self.launch_browser(
|
||||||
chromium,
|
chromium,
|
||||||
None,
|
None,
|
||||||
self.user_agent,
|
self.mobile_user_agent,
|
||||||
headless=config.HEADLESS
|
headless=config.HEADLESS
|
||||||
)
|
)
|
||||||
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
# stealth.min.js is a js script to prevent the website from detecting the crawler.
|
||||||
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
await self.browser_context.add_init_script(path="libs/stealth.min.js")
|
||||||
self.context_page = await self.browser_context.new_page()
|
self.context_page = await self.browser_context.new_page()
|
||||||
await self.context_page.goto(self.index_url)
|
await self.context_page.goto(self.mobile_index_url)
|
||||||
|
|
||||||
# Create a client to interact with the weibo website.
|
# Create a client to interact with the weibo website.
|
||||||
self.wb_client = await self.create_weibo_client(httpx_proxy_format)
|
self.wb_client = await self.create_weibo_client(httpx_proxy_format)
|
||||||
@ -75,7 +77,14 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
context_page=self.context_page,
|
context_page=self.context_page,
|
||||||
cookie_str=config.COOKIES
|
cookie_str=config.COOKIES
|
||||||
)
|
)
|
||||||
|
await self.context_page.goto(self.index_url)
|
||||||
|
await asyncio.sleep(1)
|
||||||
await login_obj.begin()
|
await login_obj.begin()
|
||||||
|
|
||||||
|
# 登录成功后重定向到手机端的网站,再更新手机端登录成功的cookie
|
||||||
|
utils.logger.info("[WeiboCrawler.start] redirect weibo mobile homepage and update cookies on mobile platform")
|
||||||
|
await self.context_page.goto(self.mobile_index_url)
|
||||||
|
await asyncio.sleep(2)
|
||||||
await self.wb_client.update_cookies(browser_context=self.browser_context)
|
await self.wb_client.update_cookies(browser_context=self.browser_context)
|
||||||
|
|
||||||
crawler_type_var.set(self.crawler_type)
|
crawler_type_var.set(self.crawler_type)
|
||||||
@ -183,7 +192,7 @@ class WeiboCrawler(AbstractCrawler):
|
|||||||
# Download comments
|
# Download comments
|
||||||
all_comments = await self.wb_client.get_note_all_comments(
|
all_comments = await self.wb_client.get_note_all_comments(
|
||||||
note_id=note_id,
|
note_id=note_id,
|
||||||
crawl_interval=random.random(),
|
crawl_interval=random.randint(1,10), # 微博对API的限流比较严重,所以延时提高一些
|
||||||
)
|
)
|
||||||
|
|
||||||
# Filter comments by keyword
|
# Filter comments by keyword
|
||||||
|
@ -43,6 +43,7 @@ class WeiboLogin(AbstractLogin):
|
|||||||
raise ValueError(
|
raise ValueError(
|
||||||
"[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
|
"[WeiboLogin.begin] Invalid Login Type Currently only supported qrcode or phone or cookie ...")
|
||||||
|
|
||||||
|
|
||||||
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
|
@retry(stop=stop_after_attempt(20), wait=wait_fixed(1), retry=retry_if_result(lambda value: value is False))
|
||||||
async def check_login_state(self, no_logged_in_session: str) -> bool:
|
async def check_login_state(self, no_logged_in_session: str) -> bool:
|
||||||
"""
|
"""
|
||||||
@ -61,8 +62,8 @@ class WeiboLogin(AbstractLogin):
|
|||||||
"""If the login dialog box does not pop up automatically, we will manually click the login button"""
|
"""If the login dialog box does not pop up automatically, we will manually click the login button"""
|
||||||
dialog_selector = "xpath=//div[@class='woo-modal-main']"
|
dialog_selector = "xpath=//div[@class='woo-modal-main']"
|
||||||
try:
|
try:
|
||||||
# check dialog box is auto popup and wait for 10 seconds
|
# check dialog box is auto popup and wait for 4 seconds
|
||||||
await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 10)
|
await self.context_page.wait_for_selector(dialog_selector, timeout=1000 * 4)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
utils.logger.error(
|
utils.logger.error(
|
||||||
f"[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, error: {e}")
|
f"[WeiboLogin.popup_login_dialog] login dialog box does not pop up automatically, error: {e}")
|
||||||
@ -71,12 +72,12 @@ class WeiboLogin(AbstractLogin):
|
|||||||
|
|
||||||
# 向下滚动500像素
|
# 向下滚动500像素
|
||||||
await self.context_page.mouse.wheel(0,500)
|
await self.context_page.mouse.wheel(0,500)
|
||||||
await asyncio.sleep(2)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# click login button
|
# click login button
|
||||||
login_button_ele = self.context_page.locator(
|
login_button_ele = self.context_page.locator(
|
||||||
"xpath=//a[text()='登录']"
|
"xpath=//a[text()='登录']",
|
||||||
)
|
)
|
||||||
await login_button_ele.click()
|
await login_button_ele.click()
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.5)
|
||||||
|
Loading…
Reference in New Issue
Block a user