feat: B站二维码、Cookie登录实现

This commit is contained in:
Relakkes 2023-12-04 00:02:00 +08:00
parent a90b411e68
commit 94b5030ef0
3 changed files with 72 additions and 11 deletions

View File

@ -4,8 +4,8 @@
# 仓库描述 # 仓库描述
**小红书爬虫**，**抖音爬虫**，**快手爬虫**...。 **小红书爬虫**，**抖音爬虫**，**快手爬虫**，**B站爬虫**...。
目前能抓取小红书、抖音、快手的视频、图片、评论、点赞、转发等信息。 目前能抓取小红书、抖音、快手、B站的视频、图片、评论、点赞、转发等信息。
原理：利用[playwright](https://playwright.dev/)搭桥，保留登录成功后的上下文浏览器环境，通过执行JS表达式获取一些加密参数。 原理：利用[playwright](https://playwright.dev/)搭桥，保留登录成功后的上下文浏览器环境，通过执行JS表达式获取一些加密参数。
通过使用此方式，免去了复现核心加密JS代码，逆向难度大大降低。 通过使用此方式，免去了复现核心加密JS代码，逆向难度大大降低。
@ -24,7 +24,7 @@
| 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | | 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | | 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ |
| B 站 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | | B 站 | ✅ | ✅ | ✕ | ✅ | ✕ | ✅ | ✅ | ✕ | ✕ |
| 微博 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | | 微博 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ |

View File

@ -34,13 +34,74 @@ class BilibiliLogin(AbstractLogin):
self.cookie_str = cookie_str self.cookie_str = cookie_str
async def begin(self):
    """Log in to Bilibili using the method selected by ``self.login_type``.

    Supported values are ``"qrcode"``, ``"phone"`` and ``"cookie"``; each is
    routed to the matching ``login_by_*`` coroutine.

    Raises:
        ValueError: if ``self.login_type`` is not one of the supported values.
    """
    utils.logger.info("Begin login Bilibili ...")
    # Map each supported login type to the coroutine method that performs it.
    login_handlers = {
        "qrcode": self.login_by_qrcode,
        "phone": self.login_by_mobile,
        "cookie": self.login_by_cookies,
    }
    handler = login_handlers.get(self.login_type)
    if handler is None:
        raise ValueError("Invalid Login Type Currently only supported qrcode or phone or cookie ...")
    await handler()
@retry(
    stop=stop_after_attempt(20),
    wait=wait_fixed(1),
    retry=retry_if_result(lambda value: value is False),
)
async def check_login_state(self) -> bool:
    """Report whether the browser context currently holds Bilibili login cookies.

    Returns:
        True when the context's cookies contain a non-empty ``SESSDATA`` or
        ``DedeUserID`` entry, otherwise False.

    The ``retry`` decorator calls this again whenever the result is False,
    waiting 1 second between attempts and stopping after 20 attempts; once
    the attempts are used up the caller receives ``RetryError``.
    """
    browser_cookies = await self.browser_context.cookies()
    # Only the name -> value mapping is needed; the first returned item is unused.
    _, cookies_by_name = utils.convert_cookies(browser_cookies)
    return any(cookies_by_name.get(name) for name in ("SESSDATA", "DedeUserID"))
async def login_by_qrcode(self):
    """Log in to bilibili by QR code and keep the browser context logged in.

    Steps: click the page's login button, locate the QR code image, display
    it through ``utils.show_qrcode``, then wait on ``check_login_state``
    until the login cookies show up.

    Raises:
        SystemExit: with exit status 1 when the QR code cannot be found or
            when ``check_login_state`` exhausts its retries, so the calling
            shell sees the run as failed rather than successful.
    """
    utils.logger.info("Begin login bilibili by qrcode ...")

    # click login button
    login_button_ele = self.context_page.locator(
        "xpath=//div[@class='right-entry__outside go-login-btn']//div"
    )
    await login_button_ele.click()

    # find login qrcode
    qrcode_img_selector = "//div[@class='login-scan-box']//img"
    base64_qrcode_img = await utils.find_login_qrcode(
        self.context_page,
        selector=qrcode_img_selector
    )
    if not base64_qrcode_img:
        utils.logger.info("login failed , have not found qrcode please check ....")
        # Non-zero status: the login did not succeed.
        sys.exit(1)

    # show login qrcode
    # Displaying the image is handed to the loop's default executor
    # (executor=None) instead of being called directly in this coroutine.
    partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img)
    asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode)

    utils.logger.info("Waiting for scan code login, remaining time is 20s")
    try:
        await self.check_login_state()
    except RetryError:
        utils.logger.info("Login bilibili failed by qrcode login method ...")
        # Non-zero status: the QR code was not scanned before retries ran out.
        sys.exit(1)

    # Give the page time to redirect after a successful login.
    wait_redirect_seconds = 5
    utils.logger.info(f"Login successful then wait for {wait_redirect_seconds} seconds redirect ...")
    await asyncio.sleep(wait_redirect_seconds)
async def login_by_mobile(self):
    """Placeholder for phone login; currently performs no action and returns None."""
    return None
async def login_by_cookies(self):
    """Log in to bilibili by loading the cookies from ``self.cookie_str``.

    The cookie string is parsed with ``utils.convert_str_cookie_to_dict`` and
    every name/value pair is added to the browser context for the
    ``.bilibili.com`` domain with path ``/``.
    """
    utils.logger.info("Begin login bilibili by cookie ...")
    cookies = [
        {
            'name': name,
            'value': value,
            'domain': ".bilibili.com",
            'path': "/",
        }
        for name, value in utils.convert_str_cookie_to_dict(self.cookie_str).items()
    ]
    # Add all cookies in one call instead of awaiting one call per cookie;
    # skip the call entirely when the cookie string produced no entries.
    if cookies:
        await self.browser_context.add_cookies(cookies)

View File

@ -98,6 +98,6 @@ class KuaishouLogin(AbstractLogin):
await self.browser_context.add_cookies([{ await self.browser_context.add_cookies([{
'name': key, 'name': key,
'value': value, 'value': value,
'domain': ".douyin.com", 'domain': ".kuaishou.com",
'path': "/" 'path': "/"
}]) }])