From c0a783fa350a499e345cf5723f4d71e1d7b7d321 Mon Sep 17 00:00:00 2001 From: Relakkes Date: Tue, 13 Jun 2023 14:00:04 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E7=99=BB=E5=BD=95?= =?UTF-8?q?=E4=BA=8C=E7=BB=B4=E7=A0=81=E6=89=AB=E6=8F=8F=E4=B8=8D=E4=B8=8A?= =?UTF-8?q?=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- utils.py | 16 ++++++++++------ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f273969..1a050cc 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # 仓库描述 这个代码仓库是一个利用[playwright](https://playwright.dev/)的爬虫程序 -可以准确地爬取小红书、抖音的笔记、评论等信息,大概原理是:利用playwright登录成功后,保留登录成功后的上下文浏览器环境,通过上下文浏览器环境执行JS表达式获取一些加密参数,再使用python的httpx发起异步请求,相当于使用Playwright搭桥,免去了复现核心加密JS代码,逆向难度大大降低。 +可以准确地爬取小红书、抖音的笔记、评论等信息,原理是:利用playwright登录成功后,保留登录成功后的上下文浏览器环境,通过上下文浏览器环境执行JS表达式获取一些加密参数,再使用python的httpx发起异步请求,相当于使用Playwright搭桥,免去了复现核心加密JS代码,逆向难度大大降低。 ## 主要功能 diff --git a/utils.py b/utils.py index afd6521..9ca4121 100644 --- a/utils.py +++ b/utils.py @@ -1,12 +1,9 @@ -import json -import time -import base64 import random -import hashlib +import base64 from io import BytesIO from typing import Optional, Dict, List, Tuple -from PIL import Image +from PIL import Image, ImageDraw from playwright.async_api import Cookie from playwright.async_api import Page @@ -30,7 +27,14 @@ def show_qrcode(qr_code: str): qr_code = qr_code.split(",")[1] qr_code = base64.b64decode(qr_code) image = Image.open(BytesIO(qr_code)) - image.show() + + # Add a square border around the QR code and display it within the border to improve scanning accuracy. + width, height = image.size + new_image = Image.new('RGB', (width + 20, height + 20), color=(255, 255, 255)) + new_image.paste(image, (10, 10)) + draw = ImageDraw.Draw(new_image) + draw.rectangle((0, 0, width + 19, height + 19), outline=(0, 0, 0), width=1) + new_image.show() def get_user_agent() -> str: