From 6908da6f707d5869dc3864dc71f55156c49bdcd6 Mon Sep 17 00:00:00 2001 From: Relakkes Date: Thu, 23 Nov 2023 23:27:35 +0800 Subject: [PATCH 1/3] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E5=B0=8F=E7=BA=A2?= =?UTF-8?q?=E4=B9=A6client=E4=B8=ADget=E8=AF=B7=E6=B1=82=E4=B8=AD=E6=96=87?= =?UTF-8?q?bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- media_platform/xhs/client.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/media_platform/xhs/client.py b/media_platform/xhs/client.py index 69cc2d6..7a60161 100644 --- a/media_platform/xhs/client.py +++ b/media_platform/xhs/client.py @@ -1,6 +1,7 @@ import asyncio import json -from typing import Dict, Optional +from typing import Dict +from urllib.parse import urlencode import httpx from playwright.async_api import BrowserContext, Page @@ -70,7 +71,7 @@ class XHSClient: final_uri = uri if isinstance(params, dict): final_uri = (f"{uri}?" - f"{'&'.join([f'{k}={v}' for k, v in params.items()])}") + f"{urlencode(params)}") headers = await self._pre_headers(final_uri) return await self.request(method="GET", url=f"{self._host}{final_uri}", headers=headers) From 253888980c35e63cfa1dc554b010cfda40f898a7 Mon Sep 17 00:00:00 2001 From: Relakkes Date: Sat, 25 Nov 2023 12:32:43 +0800 Subject: [PATCH 2/3] =?UTF-8?q?doc:=20=E5=A2=9E=E5=8A=A0=E5=A6=82=E4=BD=95?= =?UTF-8?q?=E6=9B=B4=E6=8D=A2=E8=B4=A6=E5=8F=B7=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 4a112a4..4173c4f 100644 --- a/README.md +++ b/README.md @@ -84,6 +84,9 @@ # Q: 刚开始能爬取数据,过一段时间就是失效了? # A:出现这种情况多半是由于你的账号触发了平台风控机制了,❗️❗️请勿大规模对平台进行爬虫,影响平台。 + +# Q: 如何更换登录账号? 
+# A:删除项目根目录下的 browser_data/ 文件夹即可 ``` ## 项目代码结构 From 523c5f380ea4c1c017875178dcdfc14a8543a6ae Mon Sep 17 00:00:00 2001 From: Relakkes Date: Sun, 26 Nov 2023 15:38:38 +0800 Subject: [PATCH 3/3] doc: add command usage --- README.md | 4 ++++ requirements.txt | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4173c4f..20a4ede 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,10 @@ # 从配置文件中读取指定的帖子ID列表获取指定帖子的信息与评论信息 python main.py --platform xhs --lt qrcode --type detail + + # 其他平台爬虫使用示例, 执行下面的命令查看 + python3 main.py --help + ``` 5. 打开对应APP扫二维码登录 diff --git a/requirements.txt b/requirements.txt index a6df28d..0422fb5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,8 +5,8 @@ tenacity==8.2.2 tornado==6.3.2 PyExecJS==1.5.1 opencv-python==4.7.0.72 -tortoise-orm[asyncmy]==0.19.3 +tortoise-orm +aiomysql==0.2.0 aerich==0.7.2 numpy~=1.24.4 -redis~=4.6.0 -Pydantic==1.7 \ No newline at end of file +redis~=4.6.0 \ No newline at end of file