Merge branch 'main' into feat/kuaishou_support
This commit is contained in:
commit
2f8541a351
@ -65,6 +65,10 @@
|
||||
|
||||
# 从配置文件中读取指定的帖子ID列表,获取这些帖子的信息与评论信息
|
||||
python main.py --platform xhs --lt qrcode --type detail
|
||||
|
||||
# 其他平台爬虫使用示例, 执行下面的命令查看
|
||||
python3 main.py --help
|
||||
|
||||
```
|
||||
|
||||
5. 打开对应APP扫二维码登录
|
||||
@ -84,6 +88,9 @@
|
||||
|
||||
# Q: 刚开始能爬取数据,过一段时间就失效了?
|
||||
# A:出现这种情况多半是由于你的账号触发了平台的风控机制,❗️❗️请勿对平台进行大规模爬取,以免影响平台正常运行。
|
||||
|
||||
# Q: 如何更换登录账号?
|
||||
# A:删除项目根目录下的 brower_data/ 文件夹即可
|
||||
```
|
||||
|
||||
## 项目代码结构
|
||||
|
@ -1,6 +1,7 @@
|
||||
import asyncio
|
||||
import json
|
||||
from typing import Dict, Optional
|
||||
from typing import Dict
|
||||
from urllib.parse import urlencode
|
||||
|
||||
import httpx
|
||||
from playwright.async_api import BrowserContext, Page
|
||||
@ -70,7 +71,7 @@ class XHSClient:
|
||||
final_uri = uri
|
||||
if isinstance(params, dict):
|
||||
final_uri = (f"{uri}?"
|
||||
f"{'&'.join([f'{k}={v}' for k, v in params.items()])}")
|
||||
f"{urlencode(params)}")
|
||||
headers = await self._pre_headers(final_uri)
|
||||
return await self.request(method="GET", url=f"{self._host}{final_uri}", headers=headers)
|
||||
|
||||
|
@ -5,8 +5,8 @@ tenacity==8.2.2
|
||||
tornado==6.3.2
|
||||
PyExecJS==1.5.1
|
||||
opencv-python==4.7.0.72
|
||||
tortoise-orm[asyncmy]==0.19.3
|
||||
tortoise-orm
|
||||
aiomysql==0.2.0
|
||||
aerich==0.7.2
|
||||
numpy~=1.24.4
|
||||
redis~=4.6.0
|
||||
Pydantic==1.7
|
||||
redis~=4.6.0
|
Loading…
Reference in New Issue
Block a user