From b1441ab4ae0f1900b2143846b54c556bfdb79901 Mon Sep 17 00:00:00 2001 From: Relakkes Date: Sun, 24 Dec 2023 18:19:26 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=BE=AE=E5=8D=9A=E5=B8=96=E5=AD=90?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E4=BF=9D=E5=AD=98=E5=88=B0=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E5=BA=93=E4=B8=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 13 ++++++++----- models/__init__.py | 1 + models/weibo.py | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 109fc08..963d13c 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ | 小红书 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | | 抖音 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | | 快手 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | -| B 站 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | -| 微博 | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | ✕ | +| B 站 | ✅ | ✅ | ✕ | ✅ | ✅ | ✅ | ✅ | ✅ | ✕ | +| 微博 | ✅ | ✕ | ✕ | ✅ | ✕ | ✅ | ✅ | ✅ | ✕ | ## 使用方法 @@ -66,14 +66,17 @@ # 从配置文件中读取指定的帖子ID列表获取指定帖子的信息与评论信息 python3 main.py --platform xhs --lt qrcode --type detail - + + # 打开对应APP扫二维码登录 + # 其他平台爬虫使用示例, 执行下面的命令查看 python3 main.py --help ``` -打开对应APP扫二维码登录 -等待爬虫程序执行完毕,数据会保存到 `data/xhs` 目录下 +### 数据保存 +- 支持保存到关系型数据库(Mysql、PgSQL等) +- 支持保存到csv中(data/目录下) ## 如何使用 IP 代理 ➡️➡️➡️ [IP代理使用方法](docs/代理使用.md) diff --git a/models/__init__.py b/models/__init__.py index cd278e6..09a27c2 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -2,3 +2,4 @@ from .bilibili import * from .douyin import * from .kuaishou import * from .xiaohongshu import * +from .weibo import * \ No newline at end of file diff --git a/models/weibo.py b/models/weibo.py index 2f433a9..3aa3f62 100644 --- a/models/weibo.py +++ b/models/weibo.py @@ -35,14 +35,14 @@ class WeiboNote(WeiboBaseModel): note_id = fields.CharField(max_length=64, index=True, description="帖子ID") content = fields.TextField(null=True, description="帖子正文内容") create_time = fields.BigIntField(description="帖子发布时间戳", index=True) - create_date_time = fields.BigIntField(description="帖子发布日期时间", index=True) + create_date_time = fields.CharField(description="帖子发布日期时间",max_length=32, index=True) liked_count = fields.CharField(null=True, max_length=16, description="帖子点赞数") comments_count = fields.CharField(null=True, max_length=16, description="帖子评论数量") shared_count = fields.CharField(null=True, max_length=16, description="帖子转发数量") note_url = fields.CharField(null=True, max_length=512, description="帖子详情URL") class Meta: - table = "weibo_video" + table = "weibo_note" table_description = "微博帖子" def __str__(self): @@ -54,7 +54,7 @@ class WeiboComment(WeiboBaseModel): note_id = fields.CharField(max_length=64, index=True, description="帖子ID") content = fields.TextField(null=True, description="评论内容") create_time = fields.BigIntField(description="评论时间戳") - create_date_time = fields.BigIntField(description="评论日期时间", index=True) + create_date_time = fields.CharField(description="评论日期时间", max_length=32, index=True) comment_like_count = fields.CharField(max_length=16, description="评论点赞数量") sub_comment_count = fields.CharField(max_length=16, description="评论回复数") @@ -75,16 +75,16 @@ async def update_weibo_note(note_item: Dict): "note_id": note_id, "content": mblog.get("text"), "create_time": utils.rfc2822_to_timestamp(mblog.get("created_at")), - "create_date_time": utils.rfc2822_to_china_datetime(mblog.get("created_at")), - "liked_count": mblog.get("attitudes_count", 0), - "comments_count": mblog.get("comments_count", 0), - "shared_count": mblog.get("reposts_count", 0), + "create_date_time": str(utils.rfc2822_to_china_datetime(mblog.get("created_at"))), + "liked_count": str(mblog.get("attitudes_count", 0)), + "comments_count": str(mblog.get("comments_count", 0)), + "shared_count": str(mblog.get("reposts_count", 0)), "last_modify_ts": utils.get_current_timestamp(), "note_url": f"https://m.weibo.cn/detail/{note_id}", "ip_location": mblog.get("region_name", "").replace("发布于 ", ""), # 用户信息 - "user_id": user_info.get("id"), + "user_id": str(user_info.get("id")), "nickname": user_info.get("screen_name", ""), "gender": user_info.get("gender", ""), "profile_url": user_info.get("profile_url", ""), @@ -130,7 +130,7 @@ async def update_weibo_video_comment(note_id: str, comment_item: Dict): local_db_item = { "comment_id": comment_id, "create_time": utils.rfc2822_to_timestamp(comment_item.get("created_at")), - "create_date_time": utils.rfc2822_to_china_datetime(comment_item.get("created_at")), + "create_date_time": str(utils.rfc2822_to_china_datetime(comment_item.get("created_at"))), "note_id": note_id, "content": content.get("message"), "sub_comment_count": str(comment_item.get("total_number", 0)), @@ -139,7 +139,7 @@ async def update_weibo_video_comment(note_id: str, comment_item: Dict): "ip_location": comment_item.get("source", "").replace("来自", ""), # 用户信息 - "user_id": user_info.get("id"), + "user_id": str(user_info.get("id")), "nickname": user_info.get("screen_name", ""), "gender": user_info.get("gender", ""), "profile_url": user_info.get("profile_url", ""),