feat: 数据保存支持JSON格式

This commit is contained in:
Relakkes 2024-01-14 22:40:01 +08:00
parent 894dabcf63
commit 4dfa0d3fbf
8 changed files with 278 additions and 15 deletions

View File

@ -76,6 +76,7 @@
### 数据保存 ### 数据保存
- 支持保存到关系型数据库Mysql、PgSQL等 - 支持保存到关系型数据库Mysql、PgSQL等
- 支持保存到csv中data/目录下) - 支持保存到csv中data/目录下)
- 支持保存到json中(data/目录下)
## 如何使用 IP 代理 ## 如何使用 IP 代理
➡️➡️➡️ [IP代理使用方法](docs/代理使用.md) ➡️➡️➡️ [IP代理使用方法](docs/代理使用.md)

View File

@ -21,7 +21,7 @@ HEADLESS = True
SAVE_LOGIN_STATE = True SAVE_LOGIN_STATE = True
# 数据保存类型选项配置,支持三种类型csv、db、json # 数据保存类型选项配置,支持三种类型csv、db、json
SAVE_DATA_OPTION = "csv" # csv or db or json SAVE_DATA_OPTION = "json" # csv or db or json
# 用户浏览器缓存的浏览器文件配置 # 用户浏览器缓存的浏览器文件配置
USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name

View File

@ -2,8 +2,9 @@
# @Author : relakkes@gmail.com # @Author : relakkes@gmail.com
# @Time : 2024/1/14 19:34 # @Time : 2024/1/14 19:34
# @Desc : B站存储实现类 # @Desc : B站存储实现类
import csv import csv
import json
import os
import pathlib import pathlib
from typing import Dict from typing import Dict
@ -122,8 +123,59 @@ class BiliDbStoreImplement(AbstractStore):
class BiliJsonStoreImplement(AbstractStore): class BiliJsonStoreImplement(AbstractStore):
json_store_path: str = "data/bilibili"
def make_save_file_name(self, store_type: str) -> str:
    """
    Build the path of the JSON file used for the given store type.

    Args:
        store_type: kind of record being saved (contents | comments)

    Returns:
        "<json_store_path>/<crawler type>_<store_type>_<current date>.json"
    """
    crawler_type = crawler_type_var.get()
    file_name = f"{crawler_type}_{store_type}_{utils.get_current_date()}.json"
    return f"{self.json_store_path}/{file_name}"
async def save_data_to_json(self, save_item: Dict, store_type: str):
    """
    Append one record to the JSON array file for the given store type.

    The file is read in full, the record is appended, and the whole array is
    written back. That read-modify-write cycle spans several awaits, so it is
    guarded by a per-class asyncio lock: without it, coroutines saving at the
    same time could drop each other's records or read a half-written file.

    Args:
        save_item: record to append; must be JSON-serializable
        store_type: kind of record being saved (contents | comments)

    Returns:
        None
    """
    import asyncio  # local import: only this method needs it

    # Create the lock lazily. There is no await between the lookup and the
    # assignment, so coroutines on one event loop cannot create two locks.
    owner = type(self)
    write_lock = owner.__dict__.get("_json_write_lock")
    if write_lock is None:
        write_lock = asyncio.Lock()
        owner._json_write_lock = write_lock

    pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
    save_file_name = self.make_save_file_name(store_type=store_type)

    async with write_lock:
        save_data = []
        if os.path.exists(save_file_name):
            async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
                raw_text = await file.read()
            # An empty file (e.g. left by an interrupted write) counts as an empty list
            if raw_text.strip():
                save_data = json.loads(raw_text)

        save_data.append(save_item)
        async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
            await file.write(json.dumps(save_data, ensure_ascii=False))
async def store_content(self, content_item: Dict): async def store_content(self, content_item: Dict):
pass """
Store one content record in the JSON "contents" file.
Delegates to save_data_to_json with store_type "contents".
Args:
content_item: content record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(content_item, "contents")
async def store_comment(self, comment_item: Dict): async def store_comment(self, comment_item: Dict):
pass """
Store one comment record in the JSON "comments" file.
Delegates to save_data_to_json with store_type "comments".
Args:
comment_item: comment record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(comment_item, "comments")

View File

@ -2,8 +2,9 @@
# @Author : relakkes@gmail.com # @Author : relakkes@gmail.com
# @Time : 2024/1/14 18:46 # @Time : 2024/1/14 18:46
# @Desc : 抖音存储实现类 # @Desc : 抖音存储实现类
import csv import csv
import json
import os
import pathlib import pathlib
from typing import Dict from typing import Dict
@ -122,9 +123,59 @@ class DouyinDbStoreImplement(AbstractStore):
class DouyinJsonStoreImplement(AbstractStore): class DouyinJsonStoreImplement(AbstractStore):
json_store_path: str = "data/douyin"
def make_save_file_name(self, store_type: str) -> str:
    """
    Build the path of the JSON file used for the given store type.

    Args:
        store_type: kind of record being saved (contents | comments)

    Returns:
        "<json_store_path>/<crawler type>_<store_type>_<current date>.json"
    """
    crawler_type = crawler_type_var.get()
    file_name = f"{crawler_type}_{store_type}_{utils.get_current_date()}.json"
    return f"{self.json_store_path}/{file_name}"
async def save_data_to_json(self, save_item: Dict, store_type: str):
    """
    Append one record to the JSON array file for the given store type.

    The file is read in full, the record is appended, and the whole array is
    written back. That read-modify-write cycle spans several awaits, so it is
    guarded by a per-class asyncio lock: without it, coroutines saving at the
    same time could drop each other's records or read a half-written file.

    Args:
        save_item: record to append; must be JSON-serializable
        store_type: kind of record being saved (contents | comments)

    Returns:
        None
    """
    import asyncio  # local import: only this method needs it

    # Create the lock lazily. There is no await between the lookup and the
    # assignment, so coroutines on one event loop cannot create two locks.
    owner = type(self)
    write_lock = owner.__dict__.get("_json_write_lock")
    if write_lock is None:
        write_lock = asyncio.Lock()
        owner._json_write_lock = write_lock

    pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
    save_file_name = self.make_save_file_name(store_type=store_type)

    async with write_lock:
        save_data = []
        if os.path.exists(save_file_name):
            async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
                raw_text = await file.read()
            # An empty file (e.g. left by an interrupted write) counts as an empty list
            if raw_text.strip():
                save_data = json.loads(raw_text)

        save_data.append(save_item)
        async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
            await file.write(json.dumps(save_data, ensure_ascii=False))
async def store_content(self, content_item: Dict): async def store_content(self, content_item: Dict):
pass """
Store one content record in the JSON "contents" file.
Delegates to save_data_to_json with store_type "contents".
Args:
content_item: content record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(content_item, "contents")
async def store_comment(self, comment_item: Dict): async def store_comment(self, comment_item: Dict):
pass """
Store one comment record in the JSON "comments" file.
Delegates to save_data_to_json with store_type "comments".
Args:
comment_item: comment record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(comment_item, "comments")

View File

@ -3,6 +3,8 @@
# @Time : 2024/1/14 20:03 # @Time : 2024/1/14 20:03
# @Desc : 快手存储实现类 # @Desc : 快手存储实现类
import csv import csv
import json
import os
import pathlib import pathlib
from typing import Dict from typing import Dict
@ -121,8 +123,59 @@ class KuaishouDbStoreImplement(AbstractStore):
class KuaishouJsonStoreImplement(AbstractStore): class KuaishouJsonStoreImplement(AbstractStore):
json_store_path: str = "data/kuaishou"
def make_save_file_name(self, store_type: str) -> str:
    """
    Build the path of the JSON file used for the given store type.

    Args:
        store_type: kind of record being saved (contents | comments)

    Returns:
        "<json_store_path>/<crawler type>_<store_type>_<current date>.json"
    """
    crawler_type = crawler_type_var.get()
    file_name = f"{crawler_type}_{store_type}_{utils.get_current_date()}.json"
    return f"{self.json_store_path}/{file_name}"
async def save_data_to_json(self, save_item: Dict, store_type: str):
    """
    Append one record to the JSON array file for the given store type.

    The file is read in full, the record is appended, and the whole array is
    written back. That read-modify-write cycle spans several awaits, so it is
    guarded by a per-class asyncio lock: without it, coroutines saving at the
    same time could drop each other's records or read a half-written file.

    Args:
        save_item: record to append; must be JSON-serializable
        store_type: kind of record being saved (contents | comments)

    Returns:
        None
    """
    import asyncio  # local import: only this method needs it

    # Create the lock lazily. There is no await between the lookup and the
    # assignment, so coroutines on one event loop cannot create two locks.
    owner = type(self)
    write_lock = owner.__dict__.get("_json_write_lock")
    if write_lock is None:
        write_lock = asyncio.Lock()
        owner._json_write_lock = write_lock

    pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
    save_file_name = self.make_save_file_name(store_type=store_type)

    async with write_lock:
        save_data = []
        if os.path.exists(save_file_name):
            async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
                raw_text = await file.read()
            # An empty file (e.g. left by an interrupted write) counts as an empty list
            if raw_text.strip():
                save_data = json.loads(raw_text)

        save_data.append(save_item)
        async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
            await file.write(json.dumps(save_data, ensure_ascii=False))
async def store_content(self, content_item: Dict): async def store_content(self, content_item: Dict):
pass """
Store one content record in the JSON "contents" file.
Delegates to save_data_to_json with store_type "contents".
Args:
content_item: content record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(content_item, "contents")
async def store_comment(self, comment_item: Dict): async def store_comment(self, comment_item: Dict):
pass """
Store one comment record in the JSON "comments" file.
Delegates to save_data_to_json with store_type "comments".
Args:
comment_item: comment record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(comment_item, "comments")

View File

@ -15,7 +15,7 @@ class WeibostoreFactory:
STORES = { STORES = {
"csv": WeiboCsvStoreImplement, "csv": WeiboCsvStoreImplement,
"db": WeiboDbStoreImplement, "db": WeiboDbStoreImplement,
"json": BiliJsonStoreImplement "json": WeiboJsonStoreImplement
} }
@staticmethod @staticmethod

View File

@ -3,6 +3,8 @@
# @Time : 2024/1/14 21:35 # @Time : 2024/1/14 21:35
# @Desc : 微博存储实现类 # @Desc : 微博存储实现类
import csv import csv
import json
import os
import pathlib import pathlib
from typing import Dict from typing import Dict
@ -120,9 +122,60 @@ class WeiboDbStoreImplement(AbstractStore):
await WeiboComment.filter(comment_id=comment_id).update(**comment_data.model_dump()) await WeiboComment.filter(comment_id=comment_id).update(**comment_data.model_dump())
class BiliJsonStoreImplement(AbstractStore): class WeiboJsonStoreImplement(AbstractStore):
json_store_path: str = "data/weibo"
def make_save_file_name(self, store_type: str) -> str:
    """
    Build the path of the JSON file used for the given store type.

    Args:
        store_type: kind of record being saved (contents | comments)

    Returns:
        "<json_store_path>/<crawler type>_<store_type>_<current date>.json"
    """
    crawler_type = crawler_type_var.get()
    file_name = f"{crawler_type}_{store_type}_{utils.get_current_date()}.json"
    return f"{self.json_store_path}/{file_name}"
async def save_data_to_json(self, save_item: Dict, store_type: str):
    """
    Append one record to the JSON array file for the given store type.

    The file is read in full, the record is appended, and the whole array is
    written back. That read-modify-write cycle spans several awaits, so it is
    guarded by a per-class asyncio lock: without it, coroutines saving at the
    same time could drop each other's records or read a half-written file.

    Args:
        save_item: record to append; must be JSON-serializable
        store_type: kind of record being saved (contents | comments)

    Returns:
        None
    """
    import asyncio  # local import: only this method needs it

    # Create the lock lazily. There is no await between the lookup and the
    # assignment, so coroutines on one event loop cannot create two locks.
    owner = type(self)
    write_lock = owner.__dict__.get("_json_write_lock")
    if write_lock is None:
        write_lock = asyncio.Lock()
        owner._json_write_lock = write_lock

    pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
    save_file_name = self.make_save_file_name(store_type=store_type)

    async with write_lock:
        save_data = []
        if os.path.exists(save_file_name):
            async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
                raw_text = await file.read()
            # An empty file (e.g. left by an interrupted write) counts as an empty list
            if raw_text.strip():
                save_data = json.loads(raw_text)

        save_data.append(save_item)
        async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
            await file.write(json.dumps(save_data, ensure_ascii=False))
async def store_content(self, content_item: Dict): async def store_content(self, content_item: Dict):
pass """
Store one content record in the JSON "contents" file.
Delegates to save_data_to_json with store_type "contents".
Args:
content_item: content record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(content_item, "contents")
async def store_comment(self, comment_item: Dict): async def store_comment(self, comment_item: Dict):
pass """
Store one comment record in the JSON "comments" file.
Delegates to save_data_to_json with store_type "comments".
Args:
comment_item: comment record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(comment_item, "comments")

View File

@ -3,6 +3,8 @@
# @Time : 2024/1/14 16:58 # @Time : 2024/1/14 16:58
# @Desc : 小红书存储实现类 # @Desc : 小红书存储实现类
import csv import csv
import json
import os
import pathlib import pathlib
from typing import Dict from typing import Dict
@ -120,8 +122,59 @@ class XhsDbStoreImplement(AbstractStore):
class XhsJsonStoreImplement(AbstractStore): class XhsJsonStoreImplement(AbstractStore):
json_store_path: str = "data/xhs"
def make_save_file_name(self, store_type: str) -> str:
    """
    Build the path of the JSON file used for the given store type.

    Args:
        store_type: kind of record being saved (contents | comments)

    Returns:
        "<json_store_path>/<crawler type>_<store_type>_<current date>.json"
    """
    crawler_type = crawler_type_var.get()
    file_name = f"{crawler_type}_{store_type}_{utils.get_current_date()}.json"
    return f"{self.json_store_path}/{file_name}"
async def save_data_to_json(self, save_item: Dict, store_type: str):
    """
    Append one record to the JSON array file for the given store type.

    The file is read in full, the record is appended, and the whole array is
    written back. That read-modify-write cycle spans several awaits, so it is
    guarded by a per-class asyncio lock: without it, coroutines saving at the
    same time could drop each other's records or read a half-written file.

    Args:
        save_item: record to append; must be JSON-serializable
        store_type: kind of record being saved (contents | comments)

    Returns:
        None
    """
    import asyncio  # local import: only this method needs it

    # Create the lock lazily. There is no await between the lookup and the
    # assignment, so coroutines on one event loop cannot create two locks.
    owner = type(self)
    write_lock = owner.__dict__.get("_json_write_lock")
    if write_lock is None:
        write_lock = asyncio.Lock()
        owner._json_write_lock = write_lock

    pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
    save_file_name = self.make_save_file_name(store_type=store_type)

    async with write_lock:
        save_data = []
        if os.path.exists(save_file_name):
            async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
                raw_text = await file.read()
            # An empty file (e.g. left by an interrupted write) counts as an empty list
            if raw_text.strip():
                save_data = json.loads(raw_text)

        save_data.append(save_item)
        async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
            await file.write(json.dumps(save_data, ensure_ascii=False))
async def store_content(self, content_item: Dict): async def store_content(self, content_item: Dict):
pass """
Store one content record in the JSON "contents" file.
Delegates to save_data_to_json with store_type "contents".
Args:
content_item: content record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(content_item, "contents")
async def store_comment(self, comment_item: Dict): async def store_comment(self, comment_item: Dict):
pass """
Store one comment record in the JSON "comments" file.
Delegates to save_data_to_json with store_type "comments".
Args:
comment_item: comment record as a dict, passed through unchanged
Returns:
None
"""
await self.save_data_to_json(comment_item, "comments")