feat: 数据保存支持JSON格式
This commit is contained in:
parent
894dabcf63
commit
4dfa0d3fbf
@ -76,6 +76,7 @@
|
|||||||
### 数据保存
|
### 数据保存
|
||||||
- 支持保存到关系型数据库(MySQL、PgSQL等)
|
- 支持保存到关系型数据库(MySQL、PgSQL等)
|
||||||
- 支持保存到csv中(data/目录下)
|
- 支持保存到csv中(data/目录下)
|
||||||
|
- 支持保存到json中(data/目录下)
|
||||||
|
|
||||||
## 如何使用 IP 代理
|
## 如何使用 IP 代理
|
||||||
➡️➡️➡️ [IP代理使用方法](docs/代理使用.md)
|
➡️➡️➡️ [IP代理使用方法](docs/代理使用.md)
|
||||||
|
@ -21,7 +21,7 @@ HEADLESS = True
|
|||||||
SAVE_LOGIN_STATE = True
|
SAVE_LOGIN_STATE = True
|
||||||
|
|
||||||
# 数据保存类型选项配置,支持三种类型:csv、db、json
|
# 数据保存类型选项配置,支持三种类型:csv、db、json
|
||||||
SAVE_DATA_OPTION = "csv" # csv or db or json
|
SAVE_DATA_OPTION = "json" # csv or db or json
|
||||||
|
|
||||||
# 用户浏览器缓存的浏览器文件配置
|
# 用户浏览器缓存的浏览器文件配置
|
||||||
USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name
|
USER_DATA_DIR = "%s_user_data_dir" # %s will be replaced by platform name
|
||||||
|
@ -2,8 +2,9 @@
|
|||||||
# @Author : relakkes@gmail.com
|
# @Author : relakkes@gmail.com
|
||||||
# @Time : 2024/1/14 19:34
|
# @Time : 2024/1/14 19:34
|
||||||
# @Desc : B站存储实现类
|
# @Desc : B站存储实现类
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
|
import json
|
||||||
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
@ -122,8 +123,59 @@ class BiliDbStoreImplement(AbstractStore):
|
|||||||
|
|
||||||
|
|
||||||
class BiliJsonStoreImplement(AbstractStore):
|
class BiliJsonStoreImplement(AbstractStore):
|
||||||
|
json_store_path: str = "data/bilibili"
|
||||||
|
|
||||||
|
def make_save_file_name(self, store_type: str) -> str:
|
||||||
|
"""
|
||||||
|
make save file name by store type
|
||||||
|
Args:
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||||
|
|
||||||
|
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||||
|
"""
|
||||||
|
Below is a simple way to save it in json format.
|
||||||
|
Args:
|
||||||
|
save_item: save content dict info
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||||
|
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||||
|
save_data = []
|
||||||
|
|
||||||
|
if os.path.exists(save_file_name):
|
||||||
|
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||||
|
save_data = json.loads(await file.read())
|
||||||
|
|
||||||
|
save_data.append(save_item)
|
||||||
|
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||||
|
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
content JSON storage implementation
|
||||||
|
Args:
|
||||||
|
content_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(content_item, "contents")
|
||||||
|
|
||||||
async def store_comment(self, comment_item: Dict):
|
async def store_comment(self, comment_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
comment JSON storage implementation
|
||||||
|
Args:
|
||||||
|
comment_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(comment_item, "comments")
|
||||||
|
@ -2,8 +2,9 @@
|
|||||||
# @Author : relakkes@gmail.com
|
# @Author : relakkes@gmail.com
|
||||||
# @Time : 2024/1/14 18:46
|
# @Time : 2024/1/14 18:46
|
||||||
# @Desc : 抖音存储实现类
|
# @Desc : 抖音存储实现类
|
||||||
|
|
||||||
import csv
|
import csv
|
||||||
|
import json
|
||||||
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
@ -122,9 +123,59 @@ class DouyinDbStoreImplement(AbstractStore):
|
|||||||
|
|
||||||
|
|
||||||
class DouyinJsonStoreImplement(AbstractStore):
|
class DouyinJsonStoreImplement(AbstractStore):
|
||||||
|
json_store_path: str = "data/douyin"
|
||||||
|
|
||||||
|
def make_save_file_name(self, store_type: str) -> str:
|
||||||
|
"""
|
||||||
|
make save file name by store type
|
||||||
|
Args:
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||||
|
|
||||||
|
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||||
|
"""
|
||||||
|
Below is a simple way to save it in json format.
|
||||||
|
Args:
|
||||||
|
save_item: save content dict info
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||||
|
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||||
|
save_data = []
|
||||||
|
|
||||||
|
if os.path.exists(save_file_name):
|
||||||
|
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||||
|
save_data = json.loads(await file.read())
|
||||||
|
|
||||||
|
save_data.append(save_item)
|
||||||
|
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||||
|
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
content JSON storage implementation
|
||||||
|
Args:
|
||||||
|
content_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(content_item, "contents")
|
||||||
|
|
||||||
async def store_comment(self, comment_item: Dict):
|
async def store_comment(self, comment_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
comment JSON storage implementation
|
||||||
|
Args:
|
||||||
|
comment_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(comment_item, "comments")
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
# @Time : 2024/1/14 20:03
|
# @Time : 2024/1/14 20:03
|
||||||
# @Desc : 快手存储实现类
|
# @Desc : 快手存储实现类
|
||||||
import csv
|
import csv
|
||||||
|
import json
|
||||||
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
@ -121,8 +123,59 @@ class KuaishouDbStoreImplement(AbstractStore):
|
|||||||
|
|
||||||
|
|
||||||
class KuaishouJsonStoreImplement(AbstractStore):
|
class KuaishouJsonStoreImplement(AbstractStore):
|
||||||
|
json_store_path: str = "data/kuaishou"
|
||||||
|
|
||||||
|
def make_save_file_name(self, store_type: str) -> str:
|
||||||
|
"""
|
||||||
|
make save file name by store type
|
||||||
|
Args:
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||||
|
|
||||||
|
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||||
|
"""
|
||||||
|
Below is a simple way to save it in json format.
|
||||||
|
Args:
|
||||||
|
save_item: save content dict info
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||||
|
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||||
|
save_data = []
|
||||||
|
|
||||||
|
if os.path.exists(save_file_name):
|
||||||
|
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||||
|
save_data = json.loads(await file.read())
|
||||||
|
|
||||||
|
save_data.append(save_item)
|
||||||
|
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||||
|
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
content JSON storage implementation
|
||||||
|
Args:
|
||||||
|
content_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(content_item, "contents")
|
||||||
|
|
||||||
async def store_comment(self, comment_item: Dict):
|
async def store_comment(self, comment_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
comment JSON storage implementation
|
||||||
|
Args:
|
||||||
|
comment_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(comment_item, "comments")
|
||||||
|
@ -15,7 +15,7 @@ class WeibostoreFactory:
|
|||||||
STORES = {
|
STORES = {
|
||||||
"csv": WeiboCsvStoreImplement,
|
"csv": WeiboCsvStoreImplement,
|
||||||
"db": WeiboDbStoreImplement,
|
"db": WeiboDbStoreImplement,
|
||||||
"json": BiliJsonStoreImplement
|
"json": WeiboJsonStoreImplement
|
||||||
}
|
}
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
# @Time : 2024/1/14 21:35
|
# @Time : 2024/1/14 21:35
|
||||||
# @Desc : 微博存储实现类
|
# @Desc : 微博存储实现类
|
||||||
import csv
|
import csv
|
||||||
|
import json
|
||||||
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
@ -120,9 +122,60 @@ class WeiboDbStoreImplement(AbstractStore):
|
|||||||
await WeiboComment.filter(comment_id=comment_id).update(**comment_data.model_dump())
|
await WeiboComment.filter(comment_id=comment_id).update(**comment_data.model_dump())
|
||||||
|
|
||||||
|
|
||||||
class BiliJsonStoreImplement(AbstractStore):
|
class WeiboJsonStoreImplement(AbstractStore):
|
||||||
|
json_store_path: str = "data/weibo"
|
||||||
|
|
||||||
|
def make_save_file_name(self, store_type: str) -> str:
|
||||||
|
"""
|
||||||
|
make save file name by store type
|
||||||
|
Args:
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||||
|
|
||||||
|
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||||
|
"""
|
||||||
|
Below is a simple way to save it in json format.
|
||||||
|
Args:
|
||||||
|
save_item: save content dict info
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||||
|
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||||
|
save_data = []
|
||||||
|
|
||||||
|
if os.path.exists(save_file_name):
|
||||||
|
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||||
|
save_data = json.loads(await file.read())
|
||||||
|
|
||||||
|
save_data.append(save_item)
|
||||||
|
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||||
|
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
content JSON storage implementation
|
||||||
|
Args:
|
||||||
|
content_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(content_item, "contents")
|
||||||
|
|
||||||
async def store_comment(self, comment_item: Dict):
|
async def store_comment(self, comment_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
comment JSON storage implementation
|
||||||
|
Args:
|
||||||
|
comment_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(comment_item, "comments")
|
||||||
|
@ -3,6 +3,8 @@
|
|||||||
# @Time : 2024/1/14 16:58
|
# @Time : 2024/1/14 16:58
|
||||||
# @Desc : 小红书存储实现类
|
# @Desc : 小红书存储实现类
|
||||||
import csv
|
import csv
|
||||||
|
import json
|
||||||
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
@ -120,8 +122,59 @@ class XhsDbStoreImplement(AbstractStore):
|
|||||||
|
|
||||||
|
|
||||||
class XhsJsonStoreImplement(AbstractStore):
|
class XhsJsonStoreImplement(AbstractStore):
|
||||||
|
json_store_path: str = "data/xhs"
|
||||||
|
|
||||||
|
def make_save_file_name(self, store_type: str) -> str:
|
||||||
|
"""
|
||||||
|
make save file name by store type
|
||||||
|
Args:
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
return f"{self.json_store_path}/{crawler_type_var.get()}_{store_type}_{utils.get_current_date()}.json"
|
||||||
|
|
||||||
|
async def save_data_to_json(self, save_item: Dict, store_type: str):
|
||||||
|
"""
|
||||||
|
Below is a simple way to save it in json format.
|
||||||
|
Args:
|
||||||
|
save_item: save content dict info
|
||||||
|
store_type: Save type contains content and comments(contents | comments)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
pathlib.Path(self.json_store_path).mkdir(parents=True, exist_ok=True)
|
||||||
|
save_file_name = self.make_save_file_name(store_type=store_type)
|
||||||
|
save_data = []
|
||||||
|
|
||||||
|
if os.path.exists(save_file_name):
|
||||||
|
async with aiofiles.open(save_file_name, 'r', encoding='utf-8') as file:
|
||||||
|
save_data = json.loads(await file.read())
|
||||||
|
|
||||||
|
save_data.append(save_item)
|
||||||
|
async with aiofiles.open(save_file_name, 'w', encoding='utf-8') as file:
|
||||||
|
await file.write(json.dumps(save_data, ensure_ascii=False))
|
||||||
|
|
||||||
async def store_content(self, content_item: Dict):
|
async def store_content(self, content_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
content JSON storage implementation
|
||||||
|
Args:
|
||||||
|
content_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(content_item, "contents")
|
||||||
|
|
||||||
async def store_comment(self, comment_item: Dict):
|
async def store_comment(self, comment_item: Dict):
|
||||||
pass
|
"""
|
||||||
|
comment JSON storage implementation
|
||||||
|
Args:
|
||||||
|
comment_item:
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
|
||||||
|
"""
|
||||||
|
await self.save_data_to_json(comment_item, "comments")
|
||||||
|
Loading…
Reference in New Issue
Block a user