xhs_crawler/store/xhs/__init__.py

75 lines
2.8 KiB
Python

# -*- coding: utf-8 -*-
# @Author : relakkes@gmail.com
# @Time : 2024/1/14 17:34
# @Desc :
from typing import List
import config
from . import xhs_store_impl
from .xhs_store_db_types import *
from .xhs_store_impl import *
class XhsStoreFactory:
STORES = {
"csv": XhsCsvStoreImplement,
"db": XhsDbStoreImplement,
"json": XhsJsonStoreImplement
}
@staticmethod
def create_store() -> AbstractStore:
store_class = XhsStoreFactory.STORES.get(config.SAVE_DATA_OPTION)
if not store_class:
raise ValueError("[XhsStoreFactory.create_store] Invalid save option only supported csv or db or json ...")
return store_class()
async def update_xhs_note(note_item: Dict):
note_id = note_item.get("note_id")
user_info = note_item.get("user", {})
interact_info = note_item.get("interact_info", {})
image_list: List[Dict] = note_item.get("image_list", [])
local_db_item = {
"note_id": note_item.get("note_id"),
"type": note_item.get("type"),
"title": note_item.get("title") or note_item.get("desc", "")[:255],
"desc": note_item.get("desc", ""),
"time": note_item.get("time"),
"last_update_time": note_item.get("last_update_time", 0),
"user_id": user_info.get("user_id"),
"nickname": user_info.get("nickname"),
"avatar": user_info.get("avatar"),
"liked_count": interact_info.get("liked_count"),
"collected_count": interact_info.get("collected_count"),
"comment_count": interact_info.get("comment_count"),
"share_count": interact_info.get("share_count"),
"ip_location": note_item.get("ip_location", ""),
"image_list": ','.join([img.get('url', '') for img in image_list]),
"last_modify_ts": utils.get_current_timestamp(),
"note_url": f"https://www.xiaohongshu.com/explore/{note_id}"
}
utils.logger.info(f"[store.xhs.update_xhs_note] xhs note: {local_db_item}")
await XhsStoreFactory.create_store().store_content(local_db_item)
async def update_xhs_note_comment(note_id: str, comment_item: Dict):
user_info = comment_item.get("user_info", {})
comment_id = comment_item.get("id")
local_db_item = {
"comment_id": comment_id,
"create_time": comment_item.get("create_time"),
"ip_location": comment_item.get("ip_location"),
"note_id": note_id,
"content": comment_item.get("content"),
"user_id": user_info.get("user_id"),
"nickname": user_info.get("nickname"),
"avatar": user_info.get("image"),
"sub_comment_count": comment_item.get("sub_comment_count"),
"last_modify_ts": utils.get_current_timestamp(),
}
utils.logger.info(f"[store.xhs.update_xhs_note_comment] xhs note comment:{local_db_item}")
await XhsStoreFactory.create_store().store_comment(local_db_item)