From 67ec49498ad0358599688fef039f7deecefb2a08 Mon Sep 17 00:00:00 2001 From: Relakkes Date: Sat, 30 Mar 2024 21:17:33 +0800 Subject: [PATCH] refactor: rename xhs to xiaohongshu --- media_platform/xhs/client.py | 14 +++++++------- media_platform/xhs/core.py | 12 ++++++------ media_platform/xhs/login.py | 30 +++++++++++++++--------------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/media_platform/xhs/client.py b/media_platform/xhs/client.py index 7586578..5247ef7 100644 --- a/media_platform/xhs/client.py +++ b/media_platform/xhs/client.py @@ -14,7 +14,7 @@ from .field import SearchNoteType, SearchSortType from .help import get_search_id, sign -class XHSClient: +class XiaoHongShuClient: def __init__( self, timeout=10, @@ -134,14 +134,14 @@ class XHSClient: """ """get a note to check if login state is ok""" - utils.logger.info("[XHSClient.pong] Begin to pong xhs...") + utils.logger.info("[XiaoHongShuClient.pong] Begin to pong xhs...") ping_flag = False try: note_card: Dict = await self.get_note_by_keyword(keyword="小红书") if note_card.get("items"): ping_flag = True except Exception as e: - utils.logger.error(f"[XHSClient.pong] Ping xhs failed: {e}, and try to login again...") + utils.logger.error(f"[XiaoHongShuClient.pong] Ping xhs failed: {e}, and try to login again...") ping_flag = False return ping_flag @@ -202,7 +202,7 @@ class XHSClient: if res and res.get("items"): res_dict: Dict = res["items"][0]["note_card"] return res_dict - utils.logger.error(f"[XHSClient.get_note_by_id] get note empty and res:{res}") + utils.logger.error(f"[XiaoHongShuClient.get_note_by_id] get note empty and res:{res}") return dict() async def get_note_comments(self, note_id: str, cursor: str = "") -> Dict: @@ -266,7 +266,7 @@ class XHSClient: comments_cursor = comments_res.get("cursor", "") if "comments" not in comments_res: utils.logger.info( - f"[XHSClient.get_note_all_comments] No 'comments' key found in response: {comments_res}") + f"[XiaoHongShuClient.get_note_all_comments] No 'comments' key found in response: {comments_res}") break comments = comments_res["comments"] if callback: @@ -337,11 +337,11 @@ class XHSClient: notes_has_more = notes_res.get("has_more", False) notes_cursor = notes_res.get("cursor", "") if "notes" not in notes_res: - utils.logger.info(f"[XHSClient.get_all_notes_by_creator] No 'notes' key found in response: {notes_res}") + utils.logger.info(f"[XiaoHongShuClient.get_all_notes_by_creator] No 'notes' key found in response: {notes_res}") break notes = notes_res["notes"] - utils.logger.info(f"[XHSClient.get_all_notes_by_creator] got user_id:{user_id} notes len : {len(notes)}") + utils.logger.info(f"[XiaoHongShuClient.get_all_notes_by_creator] got user_id:{user_id} notes len : {len(notes)}") if callback: await callback(notes) await asyncio.sleep(crawl_interval) diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py index bf8a941..ef1e2ea 100644 --- a/media_platform/xhs/core.py +++ b/media_platform/xhs/core.py @@ -14,10 +14,10 @@ from store import xhs as xhs_store from tools import utils from var import crawler_type_var -from .client import XHSClient +from .client import XiaoHongShuClient from .exception import DataFetchError from .field import SearchSortType -from .login import XHSLogin +from .login import XiaoHongShuLogin class XiaoHongShuCrawler(AbstractCrawler): @@ -25,7 +25,7 @@ class XiaoHongShuCrawler(AbstractCrawler): login_type: str crawler_type: str context_page: Page - xhs_client: XHSClient + xhs_client: XiaoHongShuClient browser_context: BrowserContext def __init__(self) -> None: @@ -68,7 +68,7 @@ class XiaoHongShuCrawler(AbstractCrawler): # Create a client to interact with the xiaohongshu website. self.xhs_client = await self.create_xhs_client(httpx_proxy_format) if not await self.xhs_client.pong(): - login_obj = XHSLogin( + login_obj = XiaoHongShuLogin( login_type=self.login_type, login_phone="", # input your phone number browser_context=self.browser_context, @@ -219,11 +219,11 @@ class XiaoHongShuCrawler(AbstractCrawler): } return playwright_proxy, httpx_proxy - async def create_xhs_client(self, httpx_proxy: Optional[str]) -> XHSClient: + async def create_xhs_client(self, httpx_proxy: Optional[str]) -> XiaoHongShuClient: """Create xhs client""" utils.logger.info("[XiaoHongShuCrawler.create_xhs_client] Begin create xiaohongshu API client ...") cookie_str, cookie_dict = utils.convert_cookies(await self.browser_context.cookies()) - xhs_client_obj = XHSClient( + xhs_client_obj = XiaoHongShuClient( proxies=httpx_proxy, headers={ "User-Agent": self.user_agent, diff --git a/media_platform/xhs/login.py b/media_platform/xhs/login.py index cba1219..7923585 100644 --- a/media_platform/xhs/login.py +++ b/media_platform/xhs/login.py @@ -13,7 +13,7 @@ from base.base_crawler import AbstractLogin from tools import utils -class XHSLogin(AbstractLogin): +class XiaoHongShuLogin(AbstractLogin): def __init__(self, login_type: str, @@ -37,7 +37,7 @@ class XHSLogin(AbstractLogin): """ if "请通过验证" in await self.context_page.content(): - utils.logger.info("[XHSLogin.check_login_state] 登录过程中出现验证码,请手动验证") + utils.logger.info("[XiaoHongShuLogin.check_login_state] 登录过程中出现验证码,请手动验证") current_cookie = await self.browser_context.cookies() _, cookie_dict = utils.convert_cookies(current_cookie) @@ -48,7 +48,7 @@ class XHSLogin(AbstractLogin): async def begin(self): """Start login xiaohongshu""" - utils.logger.info("[XHSLogin.begin] Begin login xiaohongshu ...") + utils.logger.info("[XiaoHongShuLogin.begin] Begin login xiaohongshu ...") if self.login_type == "qrcode": await self.login_by_qrcode() elif self.login_type == "phone": @@ -56,11 +56,11 @@ class XHSLogin(AbstractLogin): elif self.login_type == "cookie": await self.login_by_cookies() else: - raise ValueError("[XHSLogin.begin]I nvalid Login Type Currently only supported qrcode or phone or cookies ...") + raise ValueError("[XiaoHongShuLogin.begin]I nvalid Login Type Currently only supported qrcode or phone or cookies ...") async def login_by_mobile(self): """Login xiaohongshu by mobile""" - utils.logger.info("[XHSLogin.login_by_mobile] Begin login xiaohongshu by mobile ...") + utils.logger.info("[XiaoHongShuLogin.login_by_mobile] Begin login xiaohongshu by mobile ...") await asyncio.sleep(1) try: # 小红书进入首页后,有可能不会自动弹出登录框,需要手动点击登录按钮 @@ -77,7 +77,7 @@ class XHSLogin(AbstractLogin): ) await element.click() except Exception as e: - utils.logger.info("[XHSLogin.login_by_mobile] have not found mobile button icon and keep going ...") + utils.logger.info("[XiaoHongShuLogin.login_by_mobile] have not found mobile button icon and keep going ...") await asyncio.sleep(1) login_container_ele = await self.context_page.wait_for_selector("div.login-container") @@ -93,7 +93,7 @@ class XHSLogin(AbstractLogin): max_get_sms_code_time = 60 * 2 # 最长获取验证码的时间为2分钟 no_logged_in_session = "" while max_get_sms_code_time > 0: - utils.logger.info(f"[XHSLogin.login_by_mobile] get sms code from redis remaining time {max_get_sms_code_time}s ...") + utils.logger.info(f"[XiaoHongShuLogin.login_by_mobile] get sms code from redis remaining time {max_get_sms_code_time}s ...") await asyncio.sleep(1) sms_code_key = f"xhs_{self.login_phone}" sms_code_value = redis_obj.get(sms_code_key) @@ -119,16 +119,16 @@ class XHSLogin(AbstractLogin): try: await self.check_login_state(no_logged_in_session) except RetryError: - utils.logger.info("[XHSLogin.login_by_mobile] Login xiaohongshu failed by mobile login method ...") + utils.logger.info("[XiaoHongShuLogin.login_by_mobile] Login xiaohongshu failed by mobile login method ...") sys.exit() wait_redirect_seconds = 5 - utils.logger.info(f"[XHSLogin.login_by_mobile] Login successful then wait for {wait_redirect_seconds} seconds redirect ...") + utils.logger.info(f"[XiaoHongShuLogin.login_by_mobile] Login successful then wait for {wait_redirect_seconds} seconds redirect ...") await asyncio.sleep(wait_redirect_seconds) async def login_by_qrcode(self): """login xiaohongshu website and keep webdriver login state""" - utils.logger.info("[XHSLogin.login_by_qrcode] Begin login xiaohongshu by qrcode ...") + utils.logger.info("[XiaoHongShuLogin.login_by_qrcode] Begin login xiaohongshu by qrcode ...") # login_selector = "div.login-container > div.left > div.qrcode > img" qrcode_img_selector = "xpath=//img[@class='qrcode-img']" # find login qrcode @@ -137,7 +137,7 @@ class XHSLogin(AbstractLogin): selector=qrcode_img_selector ) if not base64_qrcode_img: - utils.logger.info("[XHSLogin.login_by_qrcode] login failed , have not found qrcode please check ....") + utils.logger.info("[XiaoHongShuLogin.login_by_qrcode] login failed , have not found qrcode please check ....") # if this website does not automatically popup login dialog box, we will manual click login button await asyncio.sleep(0.5) login_button_ele = self.context_page.locator("xpath=//*[@id='app']/div[1]/div[2]/div[1]/ul/div[1]/button") @@ -161,20 +161,20 @@ class XHSLogin(AbstractLogin): partial_show_qrcode = functools.partial(utils.show_qrcode, base64_qrcode_img) asyncio.get_running_loop().run_in_executor(executor=None, func=partial_show_qrcode) - utils.logger.info(f"[XHSLogin.login_by_qrcode] waiting for scan code login, remaining time is 120s") + utils.logger.info(f"[XiaoHongShuLogin.login_by_qrcode] waiting for scan code login, remaining time is 120s") try: await self.check_login_state(no_logged_in_session) except RetryError: - utils.logger.info("[XHSLogin.login_by_qrcode] Login xiaohongshu failed by qrcode login method ...") + utils.logger.info("[XiaoHongShuLogin.login_by_qrcode] Login xiaohongshu failed by qrcode login method ...") sys.exit() wait_redirect_seconds = 5 - utils.logger.info(f"[XHSLogin.login_by_qrcode] Login successful then wait for {wait_redirect_seconds} seconds redirect ...") + utils.logger.info(f"[XiaoHongShuLogin.login_by_qrcode] Login successful then wait for {wait_redirect_seconds} seconds redirect ...") await asyncio.sleep(wait_redirect_seconds) async def login_by_cookies(self): """login xiaohongshu website by cookies""" - utils.logger.info("[XHSLogin.login_by_cookies] Begin login xiaohongshu by cookie ...") + utils.logger.info("[XiaoHongShuLogin.login_by_cookies] Begin login xiaohongshu by cookie ...") for key, value in utils.convert_str_cookie_to_dict(self.cookie_str).items(): if key != "web_session": # only set web_session cookie attr continue