import json import mysql.connector from mysql.connector import Error import os from datetime import datetime # 数据库连接配置 db_config = { 'user': 'root', 'password': 'zaq12wsx@9Xin', 'host': '183.11.229.79', 'port': 3316, 'database': '9xin', 'auth_plugin': 'mysql_native_password' } def connect_to_database(): """建立数据库连接""" try: connection = mysql.connector.connect(**db_config) if connection.is_connected(): print('成功连接到MySQL数据库') return connection except Error as e: print(f'连接数据库时出错: {e}') return None def create_table(connection): """创建表格(如果不存在)""" create_table_query = """ CREATE TABLE IF NOT EXISTS xhs_notes ( id INT AUTO_INCREMENT PRIMARY KEY, note_id VARCHAR(50) NOT NULL UNIQUE, type VARCHAR(20), title VARCHAR(255), description TEXT, video_url TEXT, time BIGINT, last_update_time BIGINT, user_id VARCHAR(50), nickname VARCHAR(100), avatar TEXT, liked_count VARCHAR(20), collected_count VARCHAR(20), comment_count VARCHAR(20), share_count VARCHAR(20), ip_location VARCHAR(100), image_list TEXT, tag_list TEXT, last_modify_ts BIGINT, note_url TEXT, source_keyword VARCHAR(50), created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, download_flag BOOLEAN DEFAULT FALSE ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; """ try: cursor = connection.cursor() cursor.execute(create_table_query) connection.commit() print('表格创建成功') except Error as e: print(f'创建表格时出错: {e}') def check_record_exists(cursor, note_id): """检查记录是否已存在""" query = "SELECT COUNT(*) FROM xhs_notes WHERE note_id = %s" cursor.execute(query, (note_id,)) count = cursor.fetchone()[0] return count > 0 def insert_data(connection, data): """插入数据到数据库""" insert_query = """ INSERT INTO xhs_notes ( note_id, type, title, description, video_url, time, last_update_time, user_id, nickname, avatar, liked_count, collected_count, comment_count, share_count, ip_location, image_list, tag_list, last_modify_ts, note_url, source_keyword ) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s ) """ try: cursor = connection.cursor() inserted_count = 0 skipped_count = 0 for item in data: note_id = item.get('note_id') # 检查记录是否已存在 if check_record_exists(cursor, note_id): skipped_count += 1 continue values = ( note_id, item.get('type'), item.get('title'), item.get('desc'), item.get('video_url'), item.get('time'), item.get('last_update_time'), item.get('user_id'), item.get('nickname'), item.get('avatar'), item.get('liked_count'), item.get('collected_count'), item.get('comment_count'), item.get('share_count'), item.get('ip_location'), item.get('image_list'), item.get('tag_list'), item.get('last_modify_ts'), item.get('note_url'), item.get('source_keyword') ) cursor.execute(insert_query, values) inserted_count += 1 connection.commit() print(f'成功插入 {inserted_count} 条新数据') print(f'跳过 {skipped_count} 条已存在的数据') except Error as e: print(f'插入数据时出错: {e}') connection.rollback() def main(): # 读取JSON文件 json_file_path = './search_contents_2024-12-04.json' try: with open(json_file_path, 'r', encoding='utf-8') as file: data = json.load(file) except Exception as e: print(f'读取JSON文件时出错: {e}') return # 连接数据库 connection = connect_to_database() if connection is None: return try: # 创建表格 create_table(connection) # 插入数据 insert_data(connection, data) except Exception as e: print(f'处理数据时出错: {e}') finally: if connection.is_connected(): connection.close() print('数据库连接已关闭') if __name__ == "__main__": main()