xhs_server/import_xhs_notes.py

162 lines
4.9 KiB
Python
Raw Permalink Normal View History

2024-12-16 02:31:07 +00:00
import json
import mysql.connector
from mysql.connector import Error
import os
from datetime import datetime
# 数据库连接配置
db_config = {
'user': 'root',
'password': 'zaq12wsx@9Xin',
'host': '183.11.229.79',
'port': 3316,
'database': '9xin',
'auth_plugin': 'mysql_native_password'
}
def connect_to_database():
"""建立数据库连接"""
try:
connection = mysql.connector.connect(**db_config)
if connection.is_connected():
print('成功连接到MySQL数据库')
return connection
except Error as e:
print(f'连接数据库时出错: {e}')
return None
def create_table(connection):
"""创建表格(如果不存在)"""
create_table_query = """
CREATE TABLE IF NOT EXISTS xhs_notes (
id INT AUTO_INCREMENT PRIMARY KEY,
note_id VARCHAR(50) NOT NULL UNIQUE,
type VARCHAR(20),
title VARCHAR(255),
description TEXT,
video_url TEXT,
time BIGINT,
last_update_time BIGINT,
user_id VARCHAR(50),
nickname VARCHAR(100),
avatar TEXT,
liked_count VARCHAR(20),
collected_count VARCHAR(20),
comment_count VARCHAR(20),
share_count VARCHAR(20),
ip_location VARCHAR(100),
image_list TEXT,
tag_list TEXT,
last_modify_ts BIGINT,
note_url TEXT,
source_keyword VARCHAR(50),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
download_flag BOOLEAN DEFAULT FALSE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
"""
try:
cursor = connection.cursor()
cursor.execute(create_table_query)
connection.commit()
print('表格创建成功')
except Error as e:
print(f'创建表格时出错: {e}')
def check_record_exists(cursor, note_id):
"""检查记录是否已存在"""
query = "SELECT COUNT(*) FROM xhs_notes WHERE note_id = %s"
cursor.execute(query, (note_id,))
count = cursor.fetchone()[0]
return count > 0
def insert_data(connection, data):
"""插入数据到数据库"""
insert_query = """
INSERT INTO xhs_notes (
note_id, type, title, description, video_url, time,
last_update_time, user_id, nickname, avatar,
liked_count, collected_count, comment_count, share_count,
ip_location, image_list, tag_list, last_modify_ts,
note_url, source_keyword
) VALUES (
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s
)
"""
try:
cursor = connection.cursor()
inserted_count = 0
skipped_count = 0
for item in data:
note_id = item.get('note_id')
# 检查记录是否已存在
if check_record_exists(cursor, note_id):
skipped_count += 1
continue
values = (
note_id,
item.get('type'),
item.get('title'),
item.get('desc'),
item.get('video_url'),
item.get('time'),
item.get('last_update_time'),
item.get('user_id'),
item.get('nickname'),
item.get('avatar'),
item.get('liked_count'),
item.get('collected_count'),
item.get('comment_count'),
item.get('share_count'),
item.get('ip_location'),
item.get('image_list'),
item.get('tag_list'),
item.get('last_modify_ts'),
item.get('note_url'),
item.get('source_keyword')
)
cursor.execute(insert_query, values)
inserted_count += 1
connection.commit()
print(f'成功插入 {inserted_count} 条新数据')
print(f'跳过 {skipped_count} 条已存在的数据')
except Error as e:
print(f'插入数据时出错: {e}')
connection.rollback()
def main():
# 读取JSON文件
json_file_path = './search_contents_2024-12-04.json'
try:
with open(json_file_path, 'r', encoding='utf-8') as file:
data = json.load(file)
except Exception as e:
print(f'读取JSON文件时出错: {e}')
return
# 连接数据库
connection = connect_to_database()
if connection is None:
return
try:
# 创建表格
create_table(connection)
# 插入数据
insert_data(connection, data)
except Exception as e:
print(f'处理数据时出错: {e}')
finally:
if connection.is_connected():
connection.close()
print('数据库连接已关闭')
if __name__ == "__main__":
main()