162 lines
4.9 KiB
Python
162 lines
4.9 KiB
Python
|
import json
|
||
|
import mysql.connector
|
||
|
from mysql.connector import Error
|
||
|
import os
|
||
|
from datetime import datetime
|
||
|
|
||
|
# 数据库连接配置
|
||
|
db_config = {
|
||
|
'user': 'root',
|
||
|
'password': 'zaq12wsx@9Xin',
|
||
|
'host': '183.11.229.79',
|
||
|
'port': 3316,
|
||
|
'database': '9xin',
|
||
|
'auth_plugin': 'mysql_native_password'
|
||
|
}
|
||
|
|
||
|
def connect_to_database():
|
||
|
"""建立数据库连接"""
|
||
|
try:
|
||
|
connection = mysql.connector.connect(**db_config)
|
||
|
if connection.is_connected():
|
||
|
print('成功连接到MySQL数据库')
|
||
|
return connection
|
||
|
except Error as e:
|
||
|
print(f'连接数据库时出错: {e}')
|
||
|
return None
|
||
|
|
||
|
def create_table(connection):
|
||
|
"""创建表格(如果不存在)"""
|
||
|
create_table_query = """
|
||
|
CREATE TABLE IF NOT EXISTS xhs_notes (
|
||
|
id INT AUTO_INCREMENT PRIMARY KEY,
|
||
|
note_id VARCHAR(50) NOT NULL UNIQUE,
|
||
|
type VARCHAR(20),
|
||
|
title VARCHAR(255),
|
||
|
description TEXT,
|
||
|
video_url TEXT,
|
||
|
time BIGINT,
|
||
|
last_update_time BIGINT,
|
||
|
user_id VARCHAR(50),
|
||
|
nickname VARCHAR(100),
|
||
|
avatar TEXT,
|
||
|
liked_count VARCHAR(20),
|
||
|
collected_count VARCHAR(20),
|
||
|
comment_count VARCHAR(20),
|
||
|
share_count VARCHAR(20),
|
||
|
ip_location VARCHAR(100),
|
||
|
image_list TEXT,
|
||
|
tag_list TEXT,
|
||
|
last_modify_ts BIGINT,
|
||
|
note_url TEXT,
|
||
|
source_keyword VARCHAR(50),
|
||
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||
|
download_flag BOOLEAN DEFAULT FALSE
|
||
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
|
||
|
"""
|
||
|
try:
|
||
|
cursor = connection.cursor()
|
||
|
cursor.execute(create_table_query)
|
||
|
connection.commit()
|
||
|
print('表格创建成功')
|
||
|
except Error as e:
|
||
|
print(f'创建表格时出错: {e}')
|
||
|
|
||
|
def check_record_exists(cursor, note_id):
|
||
|
"""检查记录是否已存在"""
|
||
|
query = "SELECT COUNT(*) FROM xhs_notes WHERE note_id = %s"
|
||
|
cursor.execute(query, (note_id,))
|
||
|
count = cursor.fetchone()[0]
|
||
|
return count > 0
|
||
|
|
||
|
def insert_data(connection, data):
|
||
|
"""插入数据到数据库"""
|
||
|
insert_query = """
|
||
|
INSERT INTO xhs_notes (
|
||
|
note_id, type, title, description, video_url, time,
|
||
|
last_update_time, user_id, nickname, avatar,
|
||
|
liked_count, collected_count, comment_count, share_count,
|
||
|
ip_location, image_list, tag_list, last_modify_ts,
|
||
|
note_url, source_keyword
|
||
|
) VALUES (
|
||
|
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
|
||
|
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s
|
||
|
)
|
||
|
"""
|
||
|
try:
|
||
|
cursor = connection.cursor()
|
||
|
inserted_count = 0
|
||
|
skipped_count = 0
|
||
|
|
||
|
for item in data:
|
||
|
note_id = item.get('note_id')
|
||
|
|
||
|
# 检查记录是否已存在
|
||
|
if check_record_exists(cursor, note_id):
|
||
|
skipped_count += 1
|
||
|
continue
|
||
|
|
||
|
values = (
|
||
|
note_id,
|
||
|
item.get('type'),
|
||
|
item.get('title'),
|
||
|
item.get('desc'),
|
||
|
item.get('video_url'),
|
||
|
item.get('time'),
|
||
|
item.get('last_update_time'),
|
||
|
item.get('user_id'),
|
||
|
item.get('nickname'),
|
||
|
item.get('avatar'),
|
||
|
item.get('liked_count'),
|
||
|
item.get('collected_count'),
|
||
|
item.get('comment_count'),
|
||
|
item.get('share_count'),
|
||
|
item.get('ip_location'),
|
||
|
item.get('image_list'),
|
||
|
item.get('tag_list'),
|
||
|
item.get('last_modify_ts'),
|
||
|
item.get('note_url'),
|
||
|
item.get('source_keyword')
|
||
|
)
|
||
|
cursor.execute(insert_query, values)
|
||
|
inserted_count += 1
|
||
|
|
||
|
connection.commit()
|
||
|
print(f'成功插入 {inserted_count} 条新数据')
|
||
|
print(f'跳过 {skipped_count} 条已存在的数据')
|
||
|
except Error as e:
|
||
|
print(f'插入数据时出错: {e}')
|
||
|
connection.rollback()
|
||
|
|
||
|
def main():
|
||
|
# 读取JSON文件
|
||
|
json_file_path = './search_contents_2024-12-04.json'
|
||
|
|
||
|
try:
|
||
|
with open(json_file_path, 'r', encoding='utf-8') as file:
|
||
|
data = json.load(file)
|
||
|
except Exception as e:
|
||
|
print(f'读取JSON文件时出错: {e}')
|
||
|
return
|
||
|
|
||
|
# 连接数据库
|
||
|
connection = connect_to_database()
|
||
|
if connection is None:
|
||
|
return
|
||
|
|
||
|
try:
|
||
|
# 创建表格
|
||
|
create_table(connection)
|
||
|
|
||
|
# 插入数据
|
||
|
insert_data(connection, data)
|
||
|
|
||
|
except Exception as e:
|
||
|
print(f'处理数据时出错: {e}')
|
||
|
finally:
|
||
|
if connection.is_connected():
|
||
|
connection.close()
|
||
|
print('数据库连接已关闭')
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
main()
|