xhs_crawler/process_xhs_note.py
phezzan f74ad7926e
Some checks failed
Deploy VitePress site to Pages / build (push) Has been cancelled
Deploy VitePress site to Pages / Deploy (push) Has been cancelled
first commit
2024-12-17 16:14:10 +08:00

40 lines
1.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import json
import os
def excel_to_json(
    excel_file: str = 'input.xlsx',
    output_dir: str = 'sheet_notes',
    column: str = '笔记标题',
) -> None:
    """Export one column of every worksheet to a per-sheet JSON file.

    For each sheet in ``excel_file`` the first row is skipped and the second
    row is used as the header. If the sheet has a column named ``column``,
    its non-null values are written as a JSON list to
    ``<output_dir>/<sheet name>.json``; otherwise a warning is printed and
    the sheet is skipped.

    Args:
        excel_file: Path of the workbook to read.
        output_dir: Directory that receives the JSON files. It is created
            (including parents) when missing.
        column: Header of the column to export.

    Raises:
        FileNotFoundError: If ``excel_file`` does not exist.
    """
    # exist_ok=True replaces the racy "check, then create" sequence.
    os.makedirs(output_dir, exist_ok=True)

    # Open the workbook once and reuse the handle for every sheet; the
    # context manager closes it even if reading a sheet raises.
    with pd.ExcelFile(excel_file) as workbook:
        for sheet_name in workbook.sheet_names:
            # header=1: skip the first row, use the second row as column names.
            df = pd.read_excel(workbook, sheet_name=sheet_name, header=1)

            if column not in df.columns:
                print(f'警告: {sheet_name} 中没有找到"{column}"')
                continue

            # Drop empty cells so the JSON list holds only real values.
            notes = df[column].dropna().tolist()

            output_file = os.path.join(output_dir, f'{sheet_name}.json')
            with open(output_file, 'w', encoding='utf-8') as f:
                # ensure_ascii=False keeps non-ASCII text readable in the file.
                json.dump(notes, f, ensure_ascii=False, indent=4)
            print(f'已保存 {sheet_name} 的{column}到 {output_file}')
# Script entry point: run the export only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    excel_to_json()