haikal/slug_data.py

import json
import time
from pathlib import Path
from slugify import slugify
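# NOTE: `slugify` is assumed to come from the python-slugify package
# (pip install python-slugify); any library exposing slugify(text) would work.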


def process_json_file(input_file, output_file=None, batch_size=10000):
    """Add a slug to each record in a JSON data file and write the result out."""
    if output_file is None:
        output_file = f"{Path(input_file).stem}_with_slugs.json"

    start_time = time.time()

    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
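    # Note: the whole file is read into memory at once, which is simple and fast
    # for typical fixture sizes but not suited to files larger than available RAM.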

    total = len(data)
    processed = 0
    print(f"Processing {total} records...")

    for item in data:
        # Generate the slug from the name field
        name = item['fields'].get('name', '')
        pk = item['pk']
        if name:
            slug = slugify(name)[:50]  # Truncate the base slug to 50 chars
            # Append the pk so records with duplicate names still get unique slugs
            item['fields']['slug'] = f"{slug}-{pk}"
        else:
            # Fall back to "<model>-<pk>" when the name is empty
            model_name = item['model'].split('.')[-1]
            item['fields']['slug'] = f"{model_name}-{pk}"

        processed += 1
        if processed % batch_size == 0:
            print(f"Processed {processed}/{total} records...")
    # Save the modified data
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Completed in {time.time() - start_time:.2f} seconds")
    print(f"Output saved to {output_file}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', help='Path to input JSON file')
    parser.add_argument('-o', '--output', help='Output file path')
    parser.add_argument('-b', '--batch', type=int, default=10000,
                        help='Progress reporting batch size')
    args = parser.parse_args()

    process_json_file(args.input_file, args.output, args.batch)
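
# Example CLI usage (file names here are illustrative):
#   python slug_data.py data.json -o data_with_slugs.json -b 5000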