import json
import time
from pathlib import Path

from slugify import slugify


def process_json_file(input_file, output_file=None, batch_size=10000):
    """Add slugs to a JSON data file, reporting progress as it goes."""
    if output_file is None:
        output_file = f"{Path(input_file).stem}_with_slugs.json"

    start_time = time.time()

    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    total = len(data)
    processed = 0
    print(f"Processing {total} records...")

    for item in data:
        # Generate slug from the name field
        name = item['fields'].get('name', '')
        pk = item['pk']

        if name:
            # Truncate so the slug plus its "-<pk>" suffix fits in 50 chars,
            # stripping any trailing hyphen left by the cut
            slug = slugify(name)[:50 - len(str(pk)) - 1].rstrip('-')
            # Append the PK to ensure uniqueness
            item['fields']['slug'] = f"{slug}-{pk}"
        else:
            # Fall back to model-pk if name is empty
            model_name = item['model'].split('.')[-1]
            item['fields']['slug'] = f"{model_name}-{pk}"

        processed += 1
        if processed % batch_size == 0:
            print(f"Processed {processed}/{total} records...")

    # Save the modified data
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Completed in {time.time() - start_time:.2f} seconds")
    print(f"Output saved to {output_file}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', help='Path to input JSON file')
    parser.add_argument('-o', '--output', help='Output file path')
    parser.add_argument('-b', '--batch', type=int, default=10000,
                        help='Progress reporting batch size')
    args = parser.parse_args()

    process_json_file(args.input_file, args.output, args.batch)
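
# Example usage (a sketch; the script name "add_slugs.py" is hypothetical, and the
# input is assumed to be a Django dumpdata-style fixture, which matches the
# "model"/"pk"/"fields" keys the code reads):
#
#   python add_slugs.py articles.json -o articles_with_slugs.json
#
# Each input record is expected to look like:
#   {"model": "blog.article", "pk": 7, "fields": {"name": "Hello World", ...}}
# and gains a slug in its fields, e.g. "slug": "hello-world-7".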