import json
import time
from pathlib import Path

from slugify import slugify


def process_json_file(input_file, output_file=None, batch_size=10000):
    """Add a unique slug to every record in a Django-style JSON fixture."""
    if output_file is None:
        output_file = f"{Path(input_file).stem}_with_slugs.json"

    start_time = time.time()

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    total = len(data)
    processed = 0
    print(f"Processing {total} records...")

    for item in data:
        # Generate slug from the name field
        name = item["fields"].get("name", "")
        pk = item["pk"]
        if name:
            slug = slugify(name)[:50]  # Truncate to 50 chars
            # Append PK to ensure uniqueness
            item["fields"]["slug"] = f"{slug}-{pk}"
        else:
            # Fallback to model-pk if name is empty
            model_name = item["model"].split(".")[-1]
            item["fields"]["slug"] = f"{model_name}-{pk}"

        processed += 1
        if processed % batch_size == 0:
            print(f"Processed {processed}/{total} records...")

    # Save the modified data
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Completed in {time.time() - start_time:.2f} seconds")
    print(f"Output saved to {output_file}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="Path to input JSON file")
    parser.add_argument("-o", "--output", help="Output file path")
    parser.add_argument(
        "-b", "--batch", type=int, default=10000, help="Progress reporting batch size"
    )
    args = parser.parse_args()
    process_json_file(args.input_file, args.output, args.batch)
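
# Usage sketch (assumptions flagged): the record shape below is inferred from
# the "model"/"pk"/"fields" keys the script reads; it matches Django's
# dumpdata fixture format, but verify it against your own data.
#
#   Input record:   {"model": "blog.article", "pk": 7,
#                    "fields": {"name": "Hello, World!"}}
#   After running:  {"model": "blog.article", "pk": 7,
#                    "fields": {"name": "Hello, World!", "slug": "hello-world-7"}}
#
#   $ python add_slugs.py articles.json -o articles_slugged.json -b 5000
#
# "blog.article", the JSON filenames, and add_slugs.py are hypothetical
# example names, not names taken from the original script.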