import json
import time
from pathlib import Path

from slugify import slugify


def process_json_file(input_file, output_file=None, batch_size=10000):
    """Add a slug to each record of a JSON fixture (model/pk/fields objects)."""
    if output_file is None:
        output_file = f"{Path(input_file).stem}_with_slugs.json"

    start_time = time.time()

    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    total = len(data)
    processed = 0

    print(f"Processing {total} records...")

    for item in data:
        # Generate the slug from the record's name field
        name = item["fields"].get("name", "")
        pk = item["pk"]

        if name:
            slug = slugify(name)[:50]  # truncate the base slug to 50 chars
            # Append the pk to guarantee uniqueness; note the suffix can push
            # the final slug past 50 chars
            item["fields"]["slug"] = f"{slug}-{pk}"
        else:
            # Fall back to model-pk when the name is empty
            model_name = item["model"].split(".")[-1]
            item["fields"]["slug"] = f"{model_name}-{pk}"

        processed += 1
        if processed % batch_size == 0:
            print(f"Processed {processed}/{total} records...")

    # Save the modified data
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    print(f"Completed in {time.time() - start_time:.2f} seconds")
    print(f"Output saved to {output_file}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("input_file", help="Path to input JSON file")
    parser.add_argument("-o", "--output", help="Output file path")
    parser.add_argument(
        "-b", "--batch", type=int, default=10000, help="Progress reporting batch size"
    )
    args = parser.parse_args()

    process_json_file(args.input_file, args.output, args.batch)
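
# Example invocation (the script filename is illustrative):
#   python add_slugs.py products.json -o products_with_slugs.json -b 5000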