174 lines
7.0 KiB
Python
174 lines
7.0 KiB
Python
# myapp/management/commands/import_icd10.py
|
||
import xmlschema
|
||
from django.core.management.base import BaseCommand, CommandError
|
||
from django.db import transaction
|
||
from emr.models import Icd10
|
||
|
||
class Command(BaseCommand):
    """Import an ICD-10-CM tabular XML file into the ``Icd10`` model.

    The XML is decoded to nested dicts/lists with ``xmlschema`` using the
    supplied XSD, the ``chapter`` entries are located at any depth, and every
    ``diag`` element (recursively) becomes one ``Icd10`` row. Parent/child
    relations are written in a second pass once all rows exist.
    """

    help = "Import ICD-10-CM tabular XML into Icd10 model (auto-detects root)."

    # Number of rows sent to the database per INSERT/UPDATE batch.
    BATCH_SIZE = 1000

    def add_arguments(self, parser):
        """Register the ``--xsd``, ``--xml`` and ``--truncate`` options."""
        parser.add_argument("--xsd", required=True, help="Path to icd10cm-tabular-2026.xsd")
        parser.add_argument("--xml", required=True, help="Path to icd10cm-tabular-2026.xml")
        parser.add_argument("--truncate", action="store_true", help="Delete existing Icd10 rows first")

    # ------------------------ helpers ------------------------

    def _as_text(self, val):
        """Return ``val`` as a string, or ``None`` when ``val`` is ``None``.

        A decoded element that carries attributes arrives as a dict. Its text
        is looked up under ``"$"`` (the text key of xmlschema's default
        converter) and then under the legacy keys ``"#text"``, ``"@value"``
        and ``"value"``. The first truthy value wins. If none is present the
        dict's ``str()`` form is returned, as before.
        """
        if val is None:
            return None
        if isinstance(val, dict):
            for key in ("$", "#text", "@value", "value"):
                text = val.get(key)
                if text:
                    # Always hand back a str so callers can safely .strip().
                    return str(text)
        return str(val)

    def _ensure_list(self, maybe_list):
        """Normalize ``None`` / single value / list into a list.

        The decoder yields a bare item rather than a one-element list when an
        element occurs once, so callers normalize before iterating.
        """
        if maybe_list is None:
            return []
        if isinstance(maybe_list, list):
            return maybe_list
        return [maybe_list]

    def _find_first_with_key(self, data, key):
        """Depth-first search: return the first dict that directly contains `key`.

        Dict values and list items are searched recursively in iteration
        order. Returns ``None`` when no dict in ``data`` has ``key``.
        """
        if isinstance(data, dict):
            if key in data:
                return data
            children = data.values()
        elif isinstance(data, list):
            children = data
        else:
            return None

        for child in children:
            found = self._find_first_with_key(child, key)
            if found is not None:
                return found
        return None

    def _collect_rows(self, chapters):
        """Build unsaved ``Icd10`` rows and parent links from the chapters.

        Expected minimal structure:
            chapter -> section? -> diag (recursive)

        Returns:
            ``(rows, parent_links)`` where ``rows`` is a list of unsaved
            ``Icd10`` instances in document order and ``parent_links`` is a
            list of ``(child_code, parent_code)`` tuples for nested diags.
        """
        rows = []
        parent_links = []

        def import_diag(diag, chapter_name, section_name, parent_code=None):
            code = self._as_text(diag.get("name"))
            desc = self._as_text(diag.get("desc"))
            if not code:
                # Without a code there is nothing to key the row on; the
                # whole subtree is skipped.
                return

            children = self._ensure_list(diag.get("diag"))
            # A node that only groups children and has no description of its
            # own is flagged as a header.
            is_header = bool(children) and not (desc and desc.strip())

            rows.append(Icd10(
                code=code,
                description=desc,
                chapter_name=self._as_text(chapter_name),
                section_name=self._as_text(section_name),
                parent=None,  # resolved in a second pass, once rows have ids
                is_header=is_header,
            ))
            if parent_code:
                parent_links.append((code, parent_code))

            for child in children:
                import_diag(child, chapter_name, section_name, parent_code=code)

        for ch in self._ensure_list(chapters):
            ch_name = self._as_text(ch.get("name"))

            # Sections may be missing in some packs; diags may then sit
            # directly under the chapter.
            sections = self._ensure_list(ch.get("section"))
            if sections:
                for sec in sections:
                    sec_name = self._as_text(sec.get("name"))
                    for d in self._ensure_list(sec.get("diag")):
                        import_diag(d, ch_name, sec_name, parent_code=None)
            else:
                for d in self._ensure_list(ch.get("diag")):
                    import_diag(d, ch_name, None, parent_code=None)

        return rows, parent_links

    def _load_document(self, xsd_path, xml_path):
        """Decode ``xml_path`` against the schema at ``xsd_path``.

        Raises:
            CommandError: if the schema cannot be loaded or the XML cannot be
                decoded; the underlying exception is chained as the cause.
        """
        # Broad catches are deliberate: this is the command's boundary and any
        # failure here should surface as a CommandError with its cause kept.
        try:
            xs = xmlschema.XMLSchema(xsd_path)
        except Exception as e:
            raise CommandError(f"Failed to load XSD: {e}") from e

        try:
            return xs.to_dict(xml_path)
        except Exception as e:
            raise CommandError(f"Failed to parse XML: {e}") from e

    def _locate_chapters(self, data):
        """Return the value stored under ``"chapter"`` in the decoded document.

        The search covers the whole document, so a top-level ``"chapter"``
        key is found even when it is the document's only key. If no
        ``"chapter"`` key exists, a ``{"chapters": {"chapter": ...}}``
        wrapper is accepted as a fallback.

        Raises:
            CommandError: if no chapter container is found or it is empty.
        """
        container = self._find_first_with_key(data, "chapter")
        if not container:
            # Fall back: sometimes the structure uses "chapters".
            container = self._find_first_with_key(data, "chapters")
            if container and isinstance(container["chapters"], dict):
                # Normalize {"chapters": {"chapter": [...]}} to the inner dict.
                if "chapter" in container["chapters"]:
                    container = container["chapters"]

        if not container or "chapter" not in container:
            # Unwrap a single-key root only for the preview, so the message
            # lists the keys one level below the root element.
            if isinstance(data, dict) and len(data) == 1:
                root = next(iter(data.values()))
            else:
                root = data
            preview_keys = list(root.keys()) if isinstance(root, dict) else type(root)
            raise CommandError(
                "Could not locate 'chapter' anywhere in the parsed XML. "
                f"Top-level preview: {preview_keys}"
            )

        chapters = container.get("chapter")
        if chapters is None:
            raise CommandError("Found container for chapters, but 'chapter' is empty.")
        return chapters

    def _link_parents(self, parent_links):
        """Set ``parent`` on stored rows from ``(child_code, parent_code)`` pairs.

        Pairs whose child or parent code is not in the table are ignored, as
        are children that already point at the right parent.

        Returns:
            The number of rows whose parent was updated.
        """
        # "parent" is loaded together with id/code: the loop below reads
        # parent_id, and a deferred field would cost one query per row.
        # NOTE(review): assumes Icd10.code is unique; with duplicates the
        # last row seen for a code wins in this mapping.
        code_to_obj = {
            obj.code: obj for obj in Icd10.objects.only("id", "code", "parent")
        }

        updates = []
        for child_code, parent_code in parent_links:
            child = code_to_obj.get(child_code)
            parent = code_to_obj.get(parent_code)
            if child is None or parent is None:
                continue
            if child.parent_id != parent.id:
                child.parent_id = parent.id
                updates.append(child)

        Icd10.objects.bulk_update(updates, ["parent"], batch_size=self.BATCH_SIZE)
        return len(updates)

    def handle(self, *args, **opts):
        """Run the import: decode, collect rows, then write to the database.

        Decoding and row collection happen before any database write. The
        optional truncate, the inserts and the parent linking then share one
        transaction, so a failure at any point leaves existing data intact.

        Raises:
            CommandError: on schema/XML errors or when no chapters are found.
        """
        data = self._load_document(opts["xsd"], opts["xml"])
        chapters = self._locate_chapters(data)

        self.stdout.write("Collecting ICD-10 rows...")
        rows, parent_links = self._collect_rows(chapters)
        self.stdout.write(self.style.SUCCESS(f"Collected {len(rows)} rows. Inserting..."))

        with transaction.atomic():
            if opts["truncate"]:
                self.stdout.write(self.style.WARNING("Truncating existing Icd10 data..."))
                Icd10.objects.all().delete()

            # ignore_conflicts keeps rows that already exist instead of
            # failing the whole batch.
            Icd10.objects.bulk_create(
                rows, batch_size=self.BATCH_SIZE, ignore_conflicts=True
            )

            if parent_links:
                self.stdout.write("Linking parents...")
                linked = self._link_parents(parent_links)
                self.stdout.write(self.style.SUCCESS(f"Linked {linked} parent relations."))

        self.stdout.write(self.style.SUCCESS("ICD-10 import completed successfully."))