"""Tk front end that summarizes a pipe-delimited DICOM discovery output file.

The input is a text file with a header row and ``|`` as the field separator.
It must provide the columns listed in ``REQUIRED_COLUMNS``. The tool reports
distinct/missing identifier counts, QA/TEST keyword hits and a per-modality
breakdown in a pop-up window.
"""

import ast
import os
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

import pandas as pd

# Columns the analysis reads; a file lacking any of them cannot be analyzed.
REQUIRED_COLUMNS = (
    "StudyInstanceUID",
    "AccessionNumber",
    "StudyDescription",
    "PatientID",
    "PatientName",
    "ModalitiesInStudy",
)

# Modalities reported individually; anything else is tallied under "Other".
KNOWN_MODALITIES = ("CR", "DX", "CT", "MR", "NM", "US", "XA", "PR", "SC", "OT")

# Keywords searched (case-insensitively) in each of the keyword columns.
KEYWORDS = ("QA", "TEST")
KEYWORD_COLUMNS = ("PatientID", "StudyDescription", "PatientName")

# Exceptions ast.literal_eval may raise on malformed input.
_LITERAL_EVAL_ERRORS = (
    ValueError,
    TypeError,
    SyntaxError,
    MemoryError,
    RecursionError,
)


def normalize_modalities(val):
    """Flatten a list-like string such as ``"['CT', 'MR']"`` to ``"CT, MR"``.

    Values that are not wrapped in square brackets, that are not a valid
    Python literal, or that contain non-string items are returned unchanged.

    The text comes from a user-selected file, so it is parsed with
    ``ast.literal_eval`` (literals only) and never with ``eval``.
    """
    if not (val.startswith("[") and val.endswith("]")):
        return val
    try:
        items = ast.literal_eval(val)
    except _LITERAL_EVAL_ERRORS:
        return val
    if isinstance(items, (list, tuple)) and all(
        isinstance(item, str) for item in items
    ):
        return ", ".join(items)
    return val


def load_discovery_file(file_path):
    """Read a discovery file into a DataFrame of strings.

    Every column is read as ``str`` and missing cells become ``''``. The
    ``ModalitiesInStudy`` column is passed through ``normalize_modalities``.

    Raises:
        OSError: the file cannot be opened or read.
        ValueError: the file cannot be parsed (pandas parser/empty-data errors
            derive from ``ValueError``) or a required column is absent.
    """
    df = pd.read_csv(file_path, delimiter="|", dtype=str).fillna("")

    missing = [name for name in REQUIRED_COLUMNS if name not in df.columns]
    if missing:
        raise ValueError("Missing required column(s): " + ", ".join(missing))

    df["ModalitiesInStudy"] = df["ModalitiesInStudy"].apply(normalize_modalities)
    return df


def compute_stats(df):
    """Return the summary metrics as an ordered ``{label: int}`` mapping.

    Rows with a blank AccessionNumber are reported under
    "Missing AccessionNumbers" and are excluded from the distinct and
    multi-StudyInstanceUID accession metrics, so that the blank value is not
    mistaken for one accession shared by many studies.
    """
    accession = df["AccessionNumber"]
    has_accession = accession != ""

    # Number of distinct studies per (non-blank) accession, computed once.
    uids_per_accession = (
        df.loc[has_accession]
        .groupby("AccessionNumber")["StudyInstanceUID"]
        .nunique()
    )

    stats = {
        "Distinct StudyInstanceUIDs": int(df["StudyInstanceUID"].nunique()),
        "Distinct AccessionNumbers": int(accession[has_accession].nunique()),
        "AccessionNumbers with 2 StudyInstanceUIDs": int(
            (uids_per_accession == 2).sum()
        ),
        "AccessionNumbers with 3 StudyInstanceUIDs": int(
            (uids_per_accession == 3).sum()
        ),
        "AccessionNumbers with >3 StudyInstanceUIDs": int(
            (uids_per_accession > 3).sum()
        ),
        "Missing AccessionNumbers": int((~has_accession).sum()),
        "Missing StudyDescriptions": int((df["StudyDescription"] == "").sum()),
    }

    for keyword in KEYWORDS:
        for column in KEYWORD_COLUMNS:
            hits = df[column].str.contains(keyword, case=False, na=False)
            stats[f"*{keyword}* in {column}"] = int(hits.sum())

    return stats


def count_modalities(values):
    """Tally comma-separated modality codes across ``values``.

    Codes are stripped and upper-cased before matching. Each occurrence of a
    code in ``KNOWN_MODALITIES`` increments that code's count; any other
    non-empty code increments ``"Other"``. All keys are always present.
    """
    counts = dict.fromkeys(KNOWN_MODALITIES, 0)
    counts["Other"] = 0

    for val in values:
        for token in val.split(","):
            modality = token.strip().upper()
            if not modality:
                continue
            if modality in KNOWN_MODALITIES:
                counts[modality] += 1
            else:
                counts["Other"] += 1

    return counts


def _show_results(parent, stats, modality_counts):
    """Open a window listing the metrics followed by the modality breakdown."""
    output_window = tk.Toplevel(parent)
    output_window.title("DICOM Discovery Analysis")

    row = 0
    for key, val in stats.items():
        tk.Label(output_window, text=f"{key}:").grid(
            row=row, column=0, sticky="w", padx=5, pady=2
        )
        tk.Label(output_window, text=f"{val}").grid(
            row=row, column=1, sticky="w", padx=5, pady=2
        )
        row += 1

    tk.Label(output_window, text="Modality Breakdown:").grid(
        row=row, column=0, sticky="w", padx=5, pady=10
    )
    row += 1
    for modality, count in modality_counts.items():
        tk.Label(output_window, text=f"{modality}:").grid(
            row=row, column=0, sticky="w", padx=20, pady=2
        )
        tk.Label(output_window, text=f"{count}").grid(
            row=row, column=1, sticky="w", padx=5, pady=2
        )
        row += 1


def load_and_analyze_file(parent=None):
    """Ask the user for a discovery file, analyze it and display the results.

    Args:
        parent: Optional Tk widget that owns the result window. When omitted,
            Tk's default root window is used.

    Does nothing if the file dialog is cancelled. Read, parse and
    missing-column problems are shown in an error dialog instead of being
    raised inside the Tk callback, where the user would not see them.
    """
    file_path = filedialog.askopenfilename(
        title="Select DICOM Discovery Output File",
        filetypes=[("Text Files", "*.txt")],
    )
    if not file_path:
        return

    try:
        df = load_discovery_file(file_path)
    except (OSError, ValueError) as err:
        messagebox.showerror(
            "DICOM Discovery Analysis",
            f"Could not analyze {file_path}:\n{err}",
        )
        return

    _show_results(
        parent,
        compute_stats(df),
        count_modalities(df["ModalitiesInStudy"]),
    )


def main():
    """Build the main window and run the Tk event loop."""
    root = tk.Tk()
    root.title("DICOM Discovery Analyzer")

    frame = tk.Frame(root, padx=10, pady=10)
    frame.pack()

    ttk.Button(
        frame,
        text="Select and Analyze Output File",
        command=lambda: load_and_analyze_file(root),
    ).pack()

    root.mainloop()


if __name__ == "__main__":
    main()