Add analyize_discovery.py

This commit is contained in:
2025-05-16 13:48:19 +00:00
parent f564c26919
commit fb2a8cae58

79
analyize_discovery.py Normal file
View File

@@ -0,0 +1,79 @@
import ast
import os
import tkinter as tk
from tkinter import filedialog, messagebox, ttk

import pandas as pd
# Columns the analysis reads; the selected file must provide all of them.
_REQUIRED_COLUMNS = (
    "StudyInstanceUID",
    "AccessionNumber",
    "StudyDescription",
    "PatientID",
    "PatientName",
    "ModalitiesInStudy",
)

# Modalities reported individually; anything else is tallied under "Other".
_KNOWN_MODALITIES = ("CR", "DX", "CT", "MR", "NM", "US", "XA", "PR", "SC", "OT")


def _normalize_modalities(val):
    """Flatten a list-like string such as "['CT', 'SR']" into "CT, SR".

    Values that are not wrapped in square brackets, or that cannot be parsed
    as a literal list of strings, are returned unchanged.
    """
    if not (val.startswith("[") and val.endswith("]")):
        return val
    try:
        # literal_eval only accepts Python literals, so text read from the
        # input file can never execute code (unlike eval()).
        return ", ".join(ast.literal_eval(val))
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Malformed literal or non-string items: keep the raw text.
        return val


def _compute_stats(df):
    """Return an ordered mapping of metric label -> count for the table *df*.

    *df* must contain every column in ``_REQUIRED_COLUMNS`` with missing
    values already replaced by empty strings.
    """
    accession = df["AccessionNumber"]
    has_accession = accession != ""

    # Blank accession numbers are reported on their own line below. Grouping
    # them would lump unrelated studies into one bogus "accession" that
    # nearly always shows up as having several StudyInstanceUIDs.
    uids_per_accession = (
        df.loc[has_accession]
        .groupby("AccessionNumber")["StudyInstanceUID"]
        .nunique()
    )

    stats = {
        "Distinct StudyInstanceUIDs": int(df["StudyInstanceUID"].nunique()),
        "Distinct AccessionNumbers": int(accession[has_accession].nunique()),
        "AccessionNumbers with 2 StudyInstanceUIDs": int((uids_per_accession == 2).sum()),
        "AccessionNumbers with 3 StudyInstanceUIDs": int((uids_per_accession == 3).sum()),
        "AccessionNumbers with >3 StudyInstanceUIDs": int((uids_per_accession > 3).sum()),
        "Missing AccessionNumbers": int((~has_accession).sum()),
        "Missing StudyDescriptions": int((df["StudyDescription"] == "").sum()),
    }

    # Case-insensitive substring hits that usually flag non-clinical data.
    for keyword in ("QA", "TEST"):
        for column in ("PatientID", "StudyDescription", "PatientName"):
            hits = df[column].str.contains(keyword, case=False, na=False)
            stats[f"*{keyword}* in {column}"] = int(hits.sum())
    return stats


def _count_modalities(values):
    """Tally comma-separated modality codes found in *values*.

    Codes are stripped and upper-cased before counting. Every code in a
    value is counted, so a study listing two modalities adds to two buckets.
    Codes outside ``_KNOWN_MODALITIES`` are counted under "Other".
    """
    counts = dict.fromkeys(_KNOWN_MODALITIES, 0)
    counts["Other"] = 0
    for val in values:
        for token in val.split(","):
            modality = token.strip().upper()
            if not modality:
                continue
            bucket = modality if modality in counts else "Other"
            counts[bucket] += 1
    return counts


def _show_results(stats, modality_counts):
    """Render the metrics and the modality breakdown in a new window."""
    output_window = tk.Toplevel(root)
    output_window.title("DICOM Discovery Analysis")

    row = 0
    for key, val in stats.items():
        tk.Label(output_window, text=f"{key}:").grid(row=row, column=0, sticky='w', padx=5, pady=2)
        tk.Label(output_window, text=f"{val}").grid(row=row, column=1, sticky='w', padx=5, pady=2)
        row += 1

    tk.Label(output_window, text="Modality Breakdown:").grid(row=row, column=0, sticky='w', padx=5, pady=10)
    row += 1
    for modality, count in modality_counts.items():
        tk.Label(output_window, text=f"{modality}:").grid(row=row, column=0, sticky='w', padx=20, pady=2)
        tk.Label(output_window, text=f"{count}").grid(row=row, column=1, sticky='w', padx=5, pady=2)
        row += 1


def load_and_analyze_file():
    """Ask for a pipe-delimited discovery file, analyze it, and show results.

    Button callback: takes no arguments and returns nothing. It returns
    silently if the file dialog is cancelled. If the file cannot be read or
    lacks a required column, an error dialog is shown instead of letting the
    exception disappear inside the Tk callback.
    """
    file_path = filedialog.askopenfilename(
        title="Select DICOM Discovery Output File",
        filetypes=[("Text Files", "*.txt")],
    )
    if not file_path:
        return

    try:
        # Read everything as text so identifiers keep leading zeros, and
        # turn missing cells into '' so string operations are safe.
        df = pd.read_csv(file_path, delimiter='|', dtype=str).fillna('')
    except (OSError, UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        messagebox.showerror("DICOM Discovery Analysis", f"Could not read file:\n{file_path}\n\n{exc}")
        return

    missing = [column for column in _REQUIRED_COLUMNS if column not in df.columns]
    if missing:
        messagebox.showerror(
            "DICOM Discovery Analysis",
            "The selected file is missing required column(s): " + ", ".join(missing),
        )
        return

    # Flatten list-like modality strings before counting them.
    df['ModalitiesInStudy'] = df['ModalitiesInStudy'].apply(_normalize_modalities)

    stats = _compute_stats(df)
    modality_counts = _count_modalities(df['ModalitiesInStudy'])
    _show_results(stats, modality_counts)
# Application bootstrap: build the main window, add the single action
# button, then hand control to the Tk event loop.
root = tk.Tk()
root.wm_title("DICOM Discovery Analyzer")

# Padded container so the button does not touch the window edges.
frame = tk.Frame(master=root, padx=10, pady=10)
frame.pack()

ttk.Button(
    master=frame,
    text="Select and Analyze Output File",
    command=load_and_analyze_file,
).pack()

# Blocks until the main window is closed.
root.mainloop()