Source code for subcortexmesh.subseg_getvol

#####################################################################################
##############getting subcortical volumes from subcortical segmentations#############

#Currently compatible with FreeSurfer's ASeg and FSL's FIRST segmentation outputs

import os
import ants
import re
import glob
import tempfile
import time
from typing import Optional, Union
from pathlib import Path
from subcortexmesh import template_data_fetch
import nibabel as nib
import numpy as np
                            
[docs] def subseg_getvol( inputdir: Union[str, Path], outputdir: Union[str, Path], template: str, toolboxdata: Optional[Union[str, Path]] = None, overwrite: bool = True, silent: bool = False, ): """Extracting and coregistering subcortical volumes from a cohort's subcortical volumes This function reads through subjects of a FreeSurfer or FSL FIRST output preprocessing directory and identifies subcortical segmentation volumes. Typically, such volumes are "aseg.mgz" for FreeSurfer, and "*all_fast_firstseg" for FSL FIRST (plus, if applicable, "*Cereb_first.nii.gz"). For each subject, subseg_getvol() uses antspy to coregister the segmentation volumes to their respective template space (fsaverage/MNI305 for FreeSurfer, MNI152 for FSL), and then extracts each subcortical region-of-interest separately. T1 volumes are required for the coregistration, stored as [sub-ID]/mri/T1.mgz for FreeSurfer and expected to be stored in the same "inputdir" path for FSL FIRST: the function will search for files containing the sub-ID and the "*T1w.nii*" pattern for each subject. Parallel processes: to avoid conflicts, subjects will be skipped if a "isrunning" tmp file exists to mark them as currently processing. The tmp file is removed at the end or replaced if 1 hour old. If a process has been interrupted, remove the tmp manually to rerun a subject before the 1 hour (its path is printed when flagged). Note for (optional) FSL cerebellar segmentation: the cerebella need to be created inside the same output directory as run_first_all's, naming them "*R_Cereb_first" and "*L_Cereb_first". It can be done as follows (do the same for `L_Cereb`): .. code-block:: bash first_flirt [subject T1file] "[subject_directory]/[sub-id]" -cort run_first -i [subject T1file] \\ -t "[subject_directory]/[sub-id_cort.mat]" \\ -n 40 \\ -o "[subject_directory]/[sub-id]-R_Cereb_first" \\ -m "${FSLDIR}/data/first/models_336_bin/intref_puta/R_Cereb.bmv" \\ -intref "${FSLDIR}/data/first/models_336_bin/05mm/R_Puta_05mm.bmv" Parameters ---------- inputdir : str, Path The path to a directory containing each individual subject's directory with preprocessing data inside. For FreeSurfer, typically the $SUBJECTS_DIR given to the recon-all pipeline. For FSL, a directory with one subdirectory per subject, each receiving its output from run_first_all. outputdir : str, Path The path where subcortical volumes will be saved (creates the "sub_volumes" directory). toolboxdata : str, Path, optional The path of the "subcortexmesh_data" package data directory. The default path is assumed to be the user's home directory (pathlib's Path.home()). Users will be prompted to download it if not found. template: str The name of the template the surfaces are supposed to be matching to. For FreeSurfer outputs, it is 'fsaverage'. For FSL FIRST, it is 'fslfirst'. overwrite : bool Whether files are to be overwritten or skipped if already in outputdir. Default is True. silent : bool Whether messages about the process are to be silenced. Default is False. """ #template data is needed toolboxdata=template_data_fetch(datapath=toolboxdata, template = template) #file labelling norm if template=='fsaverage': seglabel='aseg' else: seglabel=template #other paths checks if not os.path.exists(f"{inputdir}"): raise FileNotFoundError("Subjects directory not found. Please verify the path provided as inputdir.") #creates folder where volumes will go subvol_path = os.path.join(outputdir, "sub_volumes") if not silent and not os.path.exists(subvol_path): print(f"Writing the {outputdir}/sub_volumes directory...") os.makedirs(subvol_path, exist_ok=True) #get ROI indices into an array #the aseg table was extracted using mri_segstats --seg on aseg.mgz, 7.4.1's $FREESURFER_HOME/subjects/fsaverage/mri/aseg.mgz #the fslfirst table also used mri_segstats, on a .nii volume combining *all_fast_firstseg and L_Cereb_first, and R_Cereb_first, segmented from $FSLDIR/data/standard/MNI152_T1_1mm_brain.nii.gz. #indices are in the second column (from lines with no #) printed roi_indices = sorted({int(line.split()[1]) for line in open(f"{toolboxdata}/template_data/{template}/{seglabel}_labels.txt") if not line.startswith("#")}) #list subs (1st depth of inputdir, and only counted as sub folders with the right "sub-xxx" pattern) pattern = r'(sub-[^_]+)' sub_list = [f for f in next(os.walk(inputdir))[1] if re.search(pattern, f)] sub_list.sort() subindex=0 for subid in sub_list: subindex=subindex+1 #unique tmp file to avoid parallel loop conflicts fname = os.path.join(tempfile.gettempdir(), f"{subid}_isrunning_vol.tmp") #if exists already, and tmp file is younger than 1h, skip subject if os.path.exists(fname): tmp_lifetime = (time.time() - os.path.getmtime(fname)) / 3600 if tmp_lifetime < 1: if not silent: print(f"{subid} already running (tmp file: {fname}).") continue #creates tmp if loop wasn't skipped with open(fname, "w"): pass #subject id reformatted and limited to sub-xxx for SCM newsubid=re.search(pattern, subid).group(1) if not silent: print(f"Processing {subid} ... [{subindex}/{len(sub_list)}]") ##################################################################################### ##################Coregistration of subject's ROI to template#$$##################### if not os.path.exists(f"{inputdir}/{subid}/mri/aseg.mgz") and template=='fsaverage': if not silent: print(f"{subid} has no aseg.mgz file in its mri/ directory.") continue if not glob.glob(f"{inputdir}/{subid}/*all_fast_firstseg.nii.gz") and template=='fslfirst': if not silent: print(f"{subid} has no *all_fast_firstseg.nii.gz file in the subject directory.") continue #will make a inputdirectory called "ants" in the freesurfer output for the warped volumes for tidiness antsdir=f"{outputdir}/sub_volumes/{newsubid}/ants_coreg" os.makedirs(os.path.join(f"{antsdir}"), exist_ok=True) #check if coregistration done already segvol=None #default in case coregistration fails if not os.path.exists(f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz") or overwrite: #resample subject's own segmentations to fsaverage space for alignment if not silent: print(f"Coregistering {subid}'s segmentations to the {template} template before volumes are extracted...") #For FreeSurfer, T1.mgz converted to nii.gz as ANTS uses those if template=='fsaverage': nib.save(nib.load(f"{inputdir}/{subid}/mri/aseg.mgz"), f"{antsdir}/aseg.nii.gz") nib.save(nib.load(f"{inputdir}/{subid}/mri/T1.mgz"), f"{antsdir}/T1.nii.gz") subvol=f"{antsdir}/aseg.nii.gz" T1vol=f"{antsdir}/T1.nii.gz" #template imported from 7.4.1's $FREESURFER_HOME/subjects/fsaverage/mri/T1.mgz templatevol=f"{toolboxdata}/template_data/fsaverage/T1.mgz" #For FSL, since data tree is not standard, T1 is searched for in the inputdir or #1st depth subdirectories. The first one to be found is used if template=='fslfirst': subvol=(glob.glob(f"{inputdir}/*{subid}*all_fast_firstseg.nii.gz") or \ glob.glob(f"{inputdir}/*/*{subid}*all_fast_firstseg.nii.gz", recursive=True)) if subvol != []: subvol=subvol[0] else: if not silent: print(f"{subid} has no segmentation file (*all_fast_firstseg.nii.gz) file in inputdir.") continue T1vol=(glob.glob(f"{inputdir}/*{subid}*T1w.nii*") or \ glob.glob(f"{inputdir}/*/*{subid}*T1w.nii*", recursive=True)) if T1vol != []: T1vol=T1vol[0] else: if not silent: print(f"{subid} has no T1w file (*T1w.nii*) file in inputdir.") continue templatevol=f"{toolboxdata}/template_data/fslfirst/MNI152_T1_1mm_brain.nii.gz" #If present, will include cerebellar volumes cerebvol=(glob.glob(f"{inputdir}/*{subid}*Cereb_first.nii*") or \ glob.glob(f"{inputdir}/*/*{subid}*Cereb_first.nii*", recursive=True)) if cerebvol != []: if not silent: print('Cerebellar volumes found. Including their segmentations...') #FSL's masking out of cerebellar ROIs (nibabel way is faster) #max_args = " ".join([f"-max {f}" for f in cerebvol]) #depends on whether 1 or 2 are present #_ = os.system(f"fslmaths {subvol} {max_args} {os.path.dirname(subvol)}/all_fast_firstseg_wcereb.nii.gz") #subvol=f"{os.path.dirname(subvol)}/all_fast_firstseg_wcereb.nii.gz" #Merges cerebellar volumes to the main segmentation volume subvol_img = nib.load(subvol) combined = subvol_img.get_fdata().copy() for f in cerebvol: combined = np.maximum(combined, nib.load(f).get_fdata()) #header data from cerebellar segmentations differs, so needs reformatting combined = np.round(combined).astype(np.int16) clean_header = subvol_img.header.copy() clean_header.set_data_dtype(np.int16) clean_header['scl_slope'] = 1.0 clean_header['scl_inter'] = 0.0 subvol = f"{os.path.dirname(subvol)}/all_fast_firstseg_wcereb.nii.gz" nib.save(nib.Nifti1Image(combined, subvol_img.affine, subvol_img.header), subvol) if not os.path.exists(f"{T1vol}"): if not silent: print(f"No T1 .nii file found for '{subid}'. It is needed for the coregistration.") elif not os.path.exists(f"{subvol}"): if not silent: print(f"No subcortical segmentation file found for '{subid}'.") else: #register subject's T1 to faverage, and use that warp to register segmentations #apply rigid (-t r) transformation to preserve actual size of ROI fixedT1=ants.image_read(filename=templatevol, dimension=3) movingT1=ants.image_read(filename=T1vol, dimension=3) mytx=ants.registration(fixed=fixedT1, moving=movingT1, type_of_transform="Rigid", outprefix=f"{antsdir}/T1_to_template_rigid") movingaseg=ants.image_read(filename=subvol, dimension=3) #applying transformation matrix on the segmentation volume warpedimg=ants.apply_transforms(fixed=fixedT1, moving=movingaseg, transformlist=mytx['fwdtransforms'], interpolator="multiLabel") _ = ants.image_write(warpedimg, f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz") #Save a coregistered T1 too for later surface-to-T1 matching warpedT1 = ants.apply_transforms(fixed=fixedT1, moving=movingT1, transformlist=mytx['fwdtransforms'], interpolator="linear") ants.image_write(warpedT1, f"{antsdir}/T1_{template}_rigid_coreg.nii.gz") #to visualise: #os.system(f"freeview -v {templatevol} {antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz:colormap=LUT") #error if registered volumes did not output successfully if os.path.exists(f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz"): if not silent: print(f" => Done: coregistered volume stored to {antsdir}") segvol=f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz" else: if not silent: print(f"Coregistration of {seglabel} for {subid} failed. Subcortical volumes will not be outputted.") else: segvol=f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz" ##################################################################################### ##################Saving each ROI as separate volumes################################ #if coregistration worked, separating each ROI from the coregistered aseg if segvol is not None and os.path.exists(segvol): #get volumes of each aseg subcortical region separately #read through lookup table, which contains aseg ROI labels, and extract their volume masks independently if template=='fsaverage': #7.4.1's "$FREESURFER_HOME/FreeSurferColorLUT.txt" lookuptable=f"{toolboxdata}/template_data/fsaverage/FreeSurferColorLUT.txt" #indices for ROIs we don't care about (manually checked in the LUT what they referred to) skip_indices={0, 2, 3, 4, 5, 6, 7, 14, 15, 24, 30, 31, 41, 42, 43, 44, 45, 46, 62, 63, 77, 85, 251, 252, 253, 254, 255} if template=='fslfirst': #custom ROI indices table and lookup table, adapted and based on the FreeSurfer LUT lookuptable=f"{toolboxdata}/template_data/fslfirst/fslfirstColorLUT.txt" skip_indices={107,147} #Look into segmentation data and pick which match roi index niivol = nib.load(segvol) seg_data = np.asarray(niivol.dataobj) present_indices = set(np.unique(seg_data).astype(int)) #indices actually present nib.openers.Opener.default_compresslevel = 9 #size comparable to mri_convert's outputs with open(lookuptable, "r", encoding="utf-8") as f: for line in f: line = line.rstrip("\n") # Skip empty lines or comments if not line or line.startswith("#"): continue #get index (first column) parts = line.split() index = int(parts[0]) #stop once index reaches 251 (subsequent ROI labels ignored) if index >= 251: break #if index not among indices found in aseg.mgz,skip if index not in roi_indices: continue if index in skip_indices: continue if index not in present_indices: continue #get ROI name label = parts[1] if len(parts) > 1 else None #Check if volume was extracted already if not os.path.exists(f"{outputdir}/sub_volumes/{newsubid}/{label}.nii.gz") or overwrite: if not silent: print(f" => Extracting {label} ...") #Nibabel is much faster than freesurfer commands (which require mgz to nii #conversions) roi_data = np.where(seg_data == index, index, 0) nib.save(nib.Nifti1Image(roi_data, niivol.affine, niivol.header), f"{outputdir}/sub_volumes/{newsubid}/{label}.nii.gz") else: print(f"The coregistered T1 may have failed to be computed. Expected file: {antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz") os.remove(fname) #cleanup tmp file