#####################################################################################
##############getting subcortical volumes from subcortical segmentations#############
#Currently compatible with FreeSurfer's ASeg and FSL's FIRST segmentation outputs
import os
import ants
import re
import glob
import tempfile
import time
from typing import Optional, Union
from pathlib import Path
from subcortexmesh import template_data_fetch
import nibabel as nib
import numpy as np
[docs]
def subseg_getvol(
inputdir: Union[str, Path],
outputdir: Union[str, Path],
template: str,
toolboxdata: Optional[Union[str, Path]] = None,
overwrite: bool = True,
silent: bool = False,
):
"""Extracting and coregistering subcortical volumes from a cohort's subcortical volumes
This function reads through subjects of a FreeSurfer or FSL FIRST output preprocessing
directory and identifies subcortical segmentation volumes. Typically, such volumes are
"aseg.mgz" for FreeSurfer, and "*all_fast_firstseg" for FSL FIRST (plus, if applicable,
"*Cereb_first.nii.gz"). For each subject, subseg_getvol() uses antspy to coregister the
segmentation volumes to their respective template space (fsaverage/MNI305 for FreeSurfer,
MNI152 for FSL), and then extracts each subcortical region-of-interest separately.
T1 volumes are required for the coregistration, stored as [sub-ID]/mri/T1.mgz for
FreeSurfer and expected to be stored in the same "inputdir" path for FSL FIRST:
the function will search for files containing the sub-ID and the "*T1w.nii*" pattern for
each subject.
Parallel processes: to avoid conflicts, subjects will be skipped if a "isrunning" tmp
file exists to mark them as currently processing. The tmp file is removed at the end or
replaced if 1 hour old. If a process has been interrupted, remove the tmp manually to
rerun a subject before the 1 hour (its path is printed when flagged).
Note for (optional) FSL cerebellar segmentation: the cerebella need to be created inside
the same output directory as run_first_all's, naming them "*R_Cereb_first" and
"*L_Cereb_first". It can be done as follows (do the same for `L_Cereb`):
.. code-block:: bash
first_flirt [subject T1file] "[subject_directory]/[sub-id]" -cort
run_first -i [subject T1file] \\
-t "[subject_directory]/[sub-id_cort.mat]" \\
-n 40 \\
-o "[subject_directory]/[sub-id]-R_Cereb_first" \\
-m "${FSLDIR}/data/first/models_336_bin/intref_puta/R_Cereb.bmv" \\
-intref "${FSLDIR}/data/first/models_336_bin/05mm/R_Puta_05mm.bmv"
Parameters
----------
inputdir : str, Path
The path to a directory containing each individual subject's directory with
preprocessing data inside. For FreeSurfer, typically the $SUBJECTS_DIR given
to the recon-all pipeline. For FSL, a directory with one subdirectory per
subject, each receiving its output from run_first_all.
outputdir : str, Path
The path where subcortical volumes will be saved (creates the "sub_volumes"
directory).
toolboxdata : str, Path, optional
The path of the "subcortexmesh_data" package data directory. The default path
is assumed to be the user's home directory (pathlib's Path.home()). Users will
be prompted to download it if not found.
template: str
The name of the template the surfaces are supposed to be matching to. For FreeSurfer
outputs, it is 'fsaverage'. For FSL FIRST, it is 'fslfirst'.
overwrite : bool
Whether files are to be overwritten or skipped if already in outputdir. Default is True.
silent : bool
Whether messages about the process are to be silenced. Default is False.
"""
#template data is needed
toolboxdata=template_data_fetch(datapath=toolboxdata, template = template)
#file labelling norm
if template=='fsaverage':
seglabel='aseg'
else:
seglabel=template
#other paths checks
if not os.path.exists(f"{inputdir}"):
raise FileNotFoundError("Subjects directory not found. Please verify the path provided as inputdir.")
#creates folder where volumes will go
subvol_path = os.path.join(outputdir, "sub_volumes")
if not silent and not os.path.exists(subvol_path):
print(f"Writing the {outputdir}/sub_volumes directory...")
os.makedirs(subvol_path, exist_ok=True)
#get ROI indices into an array
#the aseg table was extracted using mri_segstats --seg on aseg.mgz, 7.4.1's $FREESURFER_HOME/subjects/fsaverage/mri/aseg.mgz
#the fslfirst table also used mri_segstats, on a .nii volume combining *all_fast_firstseg and L_Cereb_first, and R_Cereb_first, segmented from $FSLDIR/data/standard/MNI152_T1_1mm_brain.nii.gz.
#indices are in the second column (from lines with no #) printed
roi_indices = sorted({int(line.split()[1]) for line in open(f"{toolboxdata}/template_data/{template}/{seglabel}_labels.txt") if not line.startswith("#")})
#list subs (1st depth of inputdir, and only counted as sub folders with the right "sub-xxx" pattern)
pattern = r'(sub-[^_]+)'
sub_list = [f for f in next(os.walk(inputdir))[1] if re.search(pattern, f)]
sub_list.sort()
subindex=0
for subid in sub_list:
subindex=subindex+1
#unique tmp file to avoid parallel loop conflicts
fname = os.path.join(tempfile.gettempdir(), f"{subid}_isrunning_vol.tmp")
#if exists already, and tmp file is younger than 1h, skip subject
if os.path.exists(fname):
tmp_lifetime = (time.time() - os.path.getmtime(fname)) / 3600
if tmp_lifetime < 1:
if not silent:
print(f"{subid} already running (tmp file: {fname}).")
continue
#creates tmp if loop wasn't skipped
with open(fname, "w"):
pass
#subject id reformatted and limited to sub-xxx for SCM
newsubid=re.search(pattern, subid).group(1)
if not silent:
print(f"Processing {subid} ... [{subindex}/{len(sub_list)}]")
#####################################################################################
##################Coregistration of subject's ROI to template#$$#####################
if not os.path.exists(f"{inputdir}/{subid}/mri/aseg.mgz") and template=='fsaverage':
if not silent:
print(f"{subid} has no aseg.mgz file in its mri/ directory.")
continue
if not glob.glob(f"{inputdir}/{subid}/*all_fast_firstseg.nii.gz") and template=='fslfirst':
if not silent:
print(f"{subid} has no *all_fast_firstseg.nii.gz file in the subject directory.")
continue
#will make a inputdirectory called "ants" in the freesurfer output for the warped volumes for tidiness
antsdir=f"{outputdir}/sub_volumes/{newsubid}/ants_coreg"
os.makedirs(os.path.join(f"{antsdir}"), exist_ok=True)
#check if coregistration done already
segvol=None #default in case coregistration fails
if not os.path.exists(f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz") or overwrite:
#resample subject's own segmentations to fsaverage space for alignment
if not silent:
print(f"Coregistering {subid}'s segmentations to the {template} template before volumes are extracted...")
#For FreeSurfer, T1.mgz converted to nii.gz as ANTS uses those
if template=='fsaverage':
nib.save(nib.load(f"{inputdir}/{subid}/mri/aseg.mgz"), f"{antsdir}/aseg.nii.gz")
nib.save(nib.load(f"{inputdir}/{subid}/mri/T1.mgz"), f"{antsdir}/T1.nii.gz")
subvol=f"{antsdir}/aseg.nii.gz"
T1vol=f"{antsdir}/T1.nii.gz"
#template imported from 7.4.1's $FREESURFER_HOME/subjects/fsaverage/mri/T1.mgz
templatevol=f"{toolboxdata}/template_data/fsaverage/T1.mgz"
#For FSL, since data tree is not standard, T1 is searched for in the inputdir or
#1st depth subdirectories. The first one to be found is used
if template=='fslfirst':
subvol=(glob.glob(f"{inputdir}/*{subid}*all_fast_firstseg.nii.gz") or \
glob.glob(f"{inputdir}/*/*{subid}*all_fast_firstseg.nii.gz", recursive=True))
if subvol != []:
subvol=subvol[0]
else:
if not silent:
print(f"{subid} has no segmentation file (*all_fast_firstseg.nii.gz) file in inputdir.")
continue
T1vol=(glob.glob(f"{inputdir}/*{subid}*T1w.nii*") or \
glob.glob(f"{inputdir}/*/*{subid}*T1w.nii*", recursive=True))
if T1vol != []:
T1vol=T1vol[0]
else:
if not silent:
print(f"{subid} has no T1w file (*T1w.nii*) file in inputdir.")
continue
templatevol=f"{toolboxdata}/template_data/fslfirst/MNI152_T1_1mm_brain.nii.gz"
#If present, will include cerebellar volumes
cerebvol=(glob.glob(f"{inputdir}/*{subid}*Cereb_first.nii*") or \
glob.glob(f"{inputdir}/*/*{subid}*Cereb_first.nii*", recursive=True))
if cerebvol != []:
if not silent:
print('Cerebellar volumes found. Including their segmentations...')
#FSL's masking out of cerebellar ROIs (nibabel way is faster)
#max_args = " ".join([f"-max {f}" for f in cerebvol]) #depends on whether 1 or 2 are present
#_ = os.system(f"fslmaths {subvol} {max_args} {os.path.dirname(subvol)}/all_fast_firstseg_wcereb.nii.gz")
#subvol=f"{os.path.dirname(subvol)}/all_fast_firstseg_wcereb.nii.gz"
#Merges cerebellar volumes to the main segmentation volume
subvol_img = nib.load(subvol)
combined = subvol_img.get_fdata().copy()
for f in cerebvol:
combined = np.maximum(combined, nib.load(f).get_fdata())
#header data from cerebellar segmentations differs, so needs reformatting
combined = np.round(combined).astype(np.int16)
clean_header = subvol_img.header.copy()
clean_header.set_data_dtype(np.int16)
clean_header['scl_slope'] = 1.0
clean_header['scl_inter'] = 0.0
subvol = f"{os.path.dirname(subvol)}/all_fast_firstseg_wcereb.nii.gz"
nib.save(nib.Nifti1Image(combined, subvol_img.affine, subvol_img.header), subvol)
if not os.path.exists(f"{T1vol}"):
if not silent:
print(f"No T1 .nii file found for '{subid}'. It is needed for the coregistration.")
elif not os.path.exists(f"{subvol}"):
if not silent:
print(f"No subcortical segmentation file found for '{subid}'.")
else:
#register subject's T1 to faverage, and use that warp to register segmentations
#apply rigid (-t r) transformation to preserve actual size of ROI
fixedT1=ants.image_read(filename=templatevol, dimension=3)
movingT1=ants.image_read(filename=T1vol, dimension=3)
mytx=ants.registration(fixed=fixedT1, moving=movingT1, type_of_transform="Rigid", outprefix=f"{antsdir}/T1_to_template_rigid")
movingaseg=ants.image_read(filename=subvol, dimension=3)
#applying transformation matrix on the segmentation volume
warpedimg=ants.apply_transforms(fixed=fixedT1, moving=movingaseg, transformlist=mytx['fwdtransforms'], interpolator="multiLabel")
_ = ants.image_write(warpedimg, f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz")
#Save a coregistered T1 too for later surface-to-T1 matching
warpedT1 = ants.apply_transforms(fixed=fixedT1, moving=movingT1,
transformlist=mytx['fwdtransforms'], interpolator="linear")
ants.image_write(warpedT1, f"{antsdir}/T1_{template}_rigid_coreg.nii.gz")
#to visualise:
#os.system(f"freeview -v {templatevol} {antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz:colormap=LUT")
#error if registered volumes did not output successfully
if os.path.exists(f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz"):
if not silent:
print(f" => Done: coregistered volume stored to {antsdir}")
segvol=f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz"
else:
if not silent:
print(f"Coregistration of {seglabel} for {subid} failed. Subcortical volumes will not be outputted.")
else:
segvol=f"{antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz"
#####################################################################################
##################Saving each ROI as separate volumes################################
#if coregistration worked, separating each ROI from the coregistered aseg
if segvol is not None and os.path.exists(segvol):
#get volumes of each aseg subcortical region separately
#read through lookup table, which contains aseg ROI labels, and extract their volume masks independently
if template=='fsaverage':
#7.4.1's "$FREESURFER_HOME/FreeSurferColorLUT.txt"
lookuptable=f"{toolboxdata}/template_data/fsaverage/FreeSurferColorLUT.txt"
#indices for ROIs we don't care about (manually checked in the LUT what they referred to)
skip_indices={0, 2, 3, 4, 5, 6, 7, 14, 15, 24, 30, 31, 41, 42, 43, 44, 45, 46, 62, 63, 77, 85, 251, 252, 253, 254, 255}
if template=='fslfirst':
#custom ROI indices table and lookup table, adapted and based on the FreeSurfer LUT
lookuptable=f"{toolboxdata}/template_data/fslfirst/fslfirstColorLUT.txt"
skip_indices={107,147}
#Look into segmentation data and pick which match roi index
niivol = nib.load(segvol)
seg_data = np.asarray(niivol.dataobj)
present_indices = set(np.unique(seg_data).astype(int)) #indices actually present
nib.openers.Opener.default_compresslevel = 9 #size comparable to mri_convert's outputs
with open(lookuptable, "r", encoding="utf-8") as f:
for line in f:
line = line.rstrip("\n")
# Skip empty lines or comments
if not line or line.startswith("#"):
continue
#get index (first column)
parts = line.split()
index = int(parts[0])
#stop once index reaches 251 (subsequent ROI labels ignored)
if index >= 251:
break
#if index not among indices found in aseg.mgz,skip
if index not in roi_indices:
continue
if index in skip_indices:
continue
if index not in present_indices:
continue
#get ROI name
label = parts[1] if len(parts) > 1 else None
#Check if volume was extracted already
if not os.path.exists(f"{outputdir}/sub_volumes/{newsubid}/{label}.nii.gz") or overwrite:
if not silent:
print(f" => Extracting {label} ...")
#Nibabel is much faster than freesurfer commands (which require mgz to nii
#conversions)
roi_data = np.where(seg_data == index, index, 0)
nib.save(nib.Nifti1Image(roi_data, niivol.affine, niivol.header),
f"{outputdir}/sub_volumes/{newsubid}/{label}.nii.gz")
else:
print(f"The coregistered T1 may have failed to be computed. Expected file: {antsdir}/{seglabel}_{template}_rigid_coreg.nii.gz")
os.remove(fname) #cleanup tmp file