"""DCE modality class."""
import warnings
import numpy as np
import SimpleITK as sitk
from datetime import datetime
from scipy.interpolate import interp1d
from .temporal_modality import TemporalModality
from ..utils import find_nearest
from ..utils.validation import check_path_data
class DCEModality(TemporalModality):
"""Class to handle DCE-MRI modality.
Parameters
----------
path_data : str or None, optional (default=None)
Path where the data are located. It can also be specified
when reading the data, in which case it will be overridden.
Attributes
----------
path_data_ : str
Location of the data.
data_ : ndarray, shape (T, Y, X, Z)
The different volumes of the DCE sequence. The data are stored
in (T, Y, X, Z) order.
metadata_ : dict
A dictionary containing the metadata related to the DCE serie.
pdf_series_ : list of ndarray, length (n_serie)
List of the PDF for each serie.
bin_series_ : list of ndarray, length (n_serie)
List of the bins used to plot the pdfs.
max_series_ : float
Maximum intensity of all the DCE series.
min_series_ : float
Minimum intensity of all the DCE series.
n_serie_ : int
Number of serie in this DCE sequence.
max_series_list_ : list of float
List of the maximum intensity for each DCE serie.
min_series_list_ : list of float
List of the minimum intensity for each DCE serie.
time_info_ : ndarray, shape (n_serie, )
Array containing the acquisition time of each serie, in seconds,
relative to the first acquisition.
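Examples
--------
A minimal usage sketch; the path below is illustrative and assumes a
folder containing several DCE DICOM series.

>>> dce = DCEModality()
>>> dce = dce.read_data_from_path('/path/to/dce')  # doctest: +SKIP
>>> dce.data_.shape[0] == dce.n_serie_  # doctest: +SKIP
True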
"""
def __init__(self, path_data=None):
super(DCEModality, self).__init__(path_data=path_data)
def get_pdf_list(self, roi_data=None, nb_bins='auto'):
"""Extract a list of PDFs computed from the data.
Parameters
----------
roi_data : tuple or None, optional (default=None)
Indices of the elements to consider when computing the histogram.
The ROI is a 3D volume which will be applied to each time serie.
nb_bins : list of int or str, optional (default='auto')
The number of bins to use to compute each histogram.
The possibilities are:
- If 'auto', the number of bins is computed from the data range.
- Otherwise, a list of integers needs to be given.
Returns
-------
pdf_list : list of ndarray, length (n_serie)
List of the PDFs, one per serie.
bin_list : list of ndarray, length (n_serie)
List of the bin edges associated with each PDF. Each array
contains one more element than the corresponding PDF.
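Examples
--------
A minimal sketch; assumes ``dce`` is a DCEModality whose data have
already been read and ``roi_mask`` is an illustrative binary 3D array
matching one volume.

>>> roi_data = np.nonzero(roi_mask)  # doctest: +SKIP
>>> pdf_list, bin_list = dce.get_pdf_list(roi_data=roi_data)  # doctest: +SKIP
>>> len(pdf_list) == dce.n_serie_  # doctest: +SKIP
True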
"""
# Check that the data have been read
if self.data_ is None:
raise ValueError('You need to load the data first. Refer to the'
' function read_data_from_path().')
# Check if we have to auto compute the range of the histogram
# Check that we have a proper list of bins
if isinstance(nb_bins, str):
if nb_bins == 'auto':
nb_bins = []
for data_serie in self.data_:
if roi_data is None:
nb_bins.append(int(np.round(
np.ndarray.max(data_serie) -
np.ndarray.min(data_serie))))
else:
nb_bins.append(int(np.round(
np.ndarray.max(data_serie[roi_data]) -
np.ndarray.min(data_serie[roi_data]))))
else:
raise ValueError('Wrong string argument to specify the'
' number of bins.')
elif isinstance(nb_bins, list):
if (len(nb_bins) != len(self.data_) or
not all(isinstance(x, int) for x in nb_bins)):
raise ValueError('Provide a list of number of bins with'
' the same size as the number of serie in'
' the data.')
else:
raise ValueError('Unknown arguments for `nb_bins`.')
pdf_list = []
bin_list = []
# Go through each serie to compute the pdfs
for data_serie, bins in zip(self.data_, nb_bins):
if roi_data is None:
pdf_s, bin_s = np.histogram(data_serie,
bins=bins,
density=True)
else:
pdf_s, bin_s = np.histogram(data_serie[roi_data],
bins=bins,
density=True)
pdf_list.append(pdf_s)
bin_list.append(bin_s)
return pdf_list, bin_list
def update_histogram(self, nb_bins=None):
"""Update the histogram of each serie and first-order statistics.
Parameters
----------
nb_bins : list of int, str, or None, optional (default=None)
The number of bins to use to compute each histogram.
The possibilities are:
- If None, the number of bins found at reading will be used.
- If 'auto', the number of bins is computed from the data range.
- Otherwise, a list of integers needs to be given.
Returns
-------
self : object
Returns self.
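Examples
--------
A minimal sketch; assumes ``dce`` is a DCEModality whose data have
already been read.

>>> dce = dce.update_histogram(nb_bins='auto')  # doctest: +SKIP
>>> len(dce.pdf_series_) == dce.n_serie_  # doctest: +SKIP
True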
"""
# Check if the data have been read
if self.data_ is None:
raise ValueError('You need to read the data first. Call the'
' function read_data_from_path()')
# Compute the min and max from all DCE series
self.max_series_ = np.ndarray.max(self.data_)
self.min_series_ = np.ndarray.min(self.data_)
# For each serie compute the pdfs and store them
pdf_series = []
bin_series = []
min_series_list = []
max_series_list = []
# Check that we have a proper list of bins
if isinstance(nb_bins, str):
if nb_bins == 'auto':
nb_bins = []
for data_serie in self.data_:
nb_bins.append(int(np.round(np.ndarray.max(data_serie) -
np.ndarray.min(data_serie))))
else:
raise ValueError('Unknown string for `nb_bins`.')
elif isinstance(nb_bins, list):
if (len(nb_bins) != len(self.data_) or
not all(isinstance(x, int) for x in nb_bins)):
raise ValueError('Provide a list of integer of bins'
' with the same size as the number'
' of serie in the data.')
elif nb_bins is None:
nb_bins = self.nb_bins_
else:
raise ValueError('Unknown arguments for `nb_bins`.')
for data_serie, bins in zip(self.data_, nb_bins):
pdf_s, bin_s = np.histogram(data_serie,
bins=bins,
density=True)
pdf_series.append(pdf_s)
bin_series.append(bin_s)
min_series_list.append(np.ndarray.min(data_serie))
max_series_list.append(np.ndarray.max(data_serie))
# Keep these data in the object
self.pdf_series_ = pdf_series
self.bin_series_ = bin_series
self.min_series_list_ = min_series_list
self.max_series_list_ = max_series_list
return self
def build_heatmap(self, roi_data=None, nb_bins='auto'):
"""Return a heatmap built from the PDF of each serie.
Parameters
----------
roi_data : tuple or None, optional (default=None)
Indices of the elements to consider when computing the histogram.
The ROI is a 3D volume which will be applied to each time serie.
nb_bins : list of int, str, or None, optional (default='auto')
The number of bins to use to compute each histogram.
The possibilities are:
- If None, the number of bins found at reading will be used.
- If 'auto', the number of bins is computed from the data range.
- Otherwise, a list of integers needs to be given.
Returns
-------
heatmap : ndarray, shape (n_serie, intensity_range)
Heatmap of the different PDFs. This is equivalent to
pdf_series_, with each PDF properly shifted inside the array.
bins_heatmap : ndarray, shape (intensity_range, )
The bins associated with the heatmap.
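Examples
--------
A minimal sketch; assumes ``dce`` is a DCEModality whose data have
already been read and ``plt`` refers to ``matplotlib.pyplot``
(not imported in this module).

>>> heatmap, bins_heatmap = dce.build_heatmap()  # doctest: +SKIP
>>> heatmap.shape[0] == dce.n_serie_  # doctest: +SKIP
True
>>> plt.imshow(heatmap, aspect='auto')  # doctest: +SKIP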
"""
# Check that the data have been read
if self.data_ is None:
raise ValueError('You need to load the data first. Refer to the'
' function read_data_from_path().')
if nb_bins is None:
# Use the value found during the data reading
nb_bins = self.nb_bins_
# Compute the list of pdfs
pdf_list, bins_list = self.get_pdf_list(roi_data=roi_data,
nb_bins=nb_bins)
# Take the center of each bin
center_bins_list = []
for bins_serie in bins_list:
center_bins_list.append((bins_serie[:-1] + bins_serie[1:]) / 2.)
# Find the extrema
max_series = []
min_series = []
ratio_bins = []
for bins_serie in center_bins_list:
# Find the max and min
min_series.append(np.ndarray.min(bins_serie))
max_series.append(np.ndarray.max(bins_serie))
# Compute the width of each bin
ratio_bins.append((np.ndarray.max(bins_serie) -
np.ndarray.min(bins_serie)) /
float(bins_serie.size))
# Allocate the data for the heatmap
bins_heatmap = np.linspace(np.min(min_series), np.max(max_series),
int((np.max(max_series) -
np.min(min_series)) /
np.min(ratio_bins)))
heatmap = np.zeros((self.n_serie_, bins_heatmap.size))
# Build the heatmap
# Go through each serie and paste it inside the array
for idx_serie, (bin_serie, pdf_serie) in enumerate(
zip(center_bins_list,
pdf_list)):
# We need to interpolate the histogram values
min_value, min_idx = find_nearest(bins_heatmap,
min_series[idx_serie])
max_value, _ = find_nearest(bins_heatmap,
max_series[idx_serie])
# Interpolate the value using nearest neighbour
f = interp1d(bin_serie, pdf_serie, kind='nearest',
bounds_error=False, fill_value=0.)
nb_bin = int((max_value - min_value) / np.min(ratio_bins))
bin_serie_interpolated = np.linspace(min_value, max_value, nb_bin)
pdf_serie_interpolated = f(bin_serie_interpolated)
# Copy the data at the right position
heatmap[idx_serie, min_idx:min_idx+nb_bin] = pdf_serie_interpolated
return heatmap, bins_heatmap
def _build_dict_metadata(self, vol, filename, refit=False):
"""Build the dictionary from the dicom header.
Parameters
----------
vol : SimpleITK.Image
A SimpleITK image from which we can get some metadata information.
filename : string
The filename to use to open a single image to get additional
information from the DICOM header.
refit : bool, optional (default=False)
Whether the metadata dictionary has already been built. If False,
the full dictionary is created; otherwise only the acquisition
time is appended.
Returns
-------
refit : bool
The updated flag, set to True once the dictionary has been built.
"""
if not refit:
# Store the DICOM metadata
self.metadata_ = {}
# Get the information that have been created by SimpleITK
# Information about data reconstruction
self.metadata_['size'] = vol.GetSize()
self.metadata_['origin'] = vol.GetOrigin()
self.metadata_['direction'] = vol.GetDirection()
self.metadata_['spacing'] = vol.GetSpacing()
# Information about the MRI sequence
# Read the first image for the sequence
im = sitk.ReadImage(filename)
self.metadata_['TR'] = float(im.GetMetaData('0018|0080'))
self.metadata_['TE'] = float(im.GetMetaData('0018|0081'))
self.metadata_['flip-angle'] = float(
im.GetMetaData('0018|1314'))
# Store the data related
self.metadata_['acq-time'] = [datetime.strptime(
im.GetMetaData('0008|0032').replace(' ', ''),
'%H%M%S.%f')]
refit = True
else:
im = sitk.ReadImage(filename)
self.metadata_['acq-time'].append(datetime.strptime(
im.GetMetaData('0008|0032').replace(' ', ''),
'%H%M%S.%f'))
return refit
def read_data_from_path(self, path_data=None):
"""Read temporal images which represent a 3D volume
over time.
Parameters
----------
path_data : str, list of str, or None, optional (default=None)
Path to the temporal data. It will override the path given
in the constructor.
Returns
-------
self : object
Returns self.
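Examples
--------
A minimal sketch; the paths are illustrative. Either a single folder
containing several series, or a list of folders each containing a
single serie, can be given.

>>> dce = DCEModality()
>>> dce = dce.read_data_from_path('/path/to/dce')  # doctest: +SKIP
>>> dce = dce.read_data_from_path(['/path/serie_1', '/path/serie_2'])  # doctest: +SKIP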
"""
# Check the consistency of the path data
if self.path_data_ is not None and path_data is not None:
# Override the path and raise a warning
warnings.warn('The data path will be overriden using the path'
' given in the function.')
self.path_data_ = check_path_data(path_data)
elif self.path_data_ is None and path_data is not None:
self.path_data_ = check_path_data(path_data)
elif self.path_data_ is None and path_data is None:
raise ValueError('You need to give a path_data from where to read'
' the data.')
# There are two possibilities to open the data. If path_data is a
# list, each path contains a single serie and each folder can be
# opened on its own. If path_data is a single string, the folder
# contains several series and we go through each of them.
# Case that we have a single string
if isinstance(self.path_data_, str):
# Create a reader object
reader = sitk.ImageSeriesReader()
# Find the different series present inside the folder
series_time = np.array(reader.GetGDCMSeriesIDs(self.path_data_))
# Check that you have more than one serie
if len(series_time) < 2:
raise ValueError('The time serie should contain at least'
' 2 series.')
# The IDs need to be re-ordered in an incremental manner
# Create a list by converting to integer the number after
# the last full stop
id_series_time_int = np.array([int(s[s.rfind('.')+1:])
for s in series_time])
# Sort and get the corresponding index
idx_series_sorted = np.argsort(id_series_time_int)
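# For instance, a series ID such as '1.2.840.xxxx.7' (illustrative
# value) is mapped to the integer 7, which gives the temporal order.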
# Open the volume in the sorted order
list_volume = []
is_dict_built = False
for id_time in series_time[idx_series_sorted]:
# Get the filenames corresponding to the current ID
dicom_names_serie = reader.GetGDCMSeriesFileNames(self.path_data_,
id_time)
# Set the list of files to read the volume
reader.SetFileNames(dicom_names_serie)
# Read the data for the current volume
vol = reader.Execute()
# Get a numpy volume
vol_numpy = sitk.GetArrayFromImage(vol)
# The Matlab convention is (Y, X, Z)
# The Numpy convention is (Z, Y, X)
# We have to swap these axes
# Swap Z and X
vol_numpy = np.swapaxes(vol_numpy, 0, 2)
vol_numpy = np.swapaxes(vol_numpy, 0, 1)
# Convert the volume to float
vol_numpy = vol_numpy.astype(np.float64)
# Concatenate the different volume
list_volume.append(vol_numpy)
# Build the dictionary
is_dict_built = self._build_dict_metadata(vol,
dicom_names_serie[0],
is_dict_built)
# Compute the time information from the DICOM tag kept
# Initialize the first value
self.time_info_ = [0.]
for idx_time in range(1, len(self.metadata_['acq-time'])):
delta_sec = (self.metadata_['acq-time'][idx_time] -
self.metadata_['acq-time'][0])
self.time_info_.append(delta_sec.total_seconds())
self.time_info_ = np.array(self.time_info_)
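# For instance, acquisitions at 10:00:00.00 and 10:00:07.50
# (illustrative times) lead to time_info_ = [0.0, 7.5]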
# We can create a numpy array
# The first dimension corresponds to the time dimension
# The data will be sliced along this dimension during processing,
# which is why it is kept in the first position
self.data_ = np.array(list_volume)
self.n_serie_ = self.data_.shape[0]
# Case that we have a list of string
else:
# We have to iterate through each folder and check that each one
# contains only one serie
# Check that we have more than one sequence
if len(self.path_data_) < 2:
raise ValueError('The multisequence should contain at least'
' 2 sequences.')
list_volume = []
is_dict_built = False
for path_serie in self.path_data_:
reader = sitk.ImageSeriesReader()
# Find the different series present inside the folder
series = np.array(reader.GetGDCMSeriesIDs(path_serie))
# Check that you have more than one serie
if len(series) > 1:
raise ValueError('The number of series should not be'
' larger than 1 when a list of paths is'
' given.')
# The data can be read
dicom_names_serie = reader.GetGDCMSeriesFileNames(path_serie)
# Set the list of files to read the volume
reader.SetFileNames(dicom_names_serie)
# Read the data for the current volume
vol = reader.Execute()
# Get a numpy volume
vol_numpy = sitk.GetArrayFromImage(vol)
# The Matlab convention is (Y, X, Z)
# The Numpy convention is (Z, Y, X)
# We have to swap these axes
# Swap Z and X
vol_numpy = np.swapaxes(vol_numpy, 0, 2)
vol_numpy = np.swapaxes(vol_numpy, 0, 1)
# Convert the volume to float
vol_numpy = vol_numpy.astype(np.float64)
# Append inside the volume list
list_volume.append(vol_numpy)
# Build the dictionary
is_dict_built = self._build_dict_metadata(vol,
dicom_names_serie[0],
is_dict_built)
# Compute the time information from the DICOM tag kept
# Initialize the first value
self.time_info_ = [0.]
for idx_time in range(1, len(self.metadata_['acq-time'])):
delta_sec = (self.metadata_['acq-time'][idx_time] -
self.metadata_['acq-time'][0])
self.time_info_.append(delta_sec.total_seconds())
self.time_info_ = np.array(self.time_info_)
# We can create a numpy array
self.data_ = np.array(list_volume)
self.n_serie_ = self.data_.shape[0]
# Create the list of number of bins to compute the histogram
self.nb_bins_ = []
for data_serie in self.data_:
self.nb_bins_.append(int(np.round(np.ndarray.max(data_serie) -
np.ndarray.min(data_serie))))
# Compute the information regarding the pdf of the DCE series
self.update_histogram()
return self