"""Define the Dataset and FileDataset classes.
The Dataset class represents the DICOM Dataset while the FileDataset class
adds extra functionality to Dataset when data is read from or written to file.
Overview of DICOM object model
------------------------------
Dataset (dict subclass)
Contains DataElement instances, each of which has a tag, VR, VM and value.
The DataElement value can be:
* A single value, such as a number, string, etc. (i.e. VM = 1)
* A list of numbers, strings, etc. (i.e. VM > 1)
* A Sequence (list subclass), where each item is a Dataset which
contains its own DataElements, and so on in a recursive manner.
"""
#
# Copyright (c) 2008-2013 Darcy Mason
# This file is part of pydicom, released under a modified MIT license.
# See the file license.txt included with this distribution, also
# available at https://github.com/darcymason/pydicom
#
import inspect # for __dir__
import io
import os.path
import sys
from pydicom import compat
from pydicom.charset import default_encoding, convert_encodings
from pydicom.datadict import dictionary_VR
from pydicom.datadict import tag_for_keyword, keyword_for_tag, repeater_has_keyword
from pydicom.tag import Tag, BaseTag
from pydicom.dataelem import DataElement, DataElement_from_raw, RawDataElement
from pydicom.uid import NotCompressedPixelTransferSyntaxes, UncompressedPixelTransferSyntaxes
from pydicom.tagtools import tag_in_exception
import pydicom # for write_file
import pydicom.charset
from pydicom.config import logger
import pydicom.encaps
# True when the running interpreter uses little endian byte order.
sys_is_little_endian = (sys.byteorder == 'little')

# Optional dependencies. Each ``have_*`` / ``*_available`` flag records whether
# the corresponding import succeeded so that callers can fail with a clear
# message instead of a NameError.
try:
    import numpy
except ImportError:
    have_numpy = False
else:
    have_numpy = True

try:
    import gdcm
except ImportError:
    have_gdcm = False
else:
    have_gdcm = True

try:
    from os import stat
except ImportError:
    stat_available = False
else:
    stat_available = True

try:
    import jpeg_ls
except ImportError:
    have_jpeg_ls = False
else:
    have_jpeg_ls = True

try:
    from PIL import Image as PILImg
except ImportError:
    # If that failed, try the alternate import syntax for PIL.
    try:
        import Image as PILImg
    except ImportError:
        # Neither worked, so it's likely not installed.
        have_pillow = False
    else:
        # The fallback import worked, so PILImg is usable. Previously the
        # flag was left False here even though the import had succeeded.
        have_pillow = True
else:
    have_pillow = True
class PropertyError(Exception):
    """Stand-in for an AttributeError caught inside a property.

    A distinct exception type is used so that the error does not go to
    ``__getattr__``.
    """
    # http://docs.python.org/release/3.1.3/tutorial/errors.html#tut-userexceptions
[docs]class Dataset(dict):
"""A collection (dictionary) of DICOM DataElements.
Examples
--------
Add DataElements to the Dataset (for elements in the DICOM dictionary).
>>> ds = Dataset()
>>> ds.PatientName = "CITIZEN^Joan"
>>> ds.add_new(0x00100020, 'LO', '12345')
>>> ds[0x0010, 0x0030] = DataElement(0x00100030, 'DA', '20010101')
Add Sequence DataElement to the Dataset
>>> ds.BeamSequence = [Dataset(), Dataset(), Dataset()]
>>> ds.BeamSequence[0].Manufacturer = "Linac, co."
>>> ds.BeamSequence[1].Manufacturer = "Linac and Sons, co."
>>> ds.BeamSequence[2].Manufacturer = "Linac and Daughters, co."
Add private DataElements to the Dataset
>>> ds.add(DataElement(0x0043102b, 'SS', [4, 4, 0, 0]))
>>> ds.add_new(0x0043102b, 'SS', [4, 4, 0, 0])
>>> ds[0x0043, 0x102b] = DataElement(0x0043102b, 'SS', [4, 4, 0, 0])
Updating and retrieving DataElement values
>>> ds.PatientName = "CITIZEN^Joan"
>>> ds.PatientName
'CITIZEN^Joan'
>>> ds.PatientName = "CITIZEN^John"
>>> ds.PatientName
'CITIZEN^John'
Retrieving a DataElement's value from a Sequence
>>> ds.BeamSequence[0].Manufacturer
'Linac, co.'
>>> ds.BeamSequence[1].Manufacturer
'Linac and Sons, co.'
Retrieving DataElements
>>> elem = ds[0x00100010]
>>> elem = ds.data_element('PatientName')
>>> elem
(0010, 0010) Patient's Name PN: 'CITIZEN^Joan'
Deleting a DataElement from the Dataset
>>> del ds.PatientID
>>> del ds.BeamSequence[1].Manufacturer
>>> del ds.BeamSequence[2]
Deleting a private DataElement from the Dataset
>>> del ds[0x0043, 0x102b]
Determining if a DataElement is present in the Dataset
>>> 'PatientName' in ds
True
>>> 'PatientID' in ds
False
>>> 0x00100030 in ds
True
>>> 'Manufacturer' in ds.BeamSequence[0]
True
Iterating through the top level of a Dataset only (excluding Sequences)
>>> for elem in ds:
>>> print(elem)
Iterating through the entire Dataset (including Sequences)
>>> for elem in ds.iterall():
>>> print(elem)
Recursively iterate through a Dataset (including Sequences)
>>> def recurse(ds):
>>> for elem in ds:
>>> if elem.VR == 'SQ':
>>> [recurse(item) for item in elem]
>>> else:
>>> # Do something useful with each DataElement
Attributes
----------
default_element_format : str
The default formatting for string display.
default_sequence_element_format : str
The default formatting for string display of sequences.
indent_chars : str
For string display, the characters used to indent nested Sequences.
Default is " ".
"""
# Indentation unit for string display; _pretty_str repeats it once per
# nesting level when rendering nested Sequences.
indent_chars = " "
# Python 2: Classes which define __eq__ should flag themselves as unhashable
__hash__ = None
def __init__(self, *args, **kwargs):
    """Initialise the Dataset.

    Parameters
    ----------
    *args
        Positional arguments, passed unchanged to ``dict.__init__``.
    **kwargs
        Only ``parent_encoding`` is read. It is stored as
        ``_parent_encoding`` and defaults to ``default_encoding`` when not
        supplied. Keyword arguments are not passed on to ``dict``.
    """
    encoding = kwargs.get('parent_encoding', default_encoding)
    self._parent_encoding = encoding
    dict.__init__(self, *args)
def __enter__(self):
    """Enter a ``with`` block; the Dataset itself is the managed object."""
    return self
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave a ``with`` block.

    Always returns False, so an exception raised inside the block is not
    suppressed.
    """
    return False
def add(self, data_element):
    """Store `data_element` in the Dataset under its own tag.

    Shorthand for ``ds[data_element.tag] = data_element``.

    Parameters
    ----------
    data_element : pydicom.dataelem.DataElement
        The DataElement to store.
    """
    key = data_element.tag
    self[key] = data_element
def add_new(self, tag, VR, value):
    """Build a DataElement from its parts and store it in the Dataset.

    Parameters
    ----------
    tag
        The DICOM (group, element) tag in any form accepted by
        pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10), 0x00100010,
        etc.
    VR : str
        The 2 character DICOM value representation (see DICOM standard part
        5, Section 6.2).
    value
        The value of the data element. One of the following:
        * a single string or number
        * a list or tuple with all strings or all numbers
        * a multi-value string with backslash separator
        * for a sequence DataElement, an empty list or list of Dataset
    """
    element = DataElement(tag, VR, value)
    # Key on element.tag rather than on `tag`: DataElement has verified it.
    self[element.tag] = element
def data_element(self, name):
    """Look up a DataElement by its DICOM element keyword.

    Parameters
    ----------
    name : str
        A DICOM element keyword.

    Returns
    -------
    pydicom.dataelem.DataElement or None
        The result of ``self[tag]`` for the tag matching `name`, or None
        when `name` is not a known keyword.

    Raises
    ------
    KeyError
        If `name` is a known keyword but no element with that tag is stored
        in the Dataset (raised by the ``self[tag]`` lookup).
    """
    tag = tag_for_keyword(name)
    # Compare with None explicitly: (0000,0000) is a valid but falsy tag.
    if tag is None:
        return None
    return self[tag]
def __contains__(self, name):
    """Extend dict.__contains__() to handle DICOM keywords.

    This is called for code like:
    >>> 'SliceLocation' in ds
    True

    Parameters
    ----------
    name : str or int or 2-tuple
        The Element keyword or tag to search for.

    Returns
    -------
    bool
        True if the DataElement is in the Dataset, False otherwise. A
        non-string `name` that cannot be converted to a Tag gives False.
    """
    if isinstance(name, (str, compat.text_type)):
        tag = tag_for_keyword(name)
    else:
        try:
            tag = Tag(name)
        except Exception:
            # Anything Tag() rejects cannot be a key of this Dataset.
            # KeyboardInterrupt / SystemExit are no longer swallowed here.
            return False
    # Test against None as (0000,0000) is a possible tag
    if tag is not None:
        return dict.__contains__(self, tag)
    # `name` is a string that is not a DICOM keyword: fall back to a plain
    # dict membership test on the string itself.
    return dict.__contains__(self, name)
def decode(self):
    """Apply character set decoding to all DataElements in the Dataset.

    See DICOM PS3.5-2008 6.1.1.
    """
    # 'ISO_IR 6' is the default character set. The value may be
    # multi-valued; pydicom.charset handles all of that logic.
    character_set = self._character_set
    decode_element = pydicom.charset.decode

    def decode_callback(ds, data_element):
        """walk() callback: decode `data_element`, descending into SQ items."""
        if data_element.VR != 'SQ':
            decode_element(data_element, character_set)
            return
        # Each sequence item is a dataset and decodes itself.
        for item in data_element.value:
            item.decode()

    self.walk(decode_callback, recursive=False)
def __delattr__(self, name):
    """Intercept requests to delete an attribute by `name`.

    If `name` is a DICOM keyword whose element is in the Dataset, that
    DataElement is removed:
    >>> del ds.PatientName

    Otherwise an instance attribute of that name is removed:
    >>> del ds._is_some_attribute

    Parameters
    ----------
    name : str
        The keyword for the DICOM element or the instance attribute to
        delete.

    Raises
    ------
    AttributeError
        If `name` is neither a stored element's keyword nor an instance
        attribute.
    """
    tag = tag_for_keyword(name)
    if tag is not None and tag in self:
        # The key is known to exist, so go straight to dict.
        dict.__delitem__(self, tag)
        return
    # Remove via __dict__: a plain ``del self.<name>`` would re-enter this
    # method.
    if name in self.__dict__:
        del self.__dict__[name]
        return
    # Not found, raise an error in same style as python does
    raise AttributeError(name)
def __delitem__(self, key):
    """Intercept requests to delete an element by key or by slice.

    Examples
    --------
    Indexing using DataElement tag
    >>> ds = Dataset()
    >>> ds.CommandGroupLength = 100
    >>> ds.PatientName = 'CITIZEN^Jan'
    >>> del ds[0x00000000]
    >>> ds
    (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

    Slicing using DataElement tag
    >>> ds = Dataset()
    >>> ds.CommandGroupLength = 100
    >>> ds.SOPInstanceUID = '1.2.3'
    >>> ds.PatientName = 'CITIZEN^Jan'
    >>> del ds[:0x00100000]
    >>> ds
    (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

    Parameters
    ----------
    key
        The key for the element to be deleted. If a slice is used then
        the tags matching the slice conditions will be deleted.
    """
    if isinstance(key, slice):
        # Delete every tag selected by the slice, one at a time.
        for tag in self._slice_dataset(key.start, key.stop, key.step):
            del self[tag]
        return
    try:
        # Fast path: assume `key` is already usable as a dict key.
        dict.__delitem__(self, key)
    except KeyError:
        # Otherwise convert it to a Tag and try again.
        dict.__delitem__(self, Tag(key))
def __dir__(self):
    """Give a list of attributes available in the Dataset.

    The list is used, for example, in auto-completion in editors or
    command-line environments. It is the sorted union of the Dataset class's
    routines, its data descriptors and the keywords returned by ``dir()``.
    """
    routine_names = {name for name, _ in
                     inspect.getmembers(Dataset, inspect.isroutine)}
    descriptor_names = {name for name, _ in
                        inspect.getmembers(Dataset,
                                           inspect.isdatadescriptor)}
    keyword_names = set(self.dir())
    return sorted(descriptor_names | routine_names | keyword_names)
def dir(self, *filters):
    """Return an alphabetical list of DataElement keywords in the Dataset.

    Intended mainly for use in interactive Python sessions. Only the keys of
    this Dataset are examined, so the contents of any Sequence elements are
    ignored.

    Parameters
    ----------
    filters : str
        Zero or more string arguments to the function. Used for
        case-insensitive match to any part of the DICOM keyword.

    Returns
    -------
    list of str
        The matching DataElement keywords in the dataset. If no filters are
        used then all DataElement keywords are returned.
    """
    # Tags without a valid keyword (e.g. private tags) give a blank and are
    # dropped.
    keywords = [kw for kw in (keyword_for_tag(tag) for tag in self.keys())
                if kw]
    needles = [filter_.lower() for filter_ in filters]
    if not filters:
        return sorted(keywords)
    # A set keeps a keyword matched by several filters from appearing twice.
    matched = {kw for kw in keywords
               if any(needle in kw.lower() for needle in needles)}
    return sorted(matched)
def __eq__(self, other):
    """Compare `self` and `other` for equality.

    Two datasets are equal when they hold equal elements under the same keys
    and their instance attributes (``__dict__``) are equal. The order in
    which elements were added does not matter.

    Returns
    -------
    bool
        The result if `self` and `other` are the same class
    NotImplemented
        If `other` is not the same class as `self` then returning
        NotImplemented delegates the result to superclass.__eq__(subclass)
    """
    # When comparing against self this will be faster
    if other is self:
        return True
    if isinstance(other, self.__class__):
        # dict.__eq__ compares key by key, so it is independent of insertion
        # order. Comparing list(values()) positionally reported datasets
        # with identical content as unequal when built in a different order.
        return (dict.__eq__(self, other) and
                self.__dict__ == other.__dict__)
    return NotImplemented
def get(self, key, default=None):
    """Extend dict.get() to handle DICOM DataElement keywords.

    Parameters
    ----------
    key : str or pydicom.tag.Tag
        The element keyword or Tag or the class attribute name to get.
    default : obj or None
        If the DataElement or class attribute is not present, return
        `default` (default None).

    Returns
    -------
    value
        If `key` is the keyword for a DataElement in the Dataset then return
        the DataElement's value.
    pydicom.dataelem.DataElement
        If `key` is a tag for a DataElement in the Dataset then return the
        DataElement instance.
    value
        If `key` is a class attribute then return its value.

    Raises
    ------
    TypeError
        If `key` is not a string and cannot be converted to a Tag.
    """
    if isinstance(key, (str, compat.text_type)):
        # Strings go through attribute access, which resolves both DICOM
        # keywords and ordinary attributes.
        try:
            return getattr(self, key)
        except AttributeError:
            return default
    # Not a string: make it into a tag and hand it to item lookup.
    if not isinstance(key, BaseTag):
        try:
            key = Tag(key)
        except Exception:
            # Narrowed from a bare ``except:`` so that KeyboardInterrupt and
            # SystemExit are not turned into a TypeError.
            raise TypeError("Dataset.get key must be a string or tag")
    try:
        return self.__getitem__(key)
    except KeyError:
        return default
def __getattr__(self, name):
    """Intercept requests for Dataset attribute names.

    If `name` is the DICOM keyword of an element stored in the Dataset, that
    element's value is returned. Every other name is handed to the base
    class attribute getter.

    Parameters
    ----------
    name
        A DataElement keyword or a class attribute name.

    Returns
    -------
    value
        If `name` matches a DICOM keyword present in the Dataset, the
        corresponding DataElement's value. Otherwise the class attribute's
        value (if present).
    """
    tag = tag_for_keyword(name)
    if tag is not None:
        tag = Tag(tag)
        if tag in self:
            return self[tag].value
    # Either not a DICOM keyword or the element is absent: use the base
    # class attribute getter (fix for issue 332).
    return super(Dataset, self).__getattribute__(name)
@property
def _character_set(self):
    """The character set to use for this Dataset.

    The converted SpecificCharacterSet value when the element is present and
    non-empty, otherwise the encoding stored in ``_parent_encoding``.
    """
    declared = self.get('SpecificCharacterSet', None)
    if declared:
        return convert_encodings(declared)
    return self._parent_encoding
def __getitem__(self, key):
    """Operator for Dataset[key] request.

    Any deferred data elements will be read in and an attempt will be made
    to correct any elements with ambiguous VRs.

    Examples
    --------
    Indexing using DataElement tag
    >>> ds = Dataset()
    >>> ds.SOPInstanceUID = '1.2.3'
    >>> ds.PatientName = 'CITIZEN^Jan'
    >>> ds.PatientID = '12345'
    >>> ds[0x00100010]
    (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

    Slicing using DataElement tag
    All group 0x0010 elements in the dataset
    >>> ds[0x00100000:0x00110000]
    (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'
    (0010, 0020) Patient ID LO: '12345'

    All group 0x0002 elements in the dataset
    >>> ds[(0x0002, 0x0000):(0x0003, 0x0000)]

    Parameters
    ----------
    key
        The DICOM (group, element) tag in any form accepted by
        pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10), 0x00100010,
        etc. May also be a slice made up of DICOM tags.

    Returns
    -------
    pydicom.dataelem.DataElement or pydicom.dataset.Dataset
        If a single DICOM element tag is used then returns the corresponding
        DataElement. If a slice is used then returns a Dataset object
        containing the corresponding DataElements.

    Raises
    ------
    KeyError
        If no element with the given tag is stored (from the underlying
        dict lookup).
    """
    # If passed a slice, return a Dataset containing the corresponding
    # DataElements
    if isinstance(key, slice):
        ds = Dataset()
        for tag in self._slice_dataset(key.start, key.stop, key.step):
            ds.add(self[tag])
        return ds
    tag = Tag(key)
    # Go straight to dict so the stored object is seen as-is, converted or
    # not.
    data_elem = dict.__getitem__(self, tag)
    if isinstance(data_elem, DataElement):
        return data_elem
    elif isinstance(data_elem, tuple):
        # A tuple here is an element not yet converted to a DataElement.
        # If a deferred read, then go get the value now
        if data_elem.value is None:
            from pydicom.filereader import read_deferred_data_element
            data_elem = read_deferred_data_element(self.fileobj_type,
                                                   self.filename,
                                                   self.timestamp,
                                                   data_elem)
        # (0008,0005) is decoded with the default encoding; reading
        # self._character_set for it would look the same element up again.
        if tag != (0x08, 0x05):
            character_set = self._character_set
        else:
            character_set = default_encoding
        # Not converted from raw form read from file yet; do so now
        self[tag] = DataElement_from_raw(data_elem, character_set)
        # If the Element has an ambiguous VR, try to correct it
        if 'or' in self[tag].VR:
            from pydicom.filewriter import correct_ambiguous_vr_element
            # NOTE(review): data_elem[6] is presumably the raw element's
            # little-endian flag -- confirm against RawDataElement.
            self[tag] = correct_ambiguous_vr_element(self[tag], self,
                                                     data_elem[6])
    # Return whatever is stored now (the converted element if one was made).
    return dict.__getitem__(self, tag)
def get_item(self, key):
    """Return the stored data element, in raw form where possible.

    The element is raw if the user has never accessed the value or set their
    own value. A deferred-read element is the exception: it is read and
    converted before being returned.

    Parameters
    ----------
    key
        The DICOM (group, element) tag in any form accepted by
        pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10), 0x00100010,
        etc.

    Returns
    -------
    pydicom.dataelem.DataElement
    """
    stored = dict.__getitem__(self, Tag(key))
    # A tuple whose value is None is a deferred read: fetch it through
    # __getitem__, which reads and converts it.
    is_deferred = isinstance(stored, tuple) and stored.value is None
    return self[key] if is_deferred else stored
def group_dataset(self, group):
    """Return a Dataset containing only DataElements of a certain group.

    Implemented as the slice from ``(group, 0x0000)`` up to
    ``(group + 1, 0x0000)``.

    Parameters
    ----------
    group : int
        The group part of a DICOM (group, element) tag.

    Returns
    -------
    pydicom.dataset.Dataset
        A dataset instance containing elements of the group specified.
    """
    first_tag = (group, 0x0000)
    next_group_tag = (group + 1, 0x0000)
    return self[first_tag:next_group_tag]
def __iter__(self):
    """Iterate through the top-level of the Dataset, yielding DataElements.

    >>> for elem in ds:
    >>>     print(elem)

    The DataElements are returned in increasing tag value order.
    Sequence items are returned as a single DataElement, so it is up to the
    calling code to recurse into the Sequence items if desired.

    Yields
    ------
    pydicom.dataelem.DataElement
        The Dataset's DataElements, sorted by increasing tag order.
    """
    # Unlike the underlying dict, which iterates over keys, this yields the
    # stored values (each element carries its key as data_element.tag).
    for tag in sorted(self.keys()):
        yield self[tag]
def _is_uncompressed_transfer_syntax(self):
    """Return True if the TransferSyntaxUID is not a compressed syntax.

    That is, True when ``file_meta.TransferSyntaxUID`` is listed in
    NotCompressedPixelTransferSyntaxes.
    """
    # FIXME uses file_meta here, should really only be thus for FileDataset
    syntax_uid = self.file_meta.TransferSyntaxUID
    return syntax_uid in NotCompressedPixelTransferSyntaxes
def __ne__(self, other):
    """Compare `self` and `other` for inequality (the negation of ``==``)."""
    are_equal = (self == other)
    return not are_equal
def _pixel_data_numpy(self):
    """If NumPy is available, return an ndarray of the Pixel Data.

    Falls back to GDCM in case of unsupported transfer syntaxes.

    Raises
    ------
    NotImplementedError
        If the Pixel Data is compressed and GDCM is not available or the
        dataset has no filename, or for an unsupported combination of
        SamplesPerPixel and BitsAllocated.
    TypeError
        If there is no Pixel Data or not a supported data type.
    ImportError
        If NumPy isn't found.
    AttributeError
        If the amount of pixel data does not match the size expected from
        the image attributes.

    Returns
    -------
    numpy.ndarray
        The contents of the Pixel Data element (7FE0,0010) as an ndarray.
    """
    # Compressed data can only be handled through GDCM, which needs a file
    # on disk to read from.
    if not self._is_uncompressed_transfer_syntax():
        if not have_gdcm:
            raise NotImplementedError("Pixel Data is compressed in a "
                                      "format pydicom does not yet handle. "
                                      "Cannot return array. Pydicom might "
                                      "be able to convert the pixel data "
                                      "using GDCM if it is installed.")
        elif not self.filename:
            raise NotImplementedError("GDCM is only supported when the "
                                      "dataset has been created with a "
                                      "filename.")
    if not have_numpy:
        msg = "The Numpy package is required to use pixel_array, and " \
              "numpy could not be imported."
        raise ImportError(msg)
    if 'PixelData' not in self:
        raise TypeError("No pixel data found in this dataset.")
    # There are two cases:
    # 1) uncompressed PixelData -> use numpy
    # 2) compressed PixelData, filename is available and GDCM is
    #    available -> use GDCM
    if self._is_uncompressed_transfer_syntax():
        # Make NumPy format code, e.g. "uint16", "int32" etc
        # from two pieces of info:
        # self.PixelRepresentation -- 0 for unsigned, 1 for signed;
        # self.BitsAllocated -- 8, 16, or 32
        format_str = '%sint%d' % (('u', '')[self.PixelRepresentation],
                                  self.BitsAllocated)
        try:
            numpy_dtype = numpy.dtype(format_str)
        except TypeError:
            msg = ("Data type not understood by NumPy: "
                   "format='%s', PixelRepresentation=%d, BitsAllocated=%d")
            raise TypeError(msg % (format_str, self.PixelRepresentation,
                                   self.BitsAllocated))
        # Data stored in the opposite byte order to this machine must be
        # read with a byte-swapped dtype.
        if self.is_little_endian != sys_is_little_endian:
            numpy_dtype = numpy_dtype.newbyteorder('S')
        pixel_bytearray = self.PixelData
    elif have_gdcm and self.filename:
        # read the file using GDCM
        # FIXME this should just use self.PixelData instead of self.filename
        # but it is unclear how this should be achieved using GDCM
        gdcm_image_reader = gdcm.ImageReader()
        gdcm_image_reader.SetFileName(self.filename)
        if not gdcm_image_reader.Read():
            raise TypeError("GDCM could not read DICOM image")
        gdcm_image = gdcm_image_reader.GetImage()
        # determine the correct numpy datatype
        gdcm_numpy_typemap = {
            gdcm.PixelFormat.INT8: numpy.int8,
            gdcm.PixelFormat.UINT8: numpy.uint8,
            gdcm.PixelFormat.UINT16: numpy.uint16,
            gdcm.PixelFormat.INT16: numpy.int16,
            gdcm.PixelFormat.UINT32: numpy.uint32,
            gdcm.PixelFormat.INT32: numpy.int32,
            gdcm.PixelFormat.FLOAT32: numpy.float32,
            gdcm.PixelFormat.FLOAT64: numpy.float64
        }
        gdcm_pixel_format = gdcm_image.GetPixelFormat().GetScalarType()
        if gdcm_pixel_format in gdcm_numpy_typemap:
            numpy_dtype = gdcm_numpy_typemap[gdcm_pixel_format]
        else:
            raise TypeError('{0} is not a GDCM supported '
                            'pixel format'.format(gdcm_pixel_format))
        # GDCM returns char* as type str. Under Python 2 `str` are
        # byte arrays by default. Python 3 decodes this to
        # unicode strings by default.
        # The SWIG docs mention that they always decode byte streams
        # as utf-8 strings for Python 3, with the `surrogateescape`
        # error handler configured.
        # Therefore, we can encode them back to their original bytearray
        # representation on Python 3 by using the same parameters.
        pixel_bytearray = gdcm_image.GetBuffer()
        if sys.version_info >= (3, 0):
            pixel_bytearray = pixel_bytearray.encode("utf-8",
                                                     "surrogateescape")
        # if GDCM indicates that a byte swap is in order, make
        # sure to inform numpy as well
        # NOTE(review): numpy_dtype here is a NumPy scalar type taken from
        # gdcm_numpy_typemap, not a numpy.dtype instance -- confirm that
        # calling newbyteorder on it behaves as intended.
        if gdcm_image.GetNeedByteSwap():
            numpy_dtype = numpy_dtype.newbyteorder('S')
        # Here we need to be careful because in some cases, GDCM reads a
        # buffer that is too large, so we need to make sure we only include
        # the first n_rows * n_columns * dtype_size bytes.
        n_bytes = self.Rows * self.Columns * numpy.dtype(numpy_dtype).itemsize
        if len(pixel_bytearray) > n_bytes:
            # We make sure that all the bytes after are in fact zeros
            # NOTE(review): as written, the buffer is truncated when the
            # extra bytes contain a non-zero value and left alone when they
            # are all zero -- confirm this is the intended condition.
            padding = pixel_bytearray[n_bytes:]
            if numpy.any(numpy.fromstring(padding, numpy.byte)):
                pixel_bytearray = pixel_bytearray[:n_bytes]
            else:
                # We revert to the old behavior which should then result
                # in a Numpy error later on.
                pass
    pixel_array = numpy.fromstring(pixel_bytearray, dtype=numpy_dtype)
    # Check the size in bytes against what the image attributes imply.
    length_of_pixel_array = pixel_array.nbytes
    expected_length = self.Rows * self.Columns
    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        expected_length *= self.NumberOfFrames
    if 'SamplesPerPixel' in self and self.SamplesPerPixel > 1:
        expected_length *= self.SamplesPerPixel
    if self.BitsAllocated > 8:
        expected_length *= (self.BitsAllocated // 8)
    if length_of_pixel_array != expected_length:
        raise AttributeError("Amount of pixel data %d does not match the expected data %d" % (length_of_pixel_array, expected_length))
    # Note the following reshape operations return a new *view* onto
    # pixel_array, but don't copy the data
    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        if self.SamplesPerPixel > 1:
            # TODO: Handle Planar Configuration attribute
            assert self.PlanarConfiguration == 0
            pixel_array = pixel_array.reshape(self.NumberOfFrames,
                                              self.Rows, self.Columns,
                                              self.SamplesPerPixel)
        else:
            pixel_array = pixel_array.reshape(self.NumberOfFrames,
                                              self.Rows, self.Columns)
    else:
        if self.SamplesPerPixel > 1:
            if self.BitsAllocated == 8:
                if self.PlanarConfiguration == 0:
                    pixel_array = pixel_array.reshape(self.Rows,
                                                      self.Columns,
                                                      self.SamplesPerPixel)
                else:
                    # Data is stored plane by plane: reshape with the
                    # samples axis first, then move it last.
                    pixel_array = pixel_array.reshape(self.SamplesPerPixel,
                                                      self.Rows,
                                                      self.Columns)
                    pixel_array = pixel_array.transpose(1, 2, 0)
            else:
                raise NotImplementedError("This code only handles "
                                          "SamplesPerPixel > 1 if Bits "
                                          "Allocated = 8")
        else:
            pixel_array = pixel_array.reshape(self.Rows, self.Columns)
    return pixel_array
def _compressed_pixel_data_numpy(self):
    """Return a NumPy array of the decompressed Pixel Data.

    The Pixel Data is decompressed with PIL or jpeg_ls, chosen by the
    dataset's TransferSyntaxUID, then shaped from the image attributes.

    Returns
    -------
    numpy.ndarray
        The Pixel Data as an array.

    Raises
    ------
    TypeError
        If no Pixel Data element in the dataset, or the data type is not
        understood by NumPy.
    ImportError
        If cannot import numpy.
    NotImplementedError
        If the transfer syntax is not supported, or for an unsupported
        combination of SamplesPerPixel and BitsAllocated.
    """
    if 'PixelData' not in self:
        raise TypeError("No pixel data found in this dataset.")
    if not have_numpy:
        msg = "The Numpy package is required to use pixel_array, and " \
              "numpy could not be imported."
        raise ImportError(msg)
    # determine the type used for the array
    need_byteswap = (self.is_little_endian != sys_is_little_endian)
    # Make NumPy format code, e.g. "uint16", "int32" etc
    # from two pieces of info:
    # self.PixelRepresentation -- 0 for unsigned, 1 for signed;
    # self.BitsAllocated -- 8, 16, or 32
    format_str = '%sint%d' % (('u', '')[self.PixelRepresentation],
                              self.BitsAllocated)
    try:
        numpy_format = numpy.dtype(format_str)
    except TypeError:
        msg = ("Data type not understood by NumPy: "
               "format='%s', PixelRepresentation=%d, BitsAllocated=%d")
        raise TypeError(msg % (format_str, self.PixelRepresentation,
                               self.BitsAllocated))
    # Pick the decompressor from the transfer syntax.
    if self.file_meta.TransferSyntaxUID in pydicom.uid.PILSupportedCompressedPixelTransferSyntaxes:
        UncompressedPixelData = self._get_PIL_supported_compressed_pixeldata()
    elif self.file_meta.TransferSyntaxUID in pydicom.uid.JPEGLSSupportedCompressedPixelTransferSyntaxes:
        UncompressedPixelData = self._get_jpeg_ls_supported_compressed_pixeldata()
    else:
        msg = "The transfer syntax {0} is not currently supported.".format(self.file_meta.TransferSyntaxUID)
        raise NotImplementedError(msg)
    # Have correct Numpy format, so create the NumPy array
    arr = numpy.fromstring(UncompressedPixelData, numpy_format)
    # XXX byte swap - may later handle this in read_file!!?
    if need_byteswap:
        arr.byteswap(True)  # True means swap in-place, don't make a new copy
    # Note the following reshape operations return a new *view* onto arr, but don't copy the data
    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        if self.SamplesPerPixel > 1:
            arr = arr.reshape(self.NumberOfFrames, self.Rows, self.Columns,
                              self.SamplesPerPixel)
        else:
            arr = arr.reshape(self.NumberOfFrames, self.Rows, self.Columns)
    else:
        if self.SamplesPerPixel > 1:
            if self.BitsAllocated == 8:
                if self.PlanarConfiguration == 0:
                    arr = arr.reshape(self.Rows, self.Columns,
                                      self.SamplesPerPixel)
                else:
                    # Data is stored plane by plane: reshape with the
                    # samples axis first, then move it last.
                    arr = arr.reshape(self.SamplesPerPixel, self.Rows,
                                      self.Columns)
                    arr = arr.transpose(1, 2, 0)
            else:
                raise NotImplementedError("This code only handles "
                                          "SamplesPerPixel > 1 if Bits "
                                          "Allocated = 8")
        else:
            arr = arr.reshape(self.Rows, self.Columns)
    if self.file_meta.TransferSyntaxUID in pydicom.uid.JPEG2000CompressedPixelTransferSyntaxes and self.BitsStored == 16:
        # WHY IS THIS EVEN NECESSARY??
        # Clears the top bit of every value, in place.
        arr &= 0x7FFF
    return arr
def _get_PIL_supported_compressed_pixeldata(self):
    """Use PIL to decompress compressed Pixel Data.

    Returns
    -------
    bytes
        The decompressed Pixel Data. For multi-frame data the decompressed
        frames are concatenated in order.

    Raises
    ------
    ImportError
        If PIL is not available.
    NotImplementedError
        If unable to decompress the Pixel Data.
    """
    if not have_pillow:
        msg = "The pillow package is required to use pixel_array for " \
              "this transfer syntax {0}, and pillow could not be " \
              "imported.".format(self.file_meta.TransferSyntaxUID)
        raise ImportError(msg)

    # For lossy JPEG the first two bytes of each frame are replaced by a
    # generic JFIF file header before handing the data to PIL. All other
    # syntaxes (JPEG 2000 included) are passed through unchanged.
    if self.file_meta.TransferSyntaxUID in pydicom.uid.JPEGLossyCompressedPixelTransferSyntaxes:
        if self.BitsAllocated > 8:
            raise NotImplementedError("JPEG Lossy only supported if Bits "
                                      "Allocated = 8")
        generic_jpeg_file_header = b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00\x01\x00\x01\x00\x00'
        frame_start_from = 2
    else:
        generic_jpeg_file_header = b''
        frame_start_from = 0

    def decompress(encoded):
        """Return the raw bytes of one compressed frame decoded by PIL."""
        data = generic_jpeg_file_header + encoded[frame_start_from:]
        try:
            image = PILImg.open(io.BytesIO(data))
        except IOError as e:
            # Report PIL's failure as "not supported", keeping its message
            # when it can be rendered as text.
            try:
                message = str(e)
            except Exception:
                try:
                    message = unicode(e)  # Python 2 only
                except Exception:
                    message = ''
            raise NotImplementedError(message)
        return image.tobytes()

    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        # multiple compressed frames
        frames = pydicom.encaps.decode_data_sequence(self.PixelData)
        # Join as bytes: starting from a text '' and appending tobytes()
        # output raises TypeError on Python 3.
        return b''.join(decompress(frame) for frame in frames)
    # single compressed frame
    return decompress(pydicom.encaps.defragment_data(self.PixelData))
def _get_jpeg_ls_supported_compressed_pixeldata(self):
    """Use jpeg_ls to decompress compressed Pixel Data.

    Returns
    -------
    bytes
        The decompressed Pixel Data. For multi-frame data the decompressed
        frames are concatenated in order.

    Raises
    ------
    ImportError
        If jpeg_ls is not available.
    """
    if not have_jpeg_ls:
        msg = "The jpeg_ls package is required to use pixel_array for " \
              "this transfer syntax {0}, and jpeg_ls could not be " \
              "imported.".format(self.file_meta.TransferSyntaxUID)
        raise ImportError(msg)

    def decompress(encoded):
        """Return the raw bytes of one compressed frame decoded by jpeg_ls."""
        decoded = jpeg_ls.decode(numpy.fromstring(encoded, dtype=numpy.uint8))
        return decoded.tobytes()

    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        # multiple compressed frames
        frames = pydicom.encaps.decode_data_sequence(self.PixelData)
        # Join as bytes: starting from a text '' and appending tobytes()
        # output raises TypeError on Python 3.
        return b''.join(decompress(frame) for frame in frames)
    # single compressed frame
    return decompress(pydicom.encaps.defragment_data(self.PixelData))
# Use by pixel_array property
def _get_pixel_array(self):
    """Convert the Pixel Data to a numpy array, caching the result.

    The cached array is reused while ``id(self.PixelData)`` still matches
    the id recorded when the array was built. For a compressed transfer
    syntax the PIL / jpeg_ls path is tried first; if it raises, the failure
    is logged and ``_pixel_data_numpy`` is used instead.

    Returns
    -------
    numpy.ndarray
        The array containing the Pixel Data.
    """
    # Valid only if an array was already built and PixelData is unchanged.
    cached = (hasattr(self, "_pixel_array") and
              self._pixel_id == id(self.PixelData))
    if cached:
        return self._pixel_array
    if not self._is_uncompressed_transfer_syntax():
        try:
            self._pixel_array = self._compressed_pixel_data_numpy()
            self._pixel_id = id(self.PixelData)  # is this guaranteed to work if memory is re-used??
            return self._pixel_array
        except Exception:
            # Best effort: fall through to the generic conversion below.
            logger.info("Pillow or JPLS did not support this transfer syntax")
    self._pixel_array = self._pixel_data_numpy()
    self._pixel_id = id(self.PixelData)  # is this guaranteed to work if memory is re-used??
    return self._pixel_array
@property
def pixel_array(self):
    """The Pixel Data as a NumPy array.

    Read-only property; the work is done by ``_get_pixel_array``.

    Returns
    -------
    numpy.ndarray
        The Pixel Data (7FE0,0010) as a NumPy ndarray.
    """
    array = self._get_pixel_array()
    return array
# Default templates for the string display of elements and of sequence
# elements. They are %-style format strings with named mapping keys
# (tag, name, VR, repval).
# Format strings spec'd according to python string formatting options
# See http://docs.python.org/library/stdtypes.html#string-formatting-operations
default_element_format = "%(tag)s %(name)-35.35s %(VR)s: %(repval)s"
default_sequence_element_format = "%(tag)s %(name)-35.35s %(VR)s: %(repval)s"
def _pretty_str(self, indent=0, top_level_only=False):
    """Build the indented, multi-line text representation of the Dataset.

    Called by __str__() (and therefore __repr__()) and by top(), which is
    why the `top_level_only` flag exists. Sequence items are rendered by
    recursing with a larger indentation level.

    Parameters
    ----------
    indent : int
        The indent level offset (default 0).
    top_level_only : bool
        When True, only create a string for the top level elements, i.e.
        exclude elements within any Sequences (default False).

    Returns
    -------
    str
        A string representation of the Dataset.
    """
    lines = []
    prefix = self.indent_chars * indent
    item_prefix = self.indent_chars * (indent + 1)
    for elem in self:
        with tag_in_exception(elem.tag):
            if elem.VR != "SQ":
                # Plain element: one line using the element's own repr
                lines.append(prefix + repr(elem))
                continue

            # Sequence header line: tag, description and item count
            tag_text = str(elem.tag)
            summary = " %s %i item(s) ---- " % (elem.description(),
                                                 len(elem.value))
            lines.append(prefix + tag_text + summary)
            if top_level_only:
                continue

            # Each item is rendered one level deeper, followed by a rule
            for item in elem.value:
                lines.append(item._pretty_str(indent + 1))
                lines.append(item_prefix + "---------")
    return "\n".join(lines)
def save_as(self, filename, write_like_original=True):
    """Write the Dataset to `filename`.

    The Dataset.is_implicit_VR and Dataset.is_little_endian attributes
    must exist and be set appropriately before saving. If
    Dataset.file_meta.TransferSyntaxUID is present it should be set to a
    value consistent with them to ensure conformance.

    Conformance with DICOM File Format
    ----------------------------------
    With `write_like_original` False the Dataset is stored in the DICOM
    File Format in accordance with DICOM Standard Part 10 Section 7. This
    requires that `Dataset.file_meta` exists and contains a Dataset with
    the required (Type 1) File Meta Information Group elements (see
    pydicom.filewriter.write_file and
    pydicom.filewriter.write_file_meta_info for more information).

    With `write_like_original` True the Dataset is written as is (after
    minimal validation checking). It may or may not contain all or parts
    of the File Meta Information, and hence may or may not be conformant
    with the DICOM File Format.

    Parameters
    ----------
    filename : str or file-like
        Name of file or the file-like to write the new DICOM file to.
    write_like_original : bool
        If True (default), preserves the following information from
        the Dataset (and may result in a non-conformant file):
        - preamble -- if the original file has no preamble then none
            will be written.
        - file_meta -- if the original file was missing any required
            File Meta Information Group elements then they will not be
            added or written.
            If (0002,0000) 'File Meta Information Group Length' is
            present then it may have its value updated.
        - seq.is_undefined_length -- if original had delimiters, write
            them now too, instead of the more sensible length characters
        - is_undefined_length_sequence_item -- for datasets that belong
            to a sequence, write the undefined length delimiters if that
            is what the original had.
        If False, produces a file conformant with the DICOM File Format,
        with explicit lengths for all elements.

    Raises
    ------
    AttributeError
        If `is_little_endian` or `is_implicit_VR` is missing.

    See Also
    --------
    pydicom.filewriter.write_dataset
        Write a DICOM Dataset to a file.
    pydicom.filewriter.write_file_meta_info
        Write the DICOM File Meta Information Group elements to a file.
    pydicom.filewriter.write_file
        Write a DICOM file from a FileDataset instance.
    """
    # Both encoding attributes must be present before anything is written
    required = ('is_little_endian', 'is_implicit_VR')
    encoding_known = all(hasattr(self, attr) for attr in required)
    if not encoding_known:
        message = ("'{0}.is_little_endian' and "
                   "'{0}.is_implicit_VR' must exist and be "
                   "set appropriately before "
                   "saving.")
        raise AttributeError(message.format(self.__class__.__name__))

    pydicom.write_file(filename, self, write_like_original)
def __setattr__(self, name, value):
    """Route attribute assignment to a DataElement or a plain attribute.

    When `name` is a DICOM keyword the matching DataElement is created
    (or its value replaced) and stored under its tag. Any other name is
    set as an ordinary python instance attribute.

    Parameters
    ----------
    name : str
        The element keyword for the DataElement you wish to add/change. If
        `name` is not a DICOM element keyword then this will be the
        name of the attribute to be added/changed.
    value
        The value for the attribute to be added/changed.

    Raises
    ------
    ValueError
        If `name` is the keyword of a DICOM repeating group element.
    """
    tag = tag_for_keyword(name)
    if tag is None:
        # Not a known keyword: either a repeater keyword (rejected) or a
        # regular, non-DICOM instance attribute.
        if repeater_has_keyword(name):
            raise ValueError('{} is a DICOM repeating group element and must '
                             'be added using the add() or add_new() methods.'
                             .format(name))
        # NOTE: a mis-spelled DICOM keyword silently lands here - no error
        super(Dataset, self).__setattr__(name, value)
        return

    if tag in self:
        # Element already present: only its value changes
        data_element = self[tag]
        data_element.value = value
    else:
        # New element: the VR comes from the DICOM dictionary
        data_element = DataElement(tag, dictionary_VR(tag), value)
    # Store the (new or updated) element in this dict
    self[tag] = data_element
def __setitem__(self, key, value):
"""Operator for Dataset[key] = value.
Check consistency, and deal with private tags.
Parameters
----------
key : int
The tag for the element to be added to the Dataset.
value : pydicom.dataelem.DataElement or pydicom.dataelem.RawDataElement
The element to add to the Dataset.
Raises
------
NotImplementedError
If `key` is a slice.
ValueError
If the `key` value doesn't match DataElement.tag.
"""
if isinstance(key, slice):
raise NotImplementedError('Slicing is not supported for setting '
'Dataset elements.')
# OK if is subclass, e.g. DeferredDataElement
if not isinstance(value, (DataElement, RawDataElement)):
raise TypeError("Dataset contents must be DataElement instances.")
tag = Tag(value.tag)
if key != tag:
raise ValueError("DataElement.tag must match the dictionary key")
data_element = value
if tag.is_private:
# See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
logger.debug("Setting private tag %r" % tag)
private_block = tag.elem >> 8
private_creator_tag = Tag(tag.group, private_block)
if private_creator_tag in self and tag != private_creator_tag:
if isinstance(data_element, RawDataElement):
data_element = DataElement_from_raw(data_element,
self._character_set)
data_element.private_creator = self[private_creator_tag].value
dict.__setitem__(self, tag, data_element)
def _slice_dataset(self, start, stop, step):
"""Return the element tags in the Dataset that match the slice.
Parameters
----------
start : int or None
The slice's starting element tag value.
stop : int or None
The slice's stopping element tag value.
step : int or None
The slice's step size.
Returns
------
list of pydicom.tag.Tag
The tags in the Dataset that meet the conditions of the slice.
"""
# Check the starting/stopping Tags are valid when used
if start and Tag(start):
pass
if stop and Tag(stop):
pass
all_tags = sorted(self.keys())
# If the Dataset is empty, return an empty list
if not all_tags:
return []
# Ensure we have valid Tags when start/stop are None
if start is None:
start = all_tags[0]
if stop is None:
stop = all_tags[-1] + 1
slice_tags = [tag for tag in all_tags if Tag(start) <= tag < Tag(stop)]
return slice_tags[::step]
def __str__(self):
    """Handle str(dataset).

    Returns
    -------
    str
        The text produced by `_pretty_str()` with its default arguments.
    """
    return self._pretty_str()
def top(self):
    """Return a str of the Dataset's top level DataElements only.

    Returns
    -------
    str
        The result of `_pretty_str()` called with `top_level_only=True`.
    """
    summary = self._pretty_str(top_level_only=True)
    return summary
def trait_names(self):
    """Return a list of valid names for auto-completion code.

    Used in IPython, so that data element names can be found and offered
    for autocompletion on the IPython command line.

    Returns
    -------
    list of str
        The names reported by dir() for this instance.
    """
    # dir(obj) needs python >= 2.6; on older versions use self.__dir__()
    names = dir(self)
    return names
def update(self, dictionary):
    """Extend dict.update() to handle DICOM keywords.

    Parameters
    ----------
    dictionary : dict
        Mapping whose keys are either strings (set through attribute
        assignment, i.e. treated as keywords/attribute names) or values
        accepted by Tag() (stored under that tag).
    """
    # Work on a snapshot of the items
    pairs = list(dictionary.items())
    for key, value in pairs:
        is_keyword = isinstance(key, (str, compat.text_type))
        if not is_keyword:
            self[Tag(key)] = value
            continue
        setattr(self, key, value)
def iterall(self):
    """Iterate through the Dataset, yielding all DataElements.

    Unlike Dataset.__iter__, this *does* recurse into sequences, so the
    elements come out as if the file were "flattened": each sequence
    element is yielded first, followed by the elements of each of its
    items in turn.

    Yields
    ------
    pydicom.dataelem.DataElement
    """
    for elem in self:
        yield elem
        if elem.VR != "SQ":
            continue
        # Descend into every item (a Dataset) of the sequence
        for item in elem.value:
            for nested in item.iterall():
                yield nested
def walk(self, callback, recursive=True):
    """Iterate through the DataElements and run `callback` on each.

    Visit all DataElements, possibly recursing into sequences and their
    datasets. The callback function is called for each DataElement
    (including SQ elements). Can be used to perform an operation on
    certain types of DataElements, e.g. `remove_private_tags()` finds all
    private tags and deletes them. DataElements are visited in DICOM
    order (by increasing tag number within their dataset).

    Parameters
    ----------
    callback
        A callable that takes two arguments:
            * a Dataset
            * a DataElement belonging to that Dataset
    recursive : bool
        Flag to indicate whether to recurse into Sequences.
    """
    # Sort a snapshot of the keys: gives DICOM order and tolerates the
    # callback deleting elements while we iterate.
    for tag in sorted(self.keys()):
        with tag_in_exception(tag):
            elem = self[tag]
            callback(self, elem)  # first argument is this Dataset
            if not recursive:
                continue
            # The callback may have deleted the element it was given
            if tag not in self:
                continue
            if elem.VR != "SQ":
                continue
            for item in elem.value:
                item.walk(callback)
# repr(dataset) is an alias of str(dataset): both return the same text
__repr__ = __str__
class FileDataset(Dataset):
    """An extension of Dataset to make reading and writing to file-like easier.

    Attributes
    ----------
    preamble : str or bytes or None
        The optional DICOM preamble prepended to the dataset, if available.
    file_meta : pydicom.dataset.Dataset or None
        The Dataset's file meta information as a Dataset, if available (None if
        not present). Consists of group 0002 elements.
    filename : str or None
        The filename that the dataset was read from (if read from file) or None
        if the filename is not available (if read from a BytesIO or similar).
    fileobj_type
        The object type of the file-like the Dataset was read from.
    is_implicit_VR : bool
        True if the dataset encoding is implicit VR, False otherwise.
    is_little_endian : bool
        True if the dataset encoding is little endian byte ordering, False
        otherwise.
    timestamp : float or None
        The modification time of the file the dataset was read from, None if
        the modification time is not available.
    """

    def __init__(self, filename_or_obj, dataset, preamble=None, file_meta=None,
                 is_implicit_VR=True, is_little_endian=True):
        """Initialize a Dataset read from a DICOM file.

        Parameters
        ----------
        filename_or_obj : str, file-like or None
            Full path and filename to the file, or the file-like object the
            data was read from. Use None if is a BytesIO.
        dataset : Dataset or dict
            Some form of dictionary, usually a Dataset from read_dataset().
        preamble : bytes or str, optional
            The 128-byte DICOM preamble.
        file_meta : Dataset, optional
            The file meta info dataset, as returned by _read_file_meta,
            or an empty dataset if no file meta information is in the file.
        is_implicit_VR : bool, optional
            True (default) if implicit VR transfer syntax used; False if
            explicit VR.
        is_little_endian : boolean
            True (default) if little-endian transfer syntax used; False if
            big-endian.
        """
        Dataset.__init__(self, dataset)
        self.preamble = preamble
        self.file_meta = file_meta
        self.is_implicit_VR = is_implicit_VR
        self.is_little_endian = is_little_endian
        if isinstance(filename_or_obj, compat.string_types):
            self.filename = filename_or_obj
            self.fileobj_type = open
        elif isinstance(filename_or_obj, io.BufferedReader):
            self.filename = filename_or_obj.name
            # This is the appropriate constructor for io.BufferedReader
            self.fileobj_type = open
        else:
            # Any other file-like (or None): remember its class and take
            # whichever name-like attribute it offers, if any.
            self.fileobj_type = filename_or_obj.__class__
            if getattr(filename_or_obj, "name", False):
                self.filename = filename_or_obj.name
            elif getattr(filename_or_obj, "filename", False):  # e.g. gzip
                self.filename = filename_or_obj.filename
            else:
                # e.g. came from BytesIO or something file-like
                self.filename = None

        self.timestamp = None
        if stat_available and self.filename:
            # Ask for the modification time directly instead of checking
            # for existence first: the file may disappear between the two
            # calls, and a missing/unusable name simply means "no timestamp".
            try:
                self.timestamp = os.stat(self.filename).st_mtime
            except (OSError, TypeError, ValueError):
                pass