Source code for pydicom.dataset

"""Define the Dataset and FileDataset classes.

The Dataset class represents the DICOM Dataset while the FileDataset class
adds extra functionality to Dataset when data is read from or written to file.

Overview of DICOM object model
------------------------------
Dataset (dict subclass)
  Contains DataElement instances, each of which has a tag, VR, VM and value.
    The DataElement value can be:
        * A single value, such as a number, string, etc. (i.e. VM = 1)
        * A list of numbers, strings, etc. (i.e. VM > 1)
        * A Sequence (list subclass), where each item is a Dataset which
            contains its own DataElements, and so on in a recursive manner.
"""
#
# Copyright (c) 2008-2013 Darcy Mason
# This file is part of pydicom, released under a modified MIT license.
#    See the file license.txt included with this distribution, also
#    available at https://github.com/darcymason/pydicom
#

import inspect  # for __dir__
import io
import os.path
import sys

from pydicom import compat
from pydicom.charset import default_encoding, convert_encodings
from pydicom.datadict import dictionary_VR
from pydicom.datadict import tag_for_keyword, keyword_for_tag, repeater_has_keyword
from pydicom.tag import Tag, BaseTag
from pydicom.dataelem import DataElement, DataElement_from_raw, RawDataElement
from pydicom.uid import NotCompressedPixelTransferSyntaxes, UncompressedPixelTransferSyntaxes
from pydicom.tagtools import tag_in_exception
import pydicom  # for write_file
import pydicom.charset
from pydicom.config import logger
import pydicom.encaps

# True when the interpreter's native byte order is little endian.
sys_is_little_endian = (sys.byteorder == 'little')

# Optional dependencies. Each flag records whether the matching import
# succeeded so that the rest of the module can test availability before use.
try:
    import numpy
except ImportError:
    have_numpy = False
else:
    have_numpy = True

try:
    import gdcm
except ImportError:
    have_gdcm = False
else:
    have_gdcm = True

try:
    from os import stat
except ImportError:
    stat_available = False
else:
    stat_available = True

try:
    import jpeg_ls
except ImportError:
    have_jpeg_ls = False
else:
    have_jpeg_ls = True

try:
    from PIL import Image as PILImg
except ImportError:
    have_pillow = False
    # Retry with the alternate (top-level ``Image``) import syntax for PIL.
    # NOTE(review): have_pillow stays False even when this fallback import
    # succeeds -- confirm that is intended.
    try:
        import Image as PILImg
    except ImportError:
        # Neither spelling worked, so it's likely not installed.
        pass
else:
    have_pillow = True


class PropertyError(Exception):
    """Stand-in for an AttributeError caught inside a property.

    Being a distinct exception type, it does not get routed to __getattr__
    the way an AttributeError would.
    """
    #  http://docs.python.org/release/3.1.3/tutorial/errors.html#tut-userexceptions


class Dataset(dict):
    """A collection (dictionary) of DICOM DataElements.

    Examples
    --------
    Add DataElements to the Dataset (for elements in the DICOM dictionary).

    >>> ds = Dataset()
    >>> ds.PatientName = "CITIZEN^Joan"
    >>> ds.add_new(0x00100020, 'LO', '12345')
    >>> ds[0x0010, 0x0030] = DataElement(0x00100030, 'DA', '20010101')

    Add Sequence DataElement to the Dataset

    >>> ds.BeamSequence = [Dataset(), Dataset(), Dataset()]
    >>> ds.BeamSequence[0].Manufacturer = "Linac, co."
    >>> ds.BeamSequence[1].Manufacturer = "Linac and Sons, co."
    >>> ds.BeamSequence[2].Manufacturer = "Linac and Daughters, co."

    Add private DataElements to the Dataset

    >>> ds.add(DataElement(0x0043102b, 'SS', [4, 4, 0, 0]))
    >>> ds.add_new(0x0043102b, 'SS', [4, 4, 0, 0])
    >>> ds[0x0043, 0x102b] = DataElement(0x0043102b, 'SS', [4, 4, 0, 0])

    Updating and retrieving DataElement values

    >>> ds.PatientName = "CITIZEN^Joan"
    >>> ds.PatientName
    'CITIZEN^Joan'
    >>> ds.PatientName = "CITIZEN^John"
    >>> ds.PatientName
    'CITIZEN^John'

    Retrieving a DataElement's value from a Sequence

    >>> ds.BeamSequence[0].Manufacturer
    'Linac, co.'
    >>> ds.BeamSequence[1].Manufacturer
    'Linac and Sons, co.'

    Retrieving DataElements

    >>> elem = ds[0x00100010]
    >>> elem = ds.data_element('PatientName')
    >>> elem
    (0010, 0010) Patient's Name PN: 'CITIZEN^Joan'

    Deleting a DataElement from the Dataset

    >>> del ds.PatientID
    >>> del ds.BeamSequence[1].Manufacturer
    >>> del ds.BeamSequence[2]

    Deleting a private DataElement from the Dataset

    >>> del ds[0x0043, 0x102b]

    Determining if a DataElement is present in the Dataset

    >>> 'PatientName' in ds
    True
    >>> 'PatientID' in ds
    False
    >>> 0x00100030 in ds
    True
    >>> 'Manufacturer' in ds.BeamSequence[0]
    True

    Iterating through the top level of a Dataset only (excluding Sequences)

    >>> for elem in ds:
    >>>     print(elem)

    Iterating through the entire Dataset (including Sequences)

    >>> for elem in ds.iterall():
    >>>     print(elem)

    Recursively iterate through a Dataset (including Sequences)

    >>> def recurse(ds):
    >>>     for elem in ds:
    >>>         if elem.VR == 'SQ':
    >>>             [recurse(item) for item in elem]
    >>>         else:
    >>>             # Do something useful with each DataElement

    Attributes
    ----------
    default_element_format : str
        The default formatting for string display.
    default_sequence_element_format : str
        The default formatting for string display of sequences.
    indent_chars : str
        For string display, the characters used to indent nested Sequences.
        Default is " ".
    """
    indent_chars = " "

    # Python 2: Classes which define __eq__ should flag themselves as
    # unhashable
    __hash__ = None

    def __init__(self, *args, **kwargs):
        """Create a new Dataset instance.

        Parameters
        ----------
        *args
            Positional arguments, forwarded unchanged to dict.__init__().
        **kwargs
            Only 'parent_encoding' is read; it is stored as the fallback
            encoding and defaults to `default_encoding`. Keyword arguments
            are not forwarded to dict.__init__().
        """
        self._parent_encoding = kwargs.get('parent_encoding', default_encoding)
        dict.__init__(self, *args)

    def __enter__(self):
        """Method invoked on entry to a with statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Method invoked on exit from a with statement."""
        # Returning False means an exception raised in the with-body is
        # not suppressed.
        return False
def add(self, data_element):
    """Add a DataElement to the Dataset.

    Equivalent to ds[data_element.tag] = data_element

    Parameters
    ----------
    data_element : pydicom.dataelem.DataElement
        The DataElement to add to the Dataset. It is stored under its own
        `tag` attribute.
    """
    self[data_element.tag] = data_element
def add_new(self, tag, VR, value):
    """Create a DataElement from its parts and add it to the Dataset.

    Parameters
    ----------
    tag
        The DICOM (group, element) tag in any form accepted by
        pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10), 0x00100010,
        etc.
    VR : str
        The 2 character DICOM value representation (see DICOM standard part
        5, Section 6.2).
    value
        The value of the data element. One of the following:
        * a single string or number
        * a list or tuple with all strings or all numbers
        * a multi-value string with backslash separator
        * for a sequence DataElement, an empty list or list of Dataset
    """
    data_element = DataElement(tag, VR, value)
    # use data_element.tag since DataElement verified it
    self[data_element.tag] = data_element
def data_element(self, name):
    """Return the DataElement corresponding to the element keyword `name`.

    Parameters
    ----------
    name : str
        A DICOM element keyword.

    Returns
    -------
    pydicom.dataelem.DataElement or None
        The Dataset's DataElement for the keyword, or None if `name` is not
        a keyword known to tag_for_keyword().

    Raises
    ------
    KeyError
        If `name` is a known keyword but the lookup self[tag] fails, e.g.
        because the element is not present in the Dataset.
    """
    tag = tag_for_keyword(name)
    # Test against None as (0000,0000) is a possible tag
    if tag is None:
        return None
    return self[tag]
def __contains__(self, name):
    """Extend dict.__contains__() to handle DICOM keywords.

    This is called for code like:

    >>> 'SliceLocation' in ds
    True

    Parameters
    ----------
    name : str or int or 2-tuple
        The Element keyword or tag to search for.

    Returns
    -------
    bool
        True if the DataElement is in the Dataset, False otherwise. A
        non-string `name` that cannot be converted by Tag() yields False.
    """
    if isinstance(name, (str, compat.text_type)):
        tag = tag_for_keyword(name)
    else:
        try:
            tag = Tag(name)
        except Exception:
            # Not convertible to a tag, so it cannot be a key. A bare
            # ``except:`` here would also swallow KeyboardInterrupt and
            # SystemExit.
            return False
    # Test against None as (0000,0000) is a possible tag
    if tag is not None:
        return dict.__contains__(self, tag)
    # `name` is a string that tag_for_keyword() did not resolve; fall back
    # to a plain dict key lookup on the string itself.
    return dict.__contains__(self, name)
def decode(self):
    """Apply character set decoding to all DataElements in the Dataset.

    See DICOM PS3.5-2008 6.1.1.
    """
    # Find specific character set. 'ISO_IR 6' is default
    # May be multi-valued, but let pydicom.charset handle all logic on that
    dicom_character_set = self._character_set

    # Shortcut to the decode function in pydicom.charset
    decode_data_element = pydicom.charset.decode

    # Callback for walk(), to decode the chr strings if necessary
    # This simply calls the pydicom.charset.decode function
    def decode_callback(ds, data_element):
        """Callback to decode `data_element`."""
        if data_element.VR == 'SQ':
            # Each sequence item decodes itself via its own decode() call,
            # which is why walk() is invoked non-recursively below.
            for dset in data_element.value:
                dset.decode()
        else:
            decode_data_element(data_element, dicom_character_set)

    self.walk(decode_callback, recursive=False)
def __delattr__(self, name):
    """Intercept requests to delete an attribute by `name`.

    If `name` is a DICOM keyword:
        Delete the corresponding DataElement from the Dataset.
        >>> del ds.PatientName
    Else:
        Delete the class attribute as any other class would do.
        >>> del ds._is_some_attribute

    Parameters
    ----------
    name : str
        The keyword for the DICOM element or the class attribute to delete.

    Raises
    ------
    AttributeError
        If `name` is neither a keyword of an element in the Dataset nor an
        entry in the instance __dict__.
    """
    # First check if a valid DICOM keyword and if we have that data element
    tag = tag_for_keyword(name)
    if tag is not None and tag in self:
        dict.__delitem__(self, tag)  # direct to dict as we know we have key
    # If not a DICOM name in this dataset, check for regular instance name
    #   can't do delete directly, that will call __delattr__ again
    elif name in self.__dict__:
        del self.__dict__[name]
    # Not found, raise an error in same style as python does
    else:
        raise AttributeError(name)


def __delitem__(self, key):
    """Intercept requests to delete an attribute by key.

    Examples
    --------
    Indexing using DataElement tag

    >>> ds = Dataset()
    >>> ds.CommandGroupLength = 100
    >>> ds.PatientName = 'CITIZEN^Jan'
    >>> del ds[0x00000000]
    >>> ds
    (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

    Slicing using DataElement tag

    >>> ds = Dataset()
    >>> ds.CommandGroupLength = 100
    >>> ds.SOPInstanceUID = '1.2.3'
    >>> ds.PatientName = 'CITIZEN^Jan'
    >>> del ds[:0x00100000]
    >>> ds
    (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

    Parameters
    ----------
    key
        The key for the attribute to be deleted. If a slice is used then
        the tags matching the slice conditions will be deleted.
    """
    # If passed a slice, delete the corresponding DataElements
    if isinstance(key, slice):
        for tag in self._slice_dataset(key.start, key.stop, key.step):
            del self[tag]
    else:
        # Assume is a standard tag (for speed in common case)
        try:
            dict.__delitem__(self, key)
        # If not a standard tag, than convert to Tag and try again
        except KeyError:
            tag = Tag(key)
            dict.__delitem__(self, tag)


def __dir__(self):
    """Give a list of attributes available in the Dataset.

    List of attributes is used, for example, in auto-completion in editors
    or command-line environments.

    Returns
    -------
    list of str
        The sorted union of the Dataset class's routine names, its data
        descriptor names and the keywords returned by self.dir().
    """
    # inspect.getmembers() returns (name, value) pairs; only names are needed
    meths = set(member_name for member_name, _ in
                inspect.getmembers(Dataset, inspect.isroutine))
    props = set(member_name for member_name, _ in
                inspect.getmembers(Dataset, inspect.isdatadescriptor))
    dicom_names = set(self.dir())
    alldir = sorted(props | meths | dicom_names)
    return alldir
def dir(self, *filters):
    """Return an alphabetical list of DataElement keywords in the Dataset.

    Intended mainly for use in interactive Python sessions. Only lists the
    DataElement keywords in the current level of the Dataset (i.e. the
    contents of any Sequence elements are ignored).

    Parameters
    ----------
    filters : str
        Zero or more string arguments to the function. Used for
        case-insensitive match to any part of the DICOM keyword.

    Returns
    -------
    list of str
        The matching DataElement keywords in the dataset. If no filters are
        used then all DataElement keywords are returned.
    """
    allnames = [keyword_for_tag(tag) for tag in self.keys()]
    # remove blanks - tags without valid names (e.g. private tags)
    allnames = [x for x in allnames if x]
    if not filters:
        return sorted(allnames)

    # Collect matches in a set, so duplicate names appear only once
    matches = set()
    for filter_ in filters:
        filter_ = filter_.lower()
        matches.update(x for x in allnames if filter_ in x.lower())
    return sorted(matches)
def __eq__(self, other):
    """Compare `self` and `other` for equality.

    Two datasets are equal when they hold equal elements under the same
    keys and their instance attributes (__dict__) are equal. The order in
    which elements were added does not matter.

    Returns
    -------
    bool
        The result if `self` and `other` are the same class
    NotImplemented
        If `other` is not the same class as `self` then returning
        NotImplemented delegates the result to superclass.__eq__(subclass)
    """
    # When comparing against self this will be faster
    if other is self:
        return True

    if isinstance(other, self.__class__):
        # Compare elements as a mapping rather than as list(values()):
        # the list form depends on insertion order, so identical datasets
        # populated in a different order wrongly compared unequal.
        return (dict.__eq__(self, other) and
                self.__dict__ == other.__dict__)

    return NotImplemented
def get(self, key, default=None):
    """Extend dict.get() to handle DICOM DataElement keywords.

    Parameters
    ----------
    key : str or pydicom.tag.Tag
        The element keyword or Tag or the class attribute name to get.
    default : obj or None
        If the DataElement or class attribute is not present, return
        `default` (default None).

    Returns
    -------
    value
        If `key` is the keyword for a DataElement in the Dataset then
        return the DataElement's value.
    pydicom.dataelem.DataElement
        If `key` is a tag for a DataElement in the Dataset then return the
        DataElement instance.
    value
        If `key` is a class attribute then return its value.

    Raises
    ------
    TypeError
        If `key` is not a string and cannot be converted by Tag().
    """
    if isinstance(key, (str, compat.text_type)):
        try:
            return getattr(self, key)
        except AttributeError:
            return default
    else:
        # is not a string, try to make it into a tag and then hand it
        # off to the underlying dict
        if not isinstance(key, BaseTag):
            try:
                key = Tag(key)
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt and
                # SystemExit are no longer converted into a TypeError.
                raise TypeError("Dataset.get key must be a string or tag")
        try:
            return_val = self.__getitem__(key)
        except KeyError:
            return_val = default
        return return_val
def __getattr__(self, name):
    """Intercept requests for Dataset attribute names.

    If `name` matches a DICOM keyword, return the value for the
    DataElement with the corresponding tag.

    Parameters
    ----------
    name
        A DataElement keyword or tag or a class attribute name.

    Returns
    -------
    value
        If `name` matches a DICOM keyword, returns the corresponding
        DataElement's value. Otherwise returns the class attribute's
        value (if present).
    """
    tag = tag_for_keyword(name)
    if tag is None:  # `name` isn't a DICOM element keyword
        # Try the base class attribute getter (fix for issue 332)
        return super(Dataset, self).__getattribute__(name)
    tag = Tag(tag)
    if tag not in self:  # DICOM DataElement not in the Dataset
        # Try the base class attribute getter (fix for issue 332)
        return super(Dataset, self).__getattribute__(name)
    else:
        return self[tag].value


@property
def _character_set(self):
    """The Dataset's SpecificCharacterSet value (if present).

    Falls back to the `_parent_encoding` attribute when the element is
    absent or empty; otherwise the value is passed through
    convert_encodings().
    """
    char_set = self.get('SpecificCharacterSet', None)

    if not char_set:
        char_set = self._parent_encoding
    else:
        char_set = convert_encodings(char_set)

    return char_set


def __getitem__(self, key):
    """Operator for Dataset[key] request.

    Any deferred data elements will be read in and an attempt will be made
    to correct any elements with ambiguous VRs.

    Examples
    --------
    Indexing using DataElement tag

    >>> ds = Dataset()
    >>> ds.SOPInstanceUID = '1.2.3'
    >>> ds.PatientName = 'CITIZEN^Jan'
    >>> ds.PatientID = '12345'
    >>> ds[0x00100010]
    (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'

    Slicing using DataElement tag

    All group 0x0010 elements in the dataset

    >>> ds[0x00100000:0x00110000]
    (0010, 0010) Patient's Name PN: 'CITIZEN^Jan'
    (0010, 0020) Patient ID LO: '12345'

    All group 0x0002 elements in the dataset

    >>> ds[(0x0002, 0x0000):(0x0003, 0x0000)]

    Parameters
    ----------
    key
        The DICOM (group, element) tag in any form accepted by
        pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10), 0x00100010,
        etc. May also be a slice made up of DICOM tags.

    Returns
    -------
    pydicom.dataelem.DataElement or pydicom.dataset.Dataset
        If a single DICOM element tag is used then returns the
        corresponding DataElement. If a slice is used then returns a
        Dataset object containing the corresponding DataElements.
    """
    # If passed a slice, return a Dataset containing the corresponding
    #   DataElements
    if isinstance(key, slice):
        ds = Dataset()
        for tag in self._slice_dataset(key.start, key.stop, key.step):
            ds.add(self[tag])
        return ds

    tag = Tag(key)
    data_elem = dict.__getitem__(self, tag)

    if isinstance(data_elem, DataElement):
        return data_elem
    elif isinstance(data_elem, tuple):
        # A tuple here is an element still in its raw, unconverted form
        # If a deferred read, then go get the value now
        if data_elem.value is None:
            from pydicom.filereader import read_deferred_data_element
            data_elem = read_deferred_data_element(self.fileobj_type,
                                                   self.filename,
                                                   self.timestamp,
                                                   data_elem)

        # Tag (0008,0005) is converted with the default encoding rather
        # than with self._character_set
        if tag != (0x08, 0x05):
            character_set = self._character_set
        else:
            character_set = default_encoding
        # Not converted from raw form read from file yet; do so now
        self[tag] = DataElement_from_raw(data_elem, character_set)

        # If the Element has an ambiguous VR, try to correct it
        if 'or' in self[tag].VR:
            from pydicom.filewriter import correct_ambiguous_vr_element
            self[tag] = correct_ambiguous_vr_element(self[tag], self,
                                                     data_elem[6])

    return dict.__getitem__(self, tag)
def get_item(self, key):
    """Return the raw data element if possible.

    It will be raw if the user has never accessed the value, or set their
    own value. Note if the data element is a deferred-read element,
    then it is read and converted before being returned.

    Parameters
    ----------
    key
        The DICOM (group, element) tag in any form accepted by
        pydicom.tag.Tag such as [0x0010, 0x0010], (0x10, 0x10), 0x00100010,
        etc.

    Returns
    -------
    pydicom.dataelem.DataElement
        The stored element as-is, or the converted element when the stored
        form was a tuple whose `value` is None (a deferred read).
    """
    tag = Tag(key)
    data_elem = dict.__getitem__(self, tag)
    # If a deferred read, return using __getitem__ to read and convert it
    if isinstance(data_elem, tuple) and data_elem.value is None:
        return self[key]
    return data_elem
def group_dataset(self, group):
    """Return a Dataset containing only DataElements of a certain group.

    Parameters
    ----------
    group : int
        The group part of a DICOM (group, element) tag.

    Returns
    -------
    pydicom.dataset.Dataset
        A dataset instance containing elements of the group specified.
    """
    # Slice from the first tag of `group` up to the first tag of the
    # following group.
    return self[(group, 0x0000):(group + 1, 0x0000)]
def __iter__(self):
    """Iterate through the top-level of the Dataset, yielding DataElements.

    >>> for elem in ds:
    >>>     print(elem)

    The DataElements are returned in increasing tag value order. Sequence
    items are returned as a single DataElement, so it is up to the calling
    code to recurse into the Sequence items if desired.

    Yields
    ------
    pydicom.dataelem.DataElement
        The Dataset's DataElements, sorted by increasing tag order.
    """
    # Note this is different than the underlying dict class,
    #   which returns the key of the key:value mapping.
    # Here the value is returned (but data_element.tag has the key)
    taglist = sorted(self.keys())
    for tag in taglist:
        yield self[tag]


def _is_uncompressed_transfer_syntax(self):
    """Return True if the TransferSyntaxUID is an uncompressed syntax.

    That is, True when file_meta.TransferSyntaxUID is listed in
    NotCompressedPixelTransferSyntaxes.
    """
    # FIXME uses file_meta here, should really only be thus for FileDataset
    return self.file_meta.TransferSyntaxUID in NotCompressedPixelTransferSyntaxes


def __ne__(self, other):
    """Compare `self` and `other` for inequality."""
    return not (self == other)


def _pixel_data_numpy(self):
    """If NumPy is available, return an ndarray of the Pixel Data.

    Falls back to GDCM in case of unsupported transfer syntaxes.

    Raises
    ------
    TypeError
        If there is no Pixel Data or not a supported data type.
    ImportError
        If NumPy isn't found.
    NotImplementedError
        If the transfer syntax is compressed and GDCM is unavailable or
        the dataset has no filename, or for SamplesPerPixel > 1 with
        BitsAllocated other than 8 (single frame).
    AttributeError
        If the amount of pixel data does not match the expected length.

    Returns
    -------
    numpy.ndarray
        The contents of the Pixel Data element (7FE0,0010) as an ndarray.
    """
    if not self._is_uncompressed_transfer_syntax():
        if not have_gdcm:
            raise NotImplementedError("Pixel Data is compressed in a "
                                      "format pydicom does not yet handle. "
                                      "Cannot return array. Pydicom might "
                                      "be able to convert the pixel data "
                                      "using GDCM if it is installed.")
        elif not self.filename:
            raise NotImplementedError("GDCM is only supported when the "
                                      "dataset has been created with a "
                                      "filename.")
    if not have_numpy:
        msg = "The Numpy package is required to use pixel_array, and " \
              "numpy could not be imported."
        raise ImportError(msg)
    if 'PixelData' not in self:
        raise TypeError("No pixel data found in this dataset.")

    # There are two cases:
    # 1) uncompressed PixelData -> use numpy
    # 2) compressed PixelData, filename is available and GDCM is
    #    available -> use GDCM
    if self._is_uncompressed_transfer_syntax():
        # Make NumPy format code, e.g. "uint16", "int32" etc
        # from two pieces of info:
        # self.PixelRepresentation -- 0 for unsigned, 1 for signed;
        # self.BitsAllocated -- 8, 16, or 32
        format_str = '%sint%d' % (('u', '')[self.PixelRepresentation],
                                  self.BitsAllocated)
        try:
            numpy_dtype = numpy.dtype(format_str)
        except TypeError:
            msg = ("Data type not understood by NumPy: "
                   "format='%s', PixelRepresentation=%d, BitsAllocated=%d")
            raise TypeError(msg % (format_str, self.PixelRepresentation,
                                   self.BitsAllocated))

        if self.is_little_endian != sys_is_little_endian:
            numpy_dtype = numpy_dtype.newbyteorder('S')

        pixel_bytearray = self.PixelData
    elif have_gdcm and self.filename:
        # read the file using GDCM
        # FIXME this should just use self.PixelData instead of self.filename
        #       but it is unclear how this should be achieved using GDCM
        gdcm_image_reader = gdcm.ImageReader()
        gdcm_image_reader.SetFileName(self.filename)
        if not gdcm_image_reader.Read():
            raise TypeError("GDCM could not read DICOM image")
        gdcm_image = gdcm_image_reader.GetImage()

        # determine the correct numpy datatype
        gdcm_numpy_typemap = {
            gdcm.PixelFormat.INT8: numpy.int8,
            gdcm.PixelFormat.UINT8: numpy.uint8,
            gdcm.PixelFormat.UINT16: numpy.uint16,
            gdcm.PixelFormat.INT16: numpy.int16,
            gdcm.PixelFormat.UINT32: numpy.uint32,
            gdcm.PixelFormat.INT32: numpy.int32,
            gdcm.PixelFormat.FLOAT32: numpy.float32,
            gdcm.PixelFormat.FLOAT64: numpy.float64
        }
        gdcm_pixel_format = gdcm_image.GetPixelFormat().GetScalarType()
        if gdcm_pixel_format in gdcm_numpy_typemap:
            numpy_dtype = gdcm_numpy_typemap[gdcm_pixel_format]
        else:
            raise TypeError('{0} is not a GDCM supported '
                            'pixel format'.format(gdcm_pixel_format))

        # GDCM returns char* as type str. Under Python 2 `str` are
        # byte arrays by default. Python 3 decodes this to
        # unicode strings by default.
        # The SWIG docs mention that they always decode byte streams
        # as utf-8 strings for Python 3, with the `surrogateescape`
        # error handler configured.
        # Therefore, we can encode them back to their original bytearray
        # representation on Python 3 by using the same parameters.
        pixel_bytearray = gdcm_image.GetBuffer()
        if sys.version_info >= (3, 0):
            pixel_bytearray = pixel_bytearray.encode("utf-8",
                                                     "surrogateescape")

        # if GDCM indicates that a byte swap is in order, make
        #   sure to inform numpy as well
        if gdcm_image.GetNeedByteSwap():
            numpy_dtype = numpy_dtype.newbyteorder('S')

        # Here we need to be careful because in some cases, GDCM reads a
        # buffer that is too large, so we need to make sure we only include
        # the first n_rows * n_columns * dtype_size bytes.
        # NOTE(review): scoped to the GDCM branch because the comment above
        # describes a GDCM-specific quirk -- confirm it was not meant to
        # apply to the uncompressed branch as well.
        n_bytes = self.Rows * self.Columns * numpy.dtype(numpy_dtype).itemsize

        if len(pixel_bytearray) > n_bytes:
            # We make sure that all the bytes after are in fact zeros
            # NOTE(review): the buffer is truncated when the trailing bytes
            # are NOT all zero, which reads as the opposite of the comment
            # above -- confirm the intended condition.
            padding = pixel_bytearray[n_bytes:]
            if numpy.any(numpy.fromstring(padding, numpy.byte)):
                pixel_bytearray = pixel_bytearray[:n_bytes]
            else:
                # We revert to the old behavior which should then result
                #   in a Numpy error later on.
                pass

    pixel_array = numpy.fromstring(pixel_bytearray, dtype=numpy_dtype)
    length_of_pixel_array = pixel_array.nbytes
    expected_length = self.Rows * self.Columns
    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        expected_length *= self.NumberOfFrames
    if 'SamplesPerPixel' in self and self.SamplesPerPixel > 1:
        expected_length *= self.SamplesPerPixel
    if self.BitsAllocated > 8:
        expected_length *= (self.BitsAllocated // 8)
    if length_of_pixel_array != expected_length:
        raise AttributeError("Amount of pixel data %d does not match the expected data %d" %
                             (length_of_pixel_array, expected_length))

    # Note the following reshape operations return a new *view* onto
    #   pixel_array, but don't copy the data
    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        if self.SamplesPerPixel > 1:
            # TODO: Handle Planar Configuration attribute
            assert self.PlanarConfiguration == 0
            pixel_array = pixel_array.reshape(self.NumberOfFrames,
                                              self.Rows, self.Columns,
                                              self.SamplesPerPixel)
        else:
            pixel_array = pixel_array.reshape(self.NumberOfFrames,
                                              self.Rows, self.Columns)
    else:
        if self.SamplesPerPixel > 1:
            if self.BitsAllocated == 8:
                if self.PlanarConfiguration == 0:
                    pixel_array = pixel_array.reshape(self.Rows,
                                                      self.Columns,
                                                      self.SamplesPerPixel)
                else:
                    pixel_array = pixel_array.reshape(self.SamplesPerPixel,
                                                      self.Rows,
                                                      self.Columns)
                    pixel_array = pixel_array.transpose(1, 2, 0)
            else:
                raise NotImplementedError("This code only handles "
                                          "SamplesPerPixel > 1 if Bits "
                                          "Allocated = 8")
        else:
            pixel_array = pixel_array.reshape(self.Rows, self.Columns)
    return pixel_array


def _compressed_pixel_data_numpy(self):
    """Return a NumPy array of the Pixel Data.

    NumPy is a numerical package for python. It is used if available.

    Returns
    -------
    numpy.ndarray
        The Pixel Data as an array.

    Raises
    ------
    TypeError
        If no Pixel Data element in the dataset, or the data type is not
        understood by NumPy.
    ImportError
        If cannot import numpy.
    NotImplementedError
        If the transfer syntax is in neither the PIL nor the JPEG-LS
        supported lists, or for SamplesPerPixel > 1 with BitsAllocated
        other than 8 (single frame).
    """
    if 'PixelData' not in self:
        raise TypeError("No pixel data found in this dataset.")

    if not have_numpy:
        msg = "The Numpy package is required to use pixel_array, and " \
              "numpy could not be imported."
        raise ImportError(msg)

    # determine the type used for the array
    need_byteswap = (self.is_little_endian != sys_is_little_endian)

    # Make NumPy format code, e.g. "uint16", "int32" etc
    # from two pieces of info:
    # self.PixelRepresentation -- 0 for unsigned, 1 for signed;
    # self.BitsAllocated -- 8, 16, or 32
    format_str = '%sint%d' % (('u', '')[self.PixelRepresentation],
                              self.BitsAllocated)
    try:
        numpy_format = numpy.dtype(format_str)
    except TypeError:
        msg = ("Data type not understood by NumPy: "
               "format='%s', PixelRepresentation=%d, BitsAllocated=%d")
        raise TypeError(msg % (format_str, self.PixelRepresentation,
                               self.BitsAllocated))

    if self.file_meta.TransferSyntaxUID in pydicom.uid.PILSupportedCompressedPixelTransferSyntaxes:
        UncompressedPixelData = self._get_PIL_supported_compressed_pixeldata()
    elif self.file_meta.TransferSyntaxUID in pydicom.uid.JPEGLSSupportedCompressedPixelTransferSyntaxes:
        UncompressedPixelData = self._get_jpeg_ls_supported_compressed_pixeldata()
    else:
        msg = "The transfer syntax {0} is not currently supported.".format(self.file_meta.TransferSyntaxUID)
        raise NotImplementedError(msg)

    # Have correct Numpy format, so create the NumPy array
    arr = numpy.fromstring(UncompressedPixelData, numpy_format)

    # XXX byte swap - may later handle this in read_file!!?
    if need_byteswap:
        arr.byteswap(True)  # True means swap in-place, don't make a new copy
    # Note the following reshape operations return a new *view* onto arr,
    #   but don't copy the data
    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        if self.SamplesPerPixel > 1:
            arr = arr.reshape(self.NumberOfFrames, self.Rows, self.Columns,
                              self.SamplesPerPixel)
        else:
            arr = arr.reshape(self.NumberOfFrames, self.Rows, self.Columns)
    else:
        if self.SamplesPerPixel > 1:
            if self.BitsAllocated == 8:
                if self.PlanarConfiguration == 0:
                    arr = arr.reshape(self.Rows, self.Columns,
                                      self.SamplesPerPixel)
                else:
                    arr = arr.reshape(self.SamplesPerPixel, self.Rows,
                                      self.Columns)
                    arr = arr.transpose(1, 2, 0)
            else:
                raise NotImplementedError("This code only handles "
                                          "SamplesPerPixel > 1 if Bits "
                                          "Allocated = 8")
        else:
            arr = arr.reshape(self.Rows, self.Columns)
    if (self.file_meta.TransferSyntaxUID in pydicom.uid.JPEG2000CompressedPixelTransferSyntaxes and
            self.BitsStored == 16):
        # WHY IS THIS EVEN NECESSARY??
        arr &= 0x7FFF
    return arr


def _get_PIL_supported_compressed_pixeldata(self):
    """Use PIL to decompress compressed Pixel Data.

    Returns
    -------
    bytes or str
        The decompressed Pixel Data

    Raises
    ------
    ImportError
        If PIL is not available.
    NotImplementedError
        If unable to decompress the Pixel Data.
    """
    if not have_pillow:
        msg = "The pillow package is required to use pixel_array for " \
              "this transfer syntax {0}, and pillow could not be " \
              "imported.".format(self.file_meta.TransferSyntaxUID)
        raise ImportError(msg)
    # decompress here
    if self.file_meta.TransferSyntaxUID in pydicom.uid.JPEGLossyCompressedPixelTransferSyntaxes:
        if self.BitsAllocated > 8:
            raise NotImplementedError("JPEG Lossy only supported if Bits "
                                      "Allocated = 8")
        # For this syntax the first 2 bytes of each frame are dropped and
        # the fixed header below is prepended in their place.
        generic_jpeg_file_header = b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00\x01\x00\x01\x00\x00'
        frame_start_from = 2
    elif self.file_meta.TransferSyntaxUID in pydicom.uid.JPEG2000CompressedPixelTransferSyntaxes:
        generic_jpeg_file_header = b''
        # generic_jpeg_file_header = b'\x00\x00\x00\x0C\x6A\x50\x20\x20\x0D\x0A\x87\x0A'
        frame_start_from = 0
    else:
        generic_jpeg_file_header = b''
        frame_start_from = 0

    # Start from bytes, not str: the frames appended below are bytes, and
    # str + bytes raises TypeError on Python 3.
    UncompressedPixelData = b''
    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        # multiple compressed frames
        CompressedPixelDataSeq = pydicom.encaps.decode_data_sequence(self.PixelData)
        for frame in CompressedPixelDataSeq:
            data = generic_jpeg_file_header + frame[frame_start_from:]
            fio = io.BytesIO(data)
            try:
                decompressed_image = PILImg.open(fio)
            except IOError as e:
                # Best-effort conversion of the error to text
                try:
                    message = str(e)
                except Exception:
                    try:
                        message = unicode(e)
                    except Exception:
                        message = ''
                raise NotImplementedError(message)
            UncompressedPixelData += decompressed_image.tobytes()
    else:
        # single compressed frame
        UncompressedPixelData = pydicom.encaps.defragment_data(self.PixelData)
        UncompressedPixelData = generic_jpeg_file_header + UncompressedPixelData[frame_start_from:]
        try:
            fio = io.BytesIO(UncompressedPixelData)
            decompressed_image = PILImg.open(fio)
        except IOError as e:
            # Best-effort conversion of the error to text
            try:
                message = str(e)
            except Exception:
                try:
                    message = unicode(e)
                except Exception:
                    message = ''
            raise NotImplementedError(message)
        UncompressedPixelData = decompressed_image.tobytes()
    return UncompressedPixelData


def _get_jpeg_ls_supported_compressed_pixeldata(self):
    """Use jpeg_ls to decompress compressed Pixel Data.

    Returns
    -------
    bytes or str
        The decompressed Pixel Data

    Raises
    ------
    ImportError
        If jpeg_ls is not available.
    """
    if not have_jpeg_ls:
        msg = "The jpeg_ls package is required to use pixel_array for " \
              "this transfer syntax {0}, and jpeg_ls could not be " \
              "imported.".format(self.file_meta.TransferSyntaxUID)
        raise ImportError(msg)
    # decompress here
    # Start from bytes, not str: tobytes() results are appended below, and
    # str + bytes raises TypeError on Python 3.
    UncompressedPixelData = b''
    if 'NumberOfFrames' in self and self.NumberOfFrames > 1:
        # multiple compressed frames
        CompressedPixelDataSeq = pydicom.encaps.decode_data_sequence(self.PixelData)
        for frame in CompressedPixelDataSeq:
            decompressed_image = jpeg_ls.decode(numpy.fromstring(frame, dtype=numpy.uint8))
            UncompressedPixelData += decompressed_image.tobytes()
    else:
        # single compressed frame
        CompressedPixelData = pydicom.encaps.defragment_data(self.PixelData)
        decompressed_image = jpeg_ls.decode(numpy.fromstring(CompressedPixelData, dtype=numpy.uint8))
        UncompressedPixelData = decompressed_image.tobytes()
    return UncompressedPixelData


# Use by pixel_array property
def _get_pixel_array(self):
    """Convert the Pixel Data to a numpy array.

    The array is cached on the instance as `_pixel_array` and rebuilt when
    id(self.PixelData) no longer matches the stored `_pixel_id`.

    Returns
    -------
    numpy.ndarray
        The array containing the Pixel Data.
    """
    # Check if already have converted to a NumPy array
    # Also check if self.PixelData has changed. If so, get new NumPy array
    already_have = True
    if not hasattr(self, "_pixel_array"):
        already_have = False
    elif self._pixel_id != id(self.PixelData):
        already_have = False
    if not already_have and not self._is_uncompressed_transfer_syntax():
        try:
            self._pixel_array = self._compressed_pixel_data_numpy()
            self._pixel_id = id(self.PixelData)  # is this guaranteed to work if memory is re-used??
            return self._pixel_array
        except Exception:
            # Deliberate best-effort: on any failure fall through to
            # _pixel_data_numpy() below.
            logger.info("Pillow or JPLS did not support this transfer syntax")
    if not already_have:
        self._pixel_array = self._pixel_data_numpy()
        self._pixel_id = id(self.PixelData)  # is this guaranteed to work if memory is re-used??
    return self._pixel_array


@property
def pixel_array(self):
    """Return the Pixel Data as a NumPy array.

    Returns
    -------
    numpy.ndarray
        The Pixel Data (7FE0,0010) as a NumPy ndarray.
    """
    return self._get_pixel_array()


# Format strings spec'd according to python string formatting options
#    See http://docs.python.org/library/stdtypes.html#string-formatting-operations
default_element_format = "%(tag)s %(name)-35.35s %(VR)s: %(repval)s"
default_sequence_element_format = "%(tag)s %(name)-35.35s %(VR)s: %(repval)s"
def formatted_lines(self, element_format=default_element_format,
                    sequence_element_format=default_sequence_element_format,
                    indent_format=None):
    """Iterate through the Dataset yielding formatted str for each element.

    Parameters
    ----------
    element_format : str
        The string format to use for non-sequence elements. Formatting uses
        the attributes of DataElement. Default is
        "%(tag)s %(name)-35.35s %(VR)s: %(repval)s".
    sequence_element_format : str
        The string format to use for sequence elements. Formatting uses the
        attributes of DataElement. Default is
        "%(tag)s %(name)-35.35s %(VR)s: %(repval)s"
    indent_format : str or None
        Placeholder for future functionality.

    Yields
    ------
    str
        A string representation of a DataElement.
    """
    for data_element in self.iterall():
        # Get all the attributes possible for this data element (e.g.
        #   gets descriptive text name too)
        # This is the dictionary of names that can be used in the format
        #   string. Callable attributes are called with no arguments and
        #   their result is stored.
        elem_dict = {}
        for attr_name in dir(data_element):
            if attr_name.startswith("_"):
                continue
            attr = getattr(data_element, attr_name)
            elem_dict[attr_name] = attr() if callable(attr) else attr

        if data_element.VR == "SQ":
            yield sequence_element_format % elem_dict
        else:
            yield element_format % elem_dict
def _pretty_str(self, indent=0, top_level_only=False):
    """Build the indented, multi-line text form of the Dataset.

    Used by `__str__()` (and therefore `__repr__()`) and by `top()`,
    which is why the `top_level_only` flag exists. The method calls
    itself for every item of a sequence, one indent level deeper.

    Parameters
    ----------
    indent : int
        The indent level offset (default 0).
    top_level_only : bool
        When True, only the top level elements are rendered, i.e. the
        items inside any Sequences are left out (default False).

    Returns
    -------
    str
        A string representation of the Dataset.
    """
    lines = []
    pad = self.indent_chars * indent
    child_pad = self.indent_chars * (indent + 1)

    for elem in self:
        with tag_in_exception(elem.tag):
            if elem.VR != "SQ":
                lines.append(pad + repr(elem))
                continue

            # Sequence header: tag, description and item count
            prefix = pad + str(elem.tag)
            summary = " %s %i item(s) ---- " % (elem.description(),
                                                len(elem.value))
            lines.append(prefix + summary)
            if top_level_only:
                continue

            # Each item is rendered one level deeper, then a separator
            for item in elem.value:
                lines.append(item._pretty_str(indent + 1))
                lines.append(child_pad + "---------")

    return "\n".join(lines)
def remove_private_tags(self):
    """Remove all private DataElements in the Dataset.

    The removal is driven by `walk()`, which is handed a callback that
    deletes any element whose tag reports `is_private`.
    """
    def _drop_if_private(owner, elem):
        """Callback for walk(): delete `elem` from `owner` when private."""
        tag = elem.tag
        if not tag.is_private:
            return
        # Delete from the dataset passed in by walk(), not from the
        # outer `self` - on recursion they are different datasets.
        del owner[tag]

    self.walk(_drop_if_private)
def save_as(self, filename, write_like_original=True):
    """Write the Dataset to `filename`.

    The Dataset must already carry the `is_implicit_VR` and
    `is_little_endian` attributes, set to match the intended encoding.
    When `Dataset.file_meta.TransferSyntaxUID` is present it should be
    consistent with those attributes to ensure conformance.

    Conformance with DICOM File Format
    ----------------------------------
    With `write_like_original` False, the Dataset is stored in the DICOM
    File Format in accordance with DICOM Standard Part 10 Section 7. This
    requires `Dataset.file_meta` to exist and to contain a Dataset with
    the required (Type 1) File Meta Information Group elements (see
    pydicom.filewriter.write_file and
    pydicom.filewriter.write_file_meta_info for more information).

    With `write_like_original` True, the Dataset is written as is (after
    minimal validation checking). It may or may not contain all or parts
    of the File Meta Information, and hence may or may not be conformant
    with the DICOM File Format.

    Parameters
    ----------
    filename : str or file-like
        Name of file or the file-like to write the new DICOM file to.
    write_like_original : bool
        If True (default), preserves the following information from the
        Dataset (and may result in a non-conformant file):
        - preamble -- if the original file has no preamble then none
          will be written.
        - file_meta -- if the original file was missing any required
          File Meta Information Group elements then they will not be
          added or written. If (0002,0000) 'File Meta Information Group
          Length' is present then it may have its value updated.
        - seq.is_undefined_length -- if original had delimiters, write
          them now too, instead of the more sensible length characters.
        - is_undefined_length_sequence_item -- for datasets that belong
          to a sequence, write the undefined length delimiters if that
          is what the original had.
        If False, produces a file conformant with the DICOM File Format,
        with explicit lengths for all elements.

    Raises
    ------
    AttributeError
        If `is_little_endian` or `is_implicit_VR` is missing.

    See Also
    --------
    pydicom.filewriter.write_dataset
        Write a DICOM Dataset to a file.
    pydicom.filewriter.write_file_meta_info
        Write the DICOM File Meta Information Group elements to a file.
    pydicom.filewriter.write_file
        Write a DICOM file from a FileDataset instance.
    """
    # Both encoding flags are needed before anything can be written
    encoding_attrs = ('is_little_endian', 'is_implicit_VR')
    if all(hasattr(self, attr) for attr in encoding_attrs):
        pydicom.write_file(filename, self, write_like_original)
        return

    owner_name = self.__class__.__name__
    raise AttributeError("'{0}.is_little_endian' and "
                         "'{0}.is_implicit_VR' must exist and be "
                         "set appropriately before "
                         "saving.".format(owner_name))
def __setattr__(self, name, value):
    """Intercept any attempts to set a value for an instance attribute.

    A DICOM keyword is routed to the matching tag/DataElement; anything
    else becomes an ordinary python instance attribute.

    Parameters
    ----------
    name : str
        The element keyword for the DataElement you wish to add/change.
        If `name` is not a DICOM element keyword then this will be the
        name of the attribute to be added/changed.
    value
        The value for the attribute to be added/changed.

    Raises
    ------
    ValueError
        If `name` is the keyword of a repeating group element.
    """
    tag = tag_for_keyword(name)

    if tag is None:
        if repeater_has_keyword(name):
            # Repeater keywords do not map onto a single tag
            raise ValueError('{} is a DICOM repeating group element and must '
                             'be added using the add() or add_new() methods.'
                             .format(name))
        # name not in dicom dictionary - setting a non-dicom instance
        # attribute
        # XXX note if user mis-spells a dicom data_element - no error!!!
        super(Dataset, self).__setattr__(name, value)
        return

    # Successfully mapped name to a tag
    if tag in self:
        # Element already present: only its value changes
        element = self[tag]
        element.value = value
    else:
        # First time this tag is seen: build a new element for it
        element = DataElement(tag, dictionary_VR(tag), value)

    # Now have the element - store it in this dict
    self[tag] = element


def __setitem__(self, key, value):
    """Operator for Dataset[key] = value.

    Checks consistency between `key` and the element, and handles
    private tags.

    Parameters
    ----------
    key : int
        The tag for the element to be added to the Dataset.
    value : pydicom.dataelem.DataElement or pydicom.dataelem.RawDataElement
        The element to add to the Dataset.

    Raises
    ------
    NotImplementedError
        If `key` is a slice.
    TypeError
        If `value` is neither a DataElement nor a RawDataElement.
    ValueError
        If the `key` value doesn't match DataElement.tag.
    """
    if isinstance(key, slice):
        raise NotImplementedError('Slicing is not supported for setting '
                                  'Dataset elements.')

    # Subclasses are accepted too, e.g. DeferredDataElement
    is_element = isinstance(value, (DataElement, RawDataElement))
    if not is_element:
        raise TypeError("Dataset contents must be DataElement instances.")

    tag = Tag(value.tag)
    if key != tag:
        raise ValueError("DataElement.tag must match the dictionary key")

    element = value
    if tag.is_private:
        # See PS 3.5-2008 section 7.8.1 (p. 44) for how blocks are reserved
        logger.debug("Setting private tag %r" % tag)
        creator_tag = Tag(tag.group, tag.elem >> 8)
        if creator_tag in self and tag != creator_tag:
            # A raw element is converted first so the creator can be
            # attached to it
            if isinstance(element, RawDataElement):
                element = DataElement_from_raw(element, self._character_set)
            element.private_creator = self[creator_tag].value

    dict.__setitem__(self, tag, element)


def _slice_dataset(self, start, stop, step):
    """Return the element tags in the Dataset that match the slice.

    Parameters
    ----------
    start : int or None
        The slice's starting element tag value.
    stop : int or None
        The slice's stopping element tag value.
    step : int or None
        The slice's step size.

    Returns
    ------
    list of pydicom.tag.Tag
        The tags in the Dataset that meet the conditions of the slice.
    """
    # Check the starting/stopping Tags are valid when used; the Tag()
    # results are discarded, only the conversion itself matters here.
    for bound in (start, stop):
        if bound:
            Tag(bound)

    ordered = sorted(self.keys())

    # If the Dataset is empty, return an empty list
    if len(ordered) == 0:
        return []

    # Ensure we have valid Tags when start/stop are None
    start = ordered[0] if start is None else start
    stop = ordered[-1] + 1 if stop is None else stop

    in_range = [candidate for candidate in ordered
                if Tag(start) <= candidate < Tag(stop)]
    return in_range[::step]


def __str__(self):
    """Handle str(dataset)."""
    text = self._pretty_str()
    return text
def top(self):
    """Return a str of the Dataset's top level DataElements only.

    Items nested inside sequences are left out of the result.
    """
    top_level_text = self._pretty_str(top_level_only=True)
    return top_level_text
def trait_names(self):
    """Return a list of valid names for auto-completion code.

    Used in IPython, so that data element names can be found and offered
    for autocompletion on the IPython command line.
    """
    # dir() is only valid for python >=2.6, else use self.__dir__()
    completions = dir(self)
    return completions
def update(self, dictionary):
    """Extend dict.update() to handle DICOM keywords.

    String keys are applied through attribute assignment; every other
    key is converted with `Tag()` and stored by item assignment.

    Parameters
    ----------
    dictionary : dict
        The mapping whose items are copied into this Dataset.
    """
    # Work on a snapshot of the items
    pairs = list(dictionary.items())
    for name_or_tag, new_value in pairs:
        if not isinstance(name_or_tag, (str, compat.text_type)):
            self[Tag(name_or_tag)] = new_value
        else:
            setattr(self, name_or_tag, new_value)
def iterall(self):
    """Iterate through the Dataset, yielding all DataElements.

    Unlike Dataset.__iter__, this *does* recurse into sequences, and so
    returns all data elements as if the file were "flattened". A sequence
    element is yielded first, followed by the elements of each of its
    items.

    Yields
    ------
    pydicom.dataelem.DataElement
    """
    for element in self:
        yield element
        if element.VR != "SQ":
            continue
        # Descend into every item of the sequence
        for item in element.value:
            for nested in item.iterall():
                yield nested
def walk(self, callback, recursive=True):
    """Iterate through the DataElements and run `callback` on each.

    Visit all DataElements, possibly recursing into sequences and their
    datasets. The callback function is called for each DataElement
    (including SQ elements). Can be used to perform an operation on
    certain types of DataElements, e.g. `remove_private_tags()` finds all
    private tags and deletes them.

    DataElements are visited in order of increasing tag number within
    their dataset.

    Parameters
    ----------
    callback
        A callable that takes two arguments:
            * a Dataset
            * a DataElement belonging to that Dataset
    recursive : bool
        Flag to indicate whether to recurse into Sequences.
    """
    # The sorted tag list is taken up front, so the callback is free to
    # delete elements while the walk is in progress.
    for tag in sorted(self.keys()):
        with tag_in_exception(tag):
            element = self[tag]
            callback(self, element)  # self = this Dataset

            # 'tag in self' is needed in case callback deleted the element
            descend = recursive and tag in self and element.VR == "SQ"
            if descend:
                for item in element.value:
                    item.walk(callback)
# Alias: repr(dataset) produces the same text as str(dataset).
__repr__ = __str__
class FileDataset(Dataset):
    """An extension of Dataset to make reading and writing to file-like easier.

    Attributes
    ----------
    preamble : str or bytes or None
        The optional DICOM preamble prepended to the dataset, if available.
    file_meta : pydicom.dataset.Dataset or None
        The Dataset's file meta information as a Dataset, if available (None
        if not present). Consists of group 0002 elements.
    filename : str or None
        The filename that the dataset was read from (if read from file) or
        None if the filename is not available (if read from a BytesIO or
        similar).
    fileobj_type
        The object type of the file-like the Dataset was read from.
    is_implicit_VR : bool
        True if the dataset encoding is implicit VR, False otherwise.
    is_little_endian : bool
        True if the dataset encoding is little endian byte ordering, False
        otherwise.
    timestamp : float or None
        The modification time of the file the dataset was read from, None if
        the modification time is not available.
    """

    def __init__(self, filename_or_obj, dataset, preamble=None, file_meta=None,
                 is_implicit_VR=True, is_little_endian=True):
        """Initialize a Dataset read from a DICOM file.

        Parameters
        ----------
        filename_or_obj : str, file-like or None
            Full path and filename to the file, or the file-like object
            the data was read from. Use None if is a BytesIO.
        dataset : Dataset or dict
            Some form of dictionary, usually a Dataset from read_dataset().
        preamble : bytes or str, optional
            The 128-byte DICOM preamble.
        file_meta : Dataset, optional
            The file meta info dataset, as returned by _read_file_meta,
            or an empty dataset if no file meta information is in the file.
        is_implicit_VR : bool, optional
            True (default) if implicit VR transfer syntax used; False if
            explicit VR.
        is_little_endian : boolean
            True (default) if little-endian transfer syntax used; False if
            big-endian.
        """
        Dataset.__init__(self, dataset)
        self.preamble = preamble
        self.file_meta = file_meta
        self.is_implicit_VR = is_implicit_VR
        self.is_little_endian = is_little_endian

        if isinstance(filename_or_obj, compat.string_types):
            self.filename = filename_or_obj
            self.fileobj_type = open
        elif isinstance(filename_or_obj, io.BufferedReader):
            self.filename = filename_or_obj.name
            # This is the appropriate constructor for io.BufferedReader
            self.fileobj_type = open
        else:
            # use __class__ python <2.7?;
            # http://docs.python.org/reference/datamodel.html
            self.fileobj_type = filename_or_obj.__class__
            if getattr(filename_or_obj, "name", False):
                self.filename = filename_or_obj.name
            elif getattr(filename_or_obj, "filename", False):
                # gzip python <2.7?
                self.filename = filename_or_obj.filename
            else:
                # e.g. came from BytesIO or something file-like
                self.filename = None

        self.timestamp = None
        if stat_available and self.filename:
            # Ask for the stat info directly instead of testing for
            # existence first: the file may disappear between the two
            # calls, and a missing/unreadable file simply means that no
            # modification time is available.
            try:
                self.timestamp = os.stat(self.filename).st_mtime
            except (OSError, ValueError):
                # Deliberate best effort: timestamp stays None
                pass