Source code for fsleyes.plugins.controls.filetreepanel.query

#
# query.py - The FileTreeQuery class
#
# Author: Paul McCarthy <pauldmccarthy@gmail.com>
# Author: Michiel Cottaar <michiel.cottaar@ndcn.ox.ac.uk>
#
"""This module contains the :class:`FileTreeQuery` class, which can be used to
search for files in a directory described by a `FileTree
<https://git.fmrib.ox.ac.uk/ndcn0236/file-tree/>`_. A ``FileTreeQuery`` object
returns :class:`Match` objects which each represent a file that is described
by the ``FileTree``, and which is present in the directory.

The following utility functions, used by the ``FileTreeQuery`` class, are also
defined in this module:

.. autosummary::
   :nosignatures:

   scan
"""


import              logging
import              collections
import functools as ft

import numpy     as np


# Module-level logger, named after this module.
log = logging.getLogger(__name__)


class FileTreeQuery:
    """The ``FileTreeQuery`` class uses a ``FileTree`` to search
    a directory for files which match a specific query.

    A ``FileTreeQuery`` scans the contents of a directory which is described
    by a ``FileTree``, and identifies all file types (a.k.a. *templates* or
    *short names*) that are present, and the values of variables within each
    short name that are present. The :meth:`query` method can be used to
    retrieve files which match a specific template, and variable values.

    The :meth:`query` method returns a collection of :class:`Match` objects,
    each of which represents one file which matches the query.

    Example usage::

        >>> from file_tree import FileTree
        >>> from fsleyes.filetree import FileTreeQuery

        >>> tree  = FileTree.read('bids_raw', './my_bids_data')
        >>> query = FileTreeQuery(tree)

        >>> query.axes('anat_image')
        ['acq', 'ext', 'modality', 'participant', 'rec', 'run_index',
         'session']

        >>> query.variables('anat_image')
        {'acq': [None],
         'ext': ['.nii.gz'],
         'modality': ['T1w', 'T2w'],
         'participant': ['01', '02', '03'],
         'rec': [None],
         'run_index': [None, '01', '02', '03'],
         'session': [None]}

        >>> query.query('anat_image', participant='01')
        [Match(anat_image: ./my_bids_data/sub-01/anat/sub-01_T1w.nii.gz),
         Match(anat_image: ./my_bids_data/sub-01/anat/sub-01_T2w.nii.gz)]

    Matches for templates contained within sub-trees are referred to by
    constructing a hierarchical path from the sub-tree template name(s), and
    the template name.
    """

    def __init__(self, tree):
        """Create a ``FileTreeQuery``. The contents of the tree directory are
        scanned via the :func:`scan` function, which may take some time for
        large data sets.

        :arg tree: The ``FileTree`` object
        """
        self.__tree        = tree
        self.__matcharrays = scan(tree)

    def axes(self, template):
        """Returns a list containing the names of variables present in files
        of the given ``template`` type.

        :arg template: Template to look up. A ``KeyError`` is raised if the
                       template was not found by the scan.
        """
        return list(self.__matcharrays[template].coords.keys())

    def variables(self, template=None):
        """Return a dict of ``{variable : [values]}`` mappings.
        This dict describes all variables and their possible values in
        the tree.

        If a ``template`` is specified, only variables which are present in
        files of that ``template`` type are returned.

        :arg template: Optional template to restrict the result to.
        :returns:      A dict of ``{variable : [values]}``, where each list
                       of values is sorted by its string representation.
        """
        if template is not None: templates = [template]
        else:                    templates = self.__matcharrays.keys()

        variables = collections.defaultdict(set)

        for tmpl in templates:
            coords = self.__matcharrays[tmpl].coords
            for axis in coords.keys():
                variables[axis] |= set(coords[axis].data)

        # Variable values will usually be strings,
        # but can sometimes be None, so we convert
        # to str to handle this.
        return {name : sorted(vals, key=str)
                for name, vals in variables.items()}

    @property
    def tree(self):
        """Returns the ``FileTree`` associated with this ``FileTreeQuery``.
        """
        return self.__tree

    @property
    def templates(self):
        """Returns a list containing all templates of the ``FileTree`` that
        are present in the directory.
        """
        return list(self.__matcharrays.keys())

    def matcharray(self, template):
        """Returns a reference to the ``xarray.DataArray`` which contains the
        file paths for the given ``template``.
        """
        return self.__matcharrays[template]

    def query(self, template, **variables):
        """Search for files of the given ``template``, which match
        the specified ``variables``. All hits are returned for variables
        that are unspecified.

        :arg template: Template of files to search for.

        All other arguments are assumed to be ``variable=value`` pairs,
        used to restrict which matches are returned. All values are returned
        for variables that are not specified, or variables which are given a
        value of ``'*'``.

        :returns: A list of ``Match`` objects
        """

        matcharray = self.__matcharrays[template]

        # Build an index containing one entry for
        # every axis of the template array. Only
        # the axis names are needed here, so use
        # axes() rather than variables(), which
        # would collect and sort every value.
        slc = []
        for var in self.axes(template):
            val = variables.get(var, '*')
            if val == '*': slc.append(slice(None))
            else:          slc.append(val)

        # Retrieve the results
        results = matcharray.loc[tuple(slc)]

        # Convert xarray.DataArray into a list of
        # Match objects. I can't find an elegant
        # way to do this - something like apply_ufunc
        # would be nice, but we lose the labelling
        # information.
        matches = []
        riter   = np.nditer(results, flags=['multi_index'])

        for fname in riter:

            # Entries are the empty string where
            # no file was found - skip them
            fname = fname.item()
            if fname == '':
                continue

            # Look up the variable values associated
            # with this file name, and create a
            # corresponding Match object
            index  = riter.multi_index
            coords = results[index].coords
            rvars  = {ax : coords[ax].data[()] for ax in coords}

            matches.append(Match(fname, template, rvars))

        return matches
@ft.total_ordering
class Match:
    """A ``Match`` object represents a file with a name matching a template in
    a ``FileTree``. The :meth:`FileTreeQuery.query` method returns ``Match``
    objects.

    ``Match`` objects are ordered by their file name. Two ``Match`` objects
    are equal if their file name, template, and variables are all equal.
    """

    def __init__(self, filename, template, variables):
        """Create a ``Match`` object. All arguments are added as attributes.

        :arg filename:  name of existing file
        :arg template:  template identifier
        :arg variables: Dictionary of ``{variable : value}`` mappings
                        containing all variables present in the file name.
        """
        self.__filename  = filename
        self.__template  = template
        self.__variables = variables

    @property
    def filename(self):
        """Returns the file name of this ``Match``. """
        return self.__filename

    @property
    def template(self):
        """Returns the template identifier of this ``Match``. """
        return self.__template

    @property
    def variables(self):
        """Returns a copy of the ``{variable : value}`` dictionary of this
        ``Match``.
        """
        return dict(self.__variables)

    def __eq__(self, other):
        """Returns ``True`` if ``other`` is a ``Match`` with the same file
        name, template and variables. ``NotImplemented`` is returned for
        other types, so that ``==`` evaluates to ``False``.
        """
        if not isinstance(other, Match):
            return NotImplemented
        return (self.filename  == other.filename and
                self.template  == other.template and
                self.variables == other.variables)

    def __lt__(self, other):
        """Orders ``Match`` objects by file name. ``NotImplemented`` is
        returned for other types, so that the comparisons generated by
        ``functools.total_ordering`` are consistent with each other, rather
        than ``<`` giving ``False`` and ``>`` giving ``True``.
        """
        if not isinstance(other, Match):
            return NotImplemented
        return self.filename < other.filename

    def __hash__(self):
        """Returns a hash of the file name and template. Defining ``__eq__``
        would otherwise make ``Match`` objects unhashable. Equal ``Match``
        objects always have equal file names and templates, so the hash is
        consistent with ``__eq__``.
        """
        return hash((self.filename, self.template))

    def __repr__(self):
        """Returns a string representation of this ``Match``. """
        return 'Match({}: {})'.format(self.template, self.filename)

    def __str__(self):
        """Returns a string representation of this ``Match``. """
        return repr(self)
def scan(tree, filterEmpty=True):
    """Scans the directory of the given ``FileTree`` to find all files which
    match a tree template.

    A dictionary of ``{template : xarray.DataArray}`` mappings is returned,
    where each ``DataArray`` has dimensions corresponding to variables used
    in the template, and contains the names (as strings) of matching files
    present on disk. Entries in an array for variable values which do not
    exist on disk are set to the empty string.

    See the ``file_tree.FileTree.get_mult_glob`` method for more details.

    :arg tree:        ``FileTree`` to scan

    :arg filterEmpty: If ``True`` (the default), file tree templates which
                      do not match any files on disk are not returned.

    :returns:         A dict of ``{template : xarray.DataArray}`` objects,
                      one for each template.
    """

    names   = tree.template_keys(only_leaves=True)
    globbed = tree.get_mult_glob(names)

    def isEmpty(arr):
        # A template has no files on disk when
        # every entry of its array is the empty
        # string
        return bool((arr == '').all())

    return {name : globbed[name]
            for name in names
            if not (filterEmpty and isEmpty(globbed[name]))}