Source code for navis.io.nmx_io

#    This script is part of navis (http://www.github.com/navis-org/navis).
#    Copyright (C) 2018 Philipp Schlegel
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

import io

import networkx as nx
import pandas as pd
import xml.etree.ElementTree as ET

from typing import Union, Dict, Optional, Any, IO, Iterable
from zipfile import ZipFile

from .. import config, core
from . import base


__all__ = ["read_nmx", "read_nml"]

# Set up logging
logger = config.get_logger(__name__)

NODE_COLUMNS = ('node_id', 'label', 'x', 'y', 'z', 'radius', 'parent_id')
DEFAULT_PRECISION = 32
DEFAULT_FMT = "{name}.nmx"


class NMLReader(base.BaseReader):
    def __init__(
        self,
        precision: int = DEFAULT_PRECISION,
        attrs: Optional[Dict[str, Any]] = None
    ):
        super().__init__(fmt='',
                         attrs=attrs,
                         file_ext='.nml',
                         read_binary=False,
                         name_fallback='NML')

        # parse_precision is expected to map the requested bit depth to numpy
        # dtypes (e.g. 32 -> int32/float32)
        int_, float_ = base.parse_precision(precision)
        self._dtypes = {
            'node_id': int_,
            'parent_id': int_,
            'label': 'category',
            'x': float_,
            'y': float_,
            'z': float_,
            'radius': float_,
        }

    def read_buffer(
        self, f: IO, attrs: Optional[Dict[str, Any]] = None
    ) -> 'core.TreeNeuron':
        """Read .nml buffer into a TreeNeuron.

        NML files are XML-encoded files containing data for a single neuron.

        Parameters
        ----------
        f :         IO
                    Readable buffer (str or bytes).
        attrs :     dict | None
                    Arbitrary attributes to include in the TreeNeuron.

        Returns
        -------
        core.TreeNeuron
        """
        return self.read_nml(f.read(), attrs=attrs)

    def read_nml(
        self, f: Union[str, bytes], attrs: Optional[Dict[str, Any]] = None
    ) -> 'core.TreeNeuron':
        """Read .nml string or bytes into a TreeNeuron.

        NML files are XML files containing a single neuron.

        Parameters
        ----------
        f :         str | bytes
                    NML file content (XML).
        attrs :     dict | None
                    Arbitrary attributes to include in the TreeNeuron.

        Returns
        -------
        core.TreeNeuron
        """
        if isinstance(f, bytes):
            f = f.decode()

        f = io.StringIO(f)
        root = ET.parse(f).getroot()

        # Each <thing> element holds a single skeleton: its first child lists
        # the nodes, its second child the edges
        for element in root:
            if element.tag == 'thing':
                nodes = pd.DataFrame.from_records([n.attrib for n in element[0]])
                edges = pd.DataFrame.from_records([n.attrib for n in element[1]])
                edges = edges.astype(self._dtypes['node_id'])

                nodes.rename({'id': 'node_id'}, axis=1, inplace=True)
                nodes = nodes.astype({k: v for k, v in self._dtypes.items() if k in nodes.columns})

        # Reconstruct parent-child relationships: build an undirected graph
        # from the edge list, then orient the edges by growing a BFS tree
        # from an arbitrary root node
        G = nx.Graph()
        G.add_edges_from(edges.values)
        tree = nx.bfs_tree(G, list(G.nodes)[0])
        edges = pd.DataFrame(list(tree.edges), columns=['source', 'target'])
        # Look up each node's parent; roots have no parent and get -1
        nodes['parent_id'] = edges.set_index('target').reindex(nodes.node_id.values).source.values
        nodes['parent_id'] = nodes.parent_id.fillna(-1).astype(self._dtypes['node_id'])
        nodes.sort_values('node_id', inplace=True)

        return core.TreeNeuron(
            nodes,
            **(self._make_attributes({'name': 'NML', 'origin': 'nml'}, attrs))
        )
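
# The parent-column reconstruction in ``NMLReader.read_nml`` above can be
# tried in isolation. A minimal sketch with toy node IDs (illustration only,
# not part of navis):
#
#   >>> import networkx as nx
#   >>> G = nx.Graph([(1, 2), (2, 3), (2, 4)])  # undirected skeleton edges
#   >>> tree = nx.bfs_tree(G, 1)                # orient edges away from root 1
#   >>> sorted(tree.edges)                      # (parent, child) pairs
#   [(1, 2), (2, 3), (2, 4)]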


class NMXReader(NMLReader):
    """This is a version of the NML file reader that reads from zipped archives."""
    def __init__(
        self,
        precision: int = DEFAULT_PRECISION,
        attrs: Optional[Dict[str, Any]] = None
    ):
        super().__init__(precision=precision,
                         attrs=attrs)

        # Overwrite some of the settings
        self.read_binary = True
        self.file_ext = '.nmx'
        self.name_fallback = 'NMX'

    def read_buffer(
        self, f: IO, attrs: Optional[Dict[str, Any]] = None
    ) -> 'core.TreeNeuron':
        """Read .nmx buffer into a TreeNeuron.

        NMX files are zip files containing XML-encoded .nml files containing
        data for a single neuron.

        Parameters
        ----------
        f :         IO
                    Readable buffer (must be bytes).
        attrs :     dict | None
                    Arbitrary attributes to include in the TreeNeuron.

        Returns
        -------
        core.TreeNeuron
        """
        if not isinstance(f.read(0), bytes):
            raise ValueError(f'Expected bytes, got "{type(f.read(0))}"')

        zip = ZipFile(f)
        for f in zip.filelist:
            if f.filename.endswith('.nml') and 'skeleton' in f.filename:
                # Make sure we have a dict to attach file-level meta data to
                attrs = attrs or {}
                attrs['file'] = f.filename
                attrs['id'] = f.filename.split('/')[0]
                return self.read_nml(zip.read(f), attrs=attrs)
        logger.warning(f'Skipped "{f.filename.split("/")[0]}.nmx": failed to '
                       'import skeleton.')
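
# NMX archives are ordinary zip files and can be inspected with the standard
# library. A minimal sketch (the file name is hypothetical; the
# "<id>/...skeleton....nml" layout is the assumption made in
# ``NMXReader.read_buffer`` above):
#
#   >>> from zipfile import ZipFile
#   >>> with ZipFile('neuron_123.nmx') as zf:
#   ...     skels = [fn for fn in zf.namelist()
#   ...              if fn.endswith('.nml') and 'skeleton' in fn]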


def read_nmx(f: Union[str, pd.DataFrame, Iterable],
             include_subdirs: bool = False,
             parallel: Union[bool, int] = 'auto',
             precision: int = 32,
             limit: Optional[int] = None,
             **kwargs) -> 'core.NeuronObject':
    """Read NMX files into Neuron/Lists.

    NMX is an xml-based format used by pyKNOSSOS. See e.g.
    `here <https://doi.org/10.5281/zenodo.58985>`_ for a data dump of neurons
    from Wanner et al. (2016).

    Parameters
    ----------
    f :                 str
                        Filename or folder. If folder, will import all
                        ``.nmx`` files.
    include_subdirs :   bool, optional
                        If True and ``f`` is a folder, will also search
                        subdirectories for ``.nmx`` files.
    parallel :          "auto" | bool | int
                        Defaults to ``auto`` which means only use parallel
                        processing if more than 200 files are imported.
                        Spawning and joining processes causes overhead and is
                        considerably slower for imports of small numbers of
                        neurons. Integer will be interpreted as the number of
                        cores (otherwise defaults to ``os.cpu_count() // 2``).
    precision :         int [8, 16, 32, 64] | None
                        Precision for data. Defaults to 32 bit integers/floats.
                        If ``None`` will let pandas infer data types - this
                        typically leads to higher than necessary precision.
    limit :             int, optional
                        If reading from a folder you can use this parameter to
                        read only the first ``limit`` NMX files. Useful if
                        wanting to get a sample from a large library of
                        skeletons.
    **kwargs
                        Keyword arguments passed to the construction of
                        ``navis.TreeNeuron``. You can use this to e.g. set
                        meta data.

    Returns
    -------
    navis.NeuronList

    See Also
    --------
    :func:`navis.read_nml`
                        Read NML file(s).

    """
    reader = NMXReader(precision=precision, attrs=kwargs)
    # Read neurons
    neurons = reader.read_any(f,
                              parallel=parallel,
                              limit=limit,
                              include_subdirs=include_subdirs)

    # Failed reads will produce empty neurons which we need to remove
    if isinstance(neurons, core.NeuronList):
        neurons = neurons[neurons.has_nodes]

    return neurons
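
# Example usage for ``read_nmx`` (paths are hypothetical):
#
#   >>> import navis
#   >>> # Read a folder of pyKNOSSOS dumps, limited to the first 10 files
#   >>> nl = navis.read_nmx('skeletons/', limit=10)
#   >>> # Attach meta data to each neuron via **kwargs
#   >>> n = navis.read_nmx('neuron_123.nmx', dataset='wanner2016')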


def read_nml(f: Union[str, pd.DataFrame, Iterable],
             include_subdirs: bool = False,
             parallel: Union[bool, int] = 'auto',
             precision: int = 32,
             limit: Optional[int] = None,
             **kwargs) -> 'core.NeuronObject':
    """Read xml-based NML files into Neuron/Lists.

    Parameters
    ----------
    f :                 str
                        Filename or folder. If folder, will import all
                        ``.nml`` files.
    include_subdirs :   bool, optional
                        If True and ``f`` is a folder, will also search
                        subdirectories for ``.nml`` files.
    parallel :          "auto" | bool | int
                        Defaults to ``auto`` which means only use parallel
                        processing if more than 200 files are imported.
                        Spawning and joining processes causes overhead and is
                        considerably slower for imports of small numbers of
                        neurons. Integer will be interpreted as the number of
                        cores (otherwise defaults to ``os.cpu_count() // 2``).
    precision :         int [8, 16, 32, 64] | None
                        Precision for data. Defaults to 32 bit integers/floats.
                        If ``None`` will let pandas infer data types - this
                        typically leads to higher than necessary precision.
    limit :             int, optional
                        If reading from a folder you can use this parameter to
                        read only the first ``limit`` NML files. Useful if
                        wanting to get a sample from a large library of
                        skeletons.
    **kwargs
                        Keyword arguments passed to the construction of
                        ``navis.TreeNeuron``. You can use this to e.g. set
                        meta data.

    Returns
    -------
    navis.NeuronList

    See Also
    --------
    :func:`navis.read_nmx`
                        Read NMX files (collections of NML files).

    """
    reader = NMLReader(precision=precision, attrs=kwargs)
    # Read neurons
    neurons = reader.read_any(f,
                              parallel=parallel,
                              limit=limit,
                              include_subdirs=include_subdirs)

    return neurons
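
# Example usage for ``read_nml`` (path is hypothetical):
#
#   >>> import navis
#   >>> n = navis.read_nml('skeleton.nml', precision=64)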