Source code for navis.io.nmx_io

#    This script is part of navis (http://www.github.com/navis-org/navis).
#    Copyright (C) 2018 Philipp Schlegel
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

import io

import networkx as nx
import pandas as pd
import xml.etree.ElementTree as ET

from typing import Union, Dict, Optional, Any, IO, Iterable
from zipfile import ZipFile

from .. import config, core
from . import base


__all__ = ["read_nmx", "read_nml"]

# Set up logging
logger = config.get_logger(__name__)

NODE_COLUMNS = ('node_id', 'label', 'x', 'y', 'z', 'radius', 'parent_id')
DEFAULT_PRECISION = 32
DEFAULT_FMT = "{name}.nmx"


class NMLReader(base.BaseReader):
    def __init__(
        self,
        precision: int = DEFAULT_PRECISION,
        attrs: Optional[Dict[str, Any]] = None
    ):
        super().__init__(fmt='',
                         attrs=attrs,
                         file_ext='.nml',
                         read_binary=False,
                         name_fallback='NML')

        # parse_precision is expected to map the requested bit depth to numpy
        # dtypes (e.g. 32 -> int32/float32)
        int_, float_ = base.parse_precision(precision)
        self._dtypes = {
            'node_id': int_,
            'parent_id': int_,
            'label': 'category',
            'x': float_,
            'y': float_,
            'z': float_,
            'radius': float_,
        }

    def read_buffer(
        self, f: IO, attrs: Optional[Dict[str, Any]] = None
    ) -> 'core.TreeNeuron':
        """Read .nml buffer into a TreeNeuron.

        NML files are XML-encoded files containing data for a single neuron.

        Parameters
        ----------
        f :         IO
                    Readable buffer (str or bytes).
        attrs :     dict | None
                    Arbitrary attributes to include in the TreeNeuron.

        Returns
        -------
        core.TreeNeuron
        """
        return self.read_nml(f.read(), attrs=attrs)

    def read_nml(
        self, f: Union[str, bytes], attrs: Optional[Dict[str, Any]] = None
    ) -> 'core.TreeNeuron':
        """Read .nml string or bytes into a TreeNeuron.

        NML files are XML files containing a single neuron.

        Parameters
        ----------
        f :         str | bytes
                    NML file content (XML).
        attrs :     dict | None
                    Arbitrary attributes to include in the TreeNeuron.

        Returns
        -------
        core.TreeNeuron
        """
        if isinstance(f, bytes):
            f = f.decode()

        f = io.StringIO(f)
        root = ET.parse(f).getroot()

        # Each <thing> element holds a single skeleton: its first child lists
        # the nodes, its second child the edges
        for element in root:
            if element.tag == 'thing':
                nodes = pd.DataFrame.from_records([n.attrib for n in element[0]])
                edges = pd.DataFrame.from_records([n.attrib for n in element[1]])
                edges = edges.astype(self._dtypes['node_id'])

                nodes.rename({'id': 'node_id'}, axis=1, inplace=True)
                nodes = nodes.astype({k: v for k, v in self._dtypes.items() if k in nodes.columns})

        # Reconstruct parent-child relationships: build an undirected graph
        # from the edge list, then orient the edges by growing a BFS tree
        # from an arbitrary root node
        G = nx.Graph()
        G.add_edges_from(edges.values)
        tree = nx.bfs_tree(G, list(G.nodes)[0])
        edges = pd.DataFrame(list(tree.edges), columns=['source', 'target'])
        # Look up each node's parent; roots have no parent and get -1
        nodes['parent_id'] = edges.set_index('target').reindex(nodes.node_id.values).source.values
        nodes['parent_id'] = nodes.parent_id.fillna(-1).astype(self._dtypes['node_id'])
        nodes.sort_values('node_id', inplace=True)

        return core.TreeNeuron(
            nodes,
            **(self._make_attributes({'name': 'NML', 'origin': 'nml'}, attrs))
        )
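
# The parent-column reconstruction in ``NMLReader.read_nml`` above can be
# tried in isolation. A minimal sketch with toy node IDs (illustration only,
# not part of navis):
#
#   >>> import networkx as nx
#   >>> G = nx.Graph([(1, 2), (2, 3), (2, 4)])  # undirected skeleton edges
#   >>> tree = nx.bfs_tree(G, 1)                # orient edges away from root 1
#   >>> sorted(tree.edges)                      # (parent, child) pairs
#   [(1, 2), (2, 3), (2, 4)]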


class NMXReader(NMLReader):
    """This is a version of the NML file reader that reads from zipped archives."""
    def __init__(
        self,
        precision: int = DEFAULT_PRECISION,
        attrs: Optional[Dict[str, Any]] = None
    ):
        super().__init__(precision=precision,
                         attrs=attrs)

        # Overwrite some of the settings
        self.read_binary = True
        self.file_ext = '.nmx'
        self.name_fallback = 'NMX'

    def read_buffer(
        self, f: IO, attrs: Optional[Dict[str, Any]] = None
    ) -> 'core.TreeNeuron':
        """Read .nmx buffer into a TreeNeuron.

        NMX files are zip files containing XML-encoded .nml files containing
        data for a single neuron.

        Parameters
        ----------
        f :         IO
                    Readable buffer (must be bytes).
        attrs :     dict | None
                    Arbitrary attributes to include in the TreeNeuron.

        Returns
        -------
        core.TreeNeuron
        """
        if not isinstance(f.read(0), bytes):
            raise ValueError(f'Expected bytes, got "{type(f.read(0))}"')

        zip = ZipFile(f)
        for f in zip.filelist:
            if f.filename.endswith('.nml') and 'skeleton' in f.filename:
                # Make sure we have a dict to attach file-level meta data to
                attrs = attrs or {}
                attrs['file'] = f.filename
                attrs['id'] = f.filename.split('/')[0]
                return self.read_nml(zip.read(f), attrs=attrs)
        logger.warning(f'Skipped "{f.filename.split("/")[0]}.nmx": failed to '
                       'import skeleton.')
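
# NMX archives are ordinary zip files and can be inspected with the standard
# library. A minimal sketch (the file name is hypothetical; the
# "<id>/...skeleton....nml" layout is the assumption made in
# ``NMXReader.read_buffer`` above):
#
#   >>> from zipfile import ZipFile
#   >>> with ZipFile('neuron_123.nmx') as zf:
#   ...     skels = [fn for fn in zf.namelist()
#   ...              if fn.endswith('.nml') and 'skeleton' in fn]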


def read_nmx(f: Union[str, pd.DataFrame, Iterable],
             include_subdirs: bool = False,
             parallel: Union[bool, int] = 'auto',
             precision: int = 32,
             limit: Optional[int] = None,
             **kwargs) -> 'core.NeuronObject':
    """Read NMX files into Neuron/Lists.

    NMX is an xml-based format used by pyKNOSSOS. See e.g.
    `here <https://doi.org/10.5281/zenodo.58985>`_ for a data dump of neurons
    from Wanner et al. (2016).

    Parameters
    ----------
    f :                 str
                        Filename or folder. If folder, will import all
                        ``.nmx`` files.
    include_subdirs :   bool, optional
                        If True and ``f`` is a folder, will also search
                        subdirectories for ``.nmx`` files.
    parallel :          "auto" | bool | int
                        Defaults to ``auto`` which means only use parallel
                        processing if more than 200 files are imported.
                        Spawning and joining processes causes overhead and is
                        considerably slower for imports of small numbers of
                        neurons. Integer will be interpreted as the number of
                        cores (otherwise defaults to ``os.cpu_count() // 2``).
    precision :         int [8, 16, 32, 64] | None
                        Precision for data. Defaults to 32 bit integers/floats.
                        If ``None`` will let pandas infer data types - this
                        typically leads to higher than necessary precision.
    limit :             int, optional
                        If reading from a folder you can use this parameter to
                        read only the first ``limit`` NMX files. Useful if
                        wanting to get a sample from a large library of
                        skeletons.
    **kwargs
                        Keyword arguments passed to the construction of
                        ``navis.TreeNeuron``. You can use this to e.g. set
                        meta data.

    Returns
    -------
    navis.NeuronList

    See Also
    --------
    :func:`navis.read_nml`
                        Read NML file(s).

    """
    reader = NMXReader(precision=precision, attrs=kwargs)
    # Read neurons
    neurons = reader.read_any(f,
                              parallel=parallel,
                              limit=limit,
                              include_subdirs=include_subdirs)

    # Failed reads will produce empty neurons which we need to remove
    if isinstance(neurons, core.NeuronList):
        neurons = neurons[neurons.has_nodes]

    return neurons
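
# Example usage for ``read_nmx`` (paths are hypothetical):
#
#   >>> import navis
#   >>> # Read a folder of pyKNOSSOS dumps, limited to the first 10 files
#   >>> nl = navis.read_nmx('skeletons/', limit=10)
#   >>> # Attach meta data to each neuron via **kwargs
#   >>> n = navis.read_nmx('neuron_123.nmx', dataset='wanner2016')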


def read_nml(f: Union[str, pd.DataFrame, Iterable],
             include_subdirs: bool = False,
             parallel: Union[bool, int] = 'auto',
             precision: int = 32,
             limit: Optional[int] = None,
             **kwargs) -> 'core.NeuronObject':
    """Read xml-based NML files into Neuron/Lists.

    Parameters
    ----------
    f :                 str
                        Filename or folder. If folder, will import all
                        ``.nml`` files.
    include_subdirs :   bool, optional
                        If True and ``f`` is a folder, will also search
                        subdirectories for ``.nml`` files.
    parallel :          "auto" | bool | int
                        Defaults to ``auto`` which means only use parallel
                        processing if more than 200 files are imported.
                        Spawning and joining processes causes overhead and is
                        considerably slower for imports of small numbers of
                        neurons. Integer will be interpreted as the number of
                        cores (otherwise defaults to ``os.cpu_count() // 2``).
    precision :         int [8, 16, 32, 64] | None
                        Precision for data. Defaults to 32 bit integers/floats.
                        If ``None`` will let pandas infer data types - this
                        typically leads to higher than necessary precision.
    limit :             int, optional
                        If reading from a folder you can use this parameter to
                        read only the first ``limit`` NML files. Useful if
                        wanting to get a sample from a large library of
                        skeletons.
    **kwargs
                        Keyword arguments passed to the construction of
                        ``navis.TreeNeuron``. You can use this to e.g. set
                        meta data.

    Returns
    -------
    navis.NeuronList

    See Also
    --------
    :func:`navis.read_nmx`
                        Read NMX files (collections of NML files).

    """
    reader = NMLReader(precision=precision, attrs=kwargs)
    # Read neurons
    neurons = reader.read_any(f,
                              parallel=parallel,
                              limit=limit,
                              include_subdirs=include_subdirs)

    return neurons
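
# Example usage for ``read_nml`` (path is hypothetical):
#
#   >>> import navis
#   >>> n = navis.read_nml('skeleton.nml', precision=64)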