Source code for navis.interfaces.insectbrain_db

#    This script is part of navis (http://www.github.com/navis-org/navis).
#    Copyright (C) 2018 Philipp Schlegel
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

""" Set of functions to interface with the www.insectbraindb.org database of
insect brains and neurons.
"""

import io
import os
import requests

import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.colors as mcl
import trimesh as tm

from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
from urllib.parse import urlparse

from typing import Union, Optional, List

from .. import config

from ..core import Volume, TreeNeuron, NeuronList
from ..utils import make_url, make_iterable

logger = config.get_logger(__name__)
baseurl = 'https://www.insectbraindb.org'


class Session:
    """Client to manage Insect Brain DB session.

    Parameters
    ----------
    token :         str
                    API token. See `authenticate()`.
    created_at :    str
                    Time and date the token was generated. Iso-formatted.

    """

    def __init__(self, username=None, password=None, token=None, created_at=None):
        self._session = requests.Session()

        self.username = username
        self.password = password

        self.token = token
        self.created_at = created_at

    @property
    def token_expired(self):
        """Check if token is expired."""
        if self._token_created_at:
            now = dt.datetime.now()
            expires_in = dt.timedelta(days=1)
            if now - self._token_created_at >= expires_in:
                return True
        return False

    @property
    def token(self):
        return self._token

    @token.setter
    def token(self, token):
        if token and not token.startswith('Token'):
            token = f'Token {token}'
        self._token = token
        self._session.headers['Authorization'] = token

    @property
    def token_created_at(self):
        return self._token_created_at

    @token_created_at.setter
    def token_created_at(self, value):
        if value:
            self._token_created_at = dt.datetime.fromisoformat(value[:-1])
        else:
            self._token_created_at = None

    def fetch_token(self):
        """Fetch fresh token."""
        username = self.username
        if not username:
            username = os.environ.get('INSECT_BRAIN_DB_USER', None)
        password = self.password
        if not password:
            password = os.environ.get('INSECT_BRAIN_DB_PASSWORD', None)

        if not username or not password:
            msg = """\
            You must provide username + password, or an API token. Please see
            `navis.interfaces.insectbrian_db.authenticate()` for details.
            """
            raise ValueError(msg)

        creds = {'username': username, 'password': password}

        # Note: do NOT remove the trailing '/' here
        url = make_url(baseurl, 'api', 'v2', 'token/')

        resp = requests.post(url, data=creds)
        resp.raise_for_status()

        global session
        self.token = resp.json()['token']
        self.token_created_at = resp.json()['created']

        logger.info('Successfully retrieved 24h Insect Brain DB API token!')

    def preflight(self):
        """Check if we're ready to make requests."""
        if self.token and self.token_expired:
            self.fetch_token()

    def get(self, *args, **kwargs):
        """Make GET request."""
        self.preflight()

        r = self._session.get(*args, **kwargs)
        r.raise_for_status()

        return r.json()

    def post(self, *args, **kwargs):
        """Make POST request."""
        self.preflight()

        r = self._session.post(*args, **kwargs)
        r.raise_for_status()

        return r.json()


[docs] def authenticate(username=None, password=None, token=None): """Authenticate against Insect Brain DB. You can either provide username + password, or a token. Each token is only valid for 24h though. The better alternative is to provide your username + password as environment variables: `INSECT_BRAIN_DB_USER` and `INSECT_BRAIN_DB_PASSWORD`, respectively. If you are using these environment you don't need to bother with `authenticate()` at all. Parameters ---------- username : str, optional Your username on Insect Brain DB. password : str, optional Your password on Insect Brain DB. token : str, optional A token. If provided you don't need to provide username + password. """ if not token and (not username and not password): raise ValueError('Must provide either username + password, or token ' '(or both).') if username: session.username = username if password: session.password = password if token: session.token = token else: session.fetch_token()
[docs] def get_brain_meshes(species: Union[str, int], combine: bool = False, max_threads: int = 4 ) -> Optional[List[Volume]]: """Fetch brain meshes for given species. Parameters ---------- species: str | int Species for which to fetch brain volumes. Strings are interpreted as names (scientific or common), integers as IDs. combine : bool, optional If True, will combine subvolumes (i.e. neuropils) into a single navis.Volume - else will return list with volumes. max_threads : int Number of parallel threads to use for fetching meshes. Returns ------- list of navis.Volume Examples -------- >>> import navis >>> import navis.interfaces.insectbrain_db as ibdb >>> v = ibdb.get_brain_meshes('Desert Locust', combine_vols=True) >>> navis.plot3d(v) """ # Get info with all available neuropils sp_info = get_species_info(species) # Go over all brains n_brains = len(sp_info.reconstructions) # type: ignore n_reconstr = len([r for r in sp_info.reconstructions if r.get('viewer_files')]) # type: ignore logger.info(f'{n_reconstr} reconstruction(s) from {n_brains} brain(s) found') volumes: List[Volume] = [] for brain in config.tqdm(sp_info.reconstructions, disable=config.pbar_hide, leave=config.pbar_leave, desc='Brains'): # type: ignore this_v = [] # If no reconstructions, continue if not brain.get('viewer_files'): # type: ignore continue with ThreadPoolExecutor(max_workers=max_threads) as executor: futures = {} for file in brain['viewer_files']: # If no file UUID, continue if not file['p_file']['uuid']: continue filename = file['p_file']['file_name'] f = executor.submit(_get_neuropil_mesh, file,) futures[f] = filename with config.tqdm(desc='Fetching', total=len(futures), leave=config.pbar_leave, disable=len(futures) == 1 or config.pbar_hide) as pbar: for f in as_completed(futures): name = futures[f] pbar.update(1) try: this_v.append(f.result()) except Exception as exc: print(f'{name} generated an exception:', exc) # Combine all volumes in this brain if combine: this_v = [Volume.combine(this_v)] this_v[0].color = (.85, .85, .85, .5) this_v[0].name = sp_info.scientific_name volumes += this_v return volumes
def _get_neuropil_mesh(file): filename = file['p_file']['file_name'] # Get the AWS URL (with all the required headers) for this object url = _get_download_url(file['p_file']['uuid']) resp = requests.get(url) resp.raise_for_status() f = io.BytesIO(resp.content) mesh = tm.load_mesh(f, file_type='obj') structures = file.get('structures') if structures: structure = structures[0].get('structure') hemisphere = structures[0].get('hemisphere') if structure: color = structure.get('color') if color: color = mcl.to_rgba(color, alpha=.5) else: color = (.85, .85, .85, .5) name = structure.get('name', filename) if hemisphere: name = f'{name} ({hemisphere})' else: name = file['p_file']['file_name'] return Volume(mesh, name=name, color=color)
[docs] @lru_cache() def get_species_info(species: Union[str, int]) -> pd.Series: """Get all info for given species. Parameters ---------- species : str | int Species to get info for. Returns ------- pandas.Series Pandas Series with info on given species. Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> info = ibdb.get_species_info() """ # First get species ID if isinstance(species, str): species = _get_species_id(species) url = make_url(baseurl, '/archive/species/most_current_permitted/', species_id=species) resp = requests.get(url) resp.raise_for_status() return pd.Series(resp.json())
[docs] @lru_cache() def get_available_species() -> pd.DataFrame: """Get all info for given species. Returns ------- pandas.DataFrame DataFrame with available species. Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> species = ibdb.get_available_species() """ url = make_url(baseurl, 'api', 'v2', 'species') return _sort_columns(pd.DataFrame.from_records(session.get(url)))
# Note to self: do not cache as the URLs expire eventually def list_experiment_files(id) -> pd.DataFrame: """List files associated with given experiment. Parameters ---------- id : int The experiment ID. See e.g. ``list_datasets``. Returns ------- pandas.DataFrame DataFrame with files. Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> files = ibdb.list_experiment_files(61) """ url = make_url(baseurl, 'api', 'v2', 'experiment', id, 'file') return _sort_columns(pd.DataFrame.from_records(session.get(url))) def list_datasets() -> pd.DataFrame: """List publication datasets and associated experiments. Returns ------- pandas.DataFrame DataFrame with available datasets. Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> datasets = ibdb.list_datasets() """ url = make_url(baseurl, 'api', 'publications', 'experiments?offset=0&limit=500') return _sort_columns(pd.DataFrame.from_records(session.get(url)['results'])) def get_skeletons_experiment(id) -> 'NeuronList': """Fetch all skeletons for given experiment. Parameters ---------- id : int The experiment ID. See e.g. ``list_datasets``. Returns ------- NeuronList Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> nl = ibdb.get_skeletons_experiment(61) """ # Make sure ID is integer id = int(id) # Get files associated with experiment files = list_experiment_files(id) # Figure out which files are skeletons sk_files = files[files.file_name.str.contains('skeleton') | files.file_name.str.endswith('.gz')] if sk_files.empty: raise ValueError('Did not find any skeleton files associated with ' f'experiment {id}') skeletons = [] for f in sk_files.itertuples(): logger.info(f'Downloading {f.file_name}') # Load the file r = requests.get(f.url) r.raise_for_status() # Files appear to be json-formatted and not compressed data = r.json() for i, neuron in enumerate(data['data']): for sk in neuron['skeletons']: # Load SWC table swc = pd.DataFrame(sk['data'], columns=['node_id', 'skeleton_id', 'x', 'y', 'z', 'radius', 'parent_id']) # Some cleaning up swc.drop('skeleton_id', axis=1, inplace=True) swc['parent_id'] = swc.parent_id.fillna(-1).astype(int) # Create neuron tn = TreeNeuron(swc, id=sk.get('id', 1), name=neuron.get('name', 'NA'), annotations=neuron.get('annotations', []), soma=None) skeletons.append(tn) logger.info(f'Done! Found {len(skeletons)} skeletons.') return NeuronList(skeletons) def get_meshes_experiment(id) -> 'NeuronList': """Fetch volumes associated with given experiment. Parameters ---------- id : int The experiment ID. See e.g. ``list_datasets``. Returns ------- list Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> vols = ibdb.get_meshes_experiment(61) """ # Make sure ID is integer id = int(id) # Get files associated with experiment files = list_experiment_files(id) # Figure out which files are skeletons me_files = files[files.file_name.str.endswith('.glb')] if me_files.empty: raise ValueError('Did not find any meshes associated with ' f'experiment {id}') volumes = [] for f in config.tqdm(me_files.itertuples(), desc='Downloading', total=me_files.shape[0]): # Load the file r = requests.get(f.url) r.raise_for_status() name = '.'.join(f.file_name.split('.')[:-1]) ext = f.file_name.split('.')[-1] file = io.BytesIO(r.content) scene = tm.load(file, file_type=ext) for obj in scene.geometry.values(): v = Volume(obj.vertices, obj.faces, name=name) volumes.append(v) logger.info(f'Done! Found {len(volumes)} meshes.') return volumes
[docs] def get_skeletons_species(species, max_threads=4): """Fetch all skeletons for given species. Note that some neurons might have multiple reconstructions. They will show up with the same ID with different names. Parameters ---------- species : str | int Name or ID of a species to fetch skeletons for. max_threads : int Number of parallel threads to use for fetching skeletons. Returns ------- navis.NeuronList Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> neurons = ibdb.get_skeletons_species('Desert Locust') """ if isinstance(species, str): species = _get_species_id(species) # First fetch URLs for all neurons url = make_url(baseurl, 'api', 'v2', 'neuron', 'reconstruction', neuron__species=species) meta = session.get(url) meta = [e for e in meta if e['viewer_files']] return _get_skeletons(meta, max_threads=max_threads)
[docs] def get_skeletons(x, max_threads=4): """Fetch skeletons for given neuron(s). Parameters ---------- x : str | int | list thereof Name(s) or ID(s) of neurons you want to fetch. max_threads : int Number of parallel threads to use for fetching skeletons. Returns ------- navis.NeuronList Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> neurons = ibdb.get_skeletons('TUps2-2') """ if isinstance(x, (int, str, np.int32, np.int64)): neurons = [x] else: neurons = x # First fetch URLs for all neurons meta = [] for x in neurons: if isinstance(x, str): q = search_neurons(name=x, partial_match=False) if q.empty: raise ValueError(f'No neuron with name "{x}" found') ids = q.id.values else: ids = x for i, id in enumerate(make_iterable(ids)): url = make_url(baseurl, 'api', 'v2', 'neuron', 'reconstruction', neuron=id) info = session.get(url) if (not info or 'viewer_files' not in info[0] or not info[0]['viewer_files']): raise ValueError(f'Neuron {x} ({id}) has no skeleton.') meta.append(info[0]) return _get_skeletons(meta, max_threads=max_threads)
def _get_skeletons(meta, max_threads=4): """Fetch skeleton(s) from info.""" nl = [] with ThreadPoolExecutor(max_workers=max_threads) as executor: futures = {} for inf in meta: id = inf['neuron'] desc = inf.get('description', '') for file in inf['viewer_files']: url = file['url'] fn = file['file_name'] f = executor.submit(_fetch_single_neuron, url, name=fn, description=desc, id=id) futures[f] = fn with config.tqdm(desc='Fetching', total=len(futures), leave=config.pbar_leave, disable=len(futures) == 1 or config.pbar_hide) as pbar: for f in as_completed(futures): name = futures[f] pbar.update(1) try: nl.append(f.result()) except Exception as exc: print(f'{name} generated an exception:', exc) return NeuronList(nl) def _fetch_single_neuron(url, **kwargs): """Load and parse SWC from given URL.""" resp = requests.get(url) resp.raise_for_status() s = io.StringIO(resp.content.decode()) swc = pd.read_csv(s, delimiter=' ', comment='#', header=None, skipinitialspace=True) swc.columns = ['node_id', 'label', 'x', 'y', 'z', 'radius', 'parent_id'] swc['radius'] /= 2 return TreeNeuron(swc, units='um', soma=None, **kwargs)
[docs] def search_neurons(name=None, short_name=None, species=None, sex=None, arborization=None, partial_match=True) -> pd.DataFrame: """Search for neurons matching given parameters. Parameters ---------- name : str, optional Name of the neuron. short_name : str, optional Short name of the neuron. species : str | int, optional Name or ID of the species. Can be common or scientific name. sex : "FEMALE" | "MALE" | "UNKNOWN", optional Sex of the neuron. arborization : str, optional Restrict to neurons having arborizations in given neuropil. partial_match : bool Whether to allow partial matches (does not apply for species). Returns ------- pandas.DataFrame Examples -------- >>> import navis.interfaces.insectbrain_db as ibdb >>> neurons = ibdb.search_neurons(species='Desert Locust') """ # Construct query options = {} if species: if not isinstance(species, int): species = _get_species_id(species) options['species'] = species for key, value in zip(['name', 'short_name', 'sex', 'arborization_region__structure'], [name, short_name, sex, arborization]): if not value: continue if partial_match: key += '__icontains' options[key] = value url = make_url(baseurl, 'api', 'v2', 'neuron', **options) resp = requests.get(url) resp.raise_for_status() return _sort_columns(pd.DataFrame.from_records(resp.json()))
def _get_species_id(species): """Map species name to its ID.""" spec = get_available_species() if species in spec.scientific_name.values: id = spec.set_index('scientific_name').loc[species, 'id'] elif species in spec.common_name.values: id = spec.set_index('common_name').loc[species, 'id'] else: raise ValueError(f'Unable to find an ID for species "{species}"') return id def _sort_columns(df): """Sort DataFrame columns such that irrelevant columns are in the middle.""" # Some hard-coded priorities prio = {} prio['id'] = 0 prio.update({c: 2 for c in df.columns if 'name' in c}) prio['name'] = 1 prio['description'] = 100 cols = sorted(df.columns, key=lambda x: prio.get(x, 10)) return df[cols] def _get_download_url(uuid): """Get AWS download URL for given object.""" url = f"https://www.insectbraindb.org/filestore/download_url/?uuid={uuid}" r = requests.get(url) r.raise_for_status() return r.json()['url'] session = Session()