# This script is part of navis (http://www.github.com/navis-org/navis).
# Copyright (C) 2018 Philipp Schlegel
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
""" Set of functions to interface with the www.insectbraindb.org database of
insect brains and neurons.
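
A typical workflow looks something like this (a minimal sketch; the species
name is taken from the examples below and assumes you have the required
access permissions)::

    import navis.interfaces.insectbrain_db as ibdb

    # Only needed if INSECT_BRAIN_DB_USER / INSECT_BRAIN_DB_PASSWORD are not set
    ibdb.authenticate('myuser', 'mypassword')

    # See which species are available
    species = ibdb.get_available_species()

    # Fetch brain meshes and neuron skeletons for a species
    meshes = ibdb.get_brain_meshes('Desert Locust')
    skeletons = ibdb.get_skeletons_species('Desert Locust')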
"""
import io
import os
import requests
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.colors as mcl
import trimesh as tm
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
from urllib.parse import urlparse
from typing import Union, Optional, List
from .. import config
from ..core import Volume, TreeNeuron, NeuronList
from ..utils import make_url, make_iterable
logger = config.get_logger(__name__)
baseurl = 'https://www.insectbraindb.org'
class Session:
"""Client to manage Insect Brain DB session.
Parameters
----------
    username :      str, optional
                    Username on Insect Brain DB.
    password :      str, optional
                    Password on Insect Brain DB.
    token :         str, optional
                    API token. See `authenticate()`.
    created_at :    str, optional
                    Time and date the token was generated, ISO-formatted.
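
    Examples
    --------
    The module keeps a shared instance of this class (``session``) which is
    configured via `authenticate()`; you rarely need to construct one
    yourself. A minimal sketch with a placeholder token:

    >>> from navis.interfaces.insectbrain_db import Session
    >>> s = Session(token='mytoken')                        # doctest: +SKIP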
"""
def __init__(self, username=None, password=None, token=None, created_at=None):
self._session = requests.Session()
self.username = username
self.password = password
self.token = token
        # Go through the property setter so that `_token_created_at` is populated
        self.token_created_at = created_at
@property
def token_expired(self):
"""Check if token is expired."""
if self._token_created_at:
now = dt.datetime.now()
expires_in = dt.timedelta(days=1)
if now - self._token_created_at >= expires_in:
return True
return False
@property
def token(self):
return self._token
@token.setter
def token(self, token):
if token and not token.startswith('Token'):
token = f'Token {token}'
self._token = token
self._session.headers['Authorization'] = token
@property
def token_created_at(self):
return self._token_created_at
@token_created_at.setter
def token_created_at(self, value):
        if value:
            # Strip the trailing 'Z' (UTC designator), which
            # `datetime.fromisoformat` cannot parse on older Python versions
            self._token_created_at = dt.datetime.fromisoformat(value[:-1])
else:
self._token_created_at = None
def fetch_token(self):
"""Fetch fresh token."""
username = self.username
if not username:
username = os.environ.get('INSECT_BRAIN_DB_USER', None)
password = self.password
if not password:
password = os.environ.get('INSECT_BRAIN_DB_PASSWORD', None)
if not username or not password:
msg = """\
You must provide username + password, or an API token. Please see
            `navis.interfaces.insectbrain_db.authenticate()` for details.
"""
raise ValueError(msg)
creds = {'username': username, 'password': password}
# Note: do NOT remove the trailing '/' here
url = make_url(baseurl, 'api', 'v2', 'token/')
resp = requests.post(url, data=creds)
resp.raise_for_status()
self.token = resp.json()['token']
self.token_created_at = resp.json()['created']
logger.info('Successfully retrieved 24h Insect Brain DB API token!')
def preflight(self):
"""Check if we're ready to make requests."""
if self.token and self.token_expired:
self.fetch_token()
def get(self, *args, **kwargs):
"""Make GET request."""
self.preflight()
r = self._session.get(*args, **kwargs)
r.raise_for_status()
return r.json()
def post(self, *args, **kwargs):
"""Make POST request."""
self.preflight()
r = self._session.post(*args, **kwargs)
r.raise_for_status()
return r.json()
def authenticate(username=None, password=None, token=None):
"""Authenticate against Insect Brain DB.
    You can either provide username + password, or a token. Note that each
    token is only valid for 24 hours. The better alternative is to provide your
    username + password as environment variables: `INSECT_BRAIN_DB_USER` and
    `INSECT_BRAIN_DB_PASSWORD`, respectively. If you use these environment
    variables, you don't need to bother with `authenticate()` at all.
Parameters
----------
username : str, optional
Your username on Insect Brain DB.
password : str, optional
Your password on Insect Brain DB.
token : str, optional
A token. If provided you don't need to provide username +
password.
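
    Examples
    --------
    Credentials below are placeholders:

    >>> import navis.interfaces.insectbrain_db as ibdb
    >>> # Authenticate with username + password (fetches a fresh 24h token)
    >>> ibdb.authenticate('myuser', 'mypassword')           # doctest: +SKIP
    >>> # Alternatively, authenticate with an existing API token
    >>> ibdb.authenticate(token='mytoken')                  # doctest: +SKIP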
"""
if not token and (not username and not password):
raise ValueError('Must provide either username + password, or token '
'(or both).')
if username:
session.username = username
if password:
session.password = password
if token:
session.token = token
else:
session.fetch_token()
def get_brain_meshes(species: Union[str, int],
combine: bool = False
) -> Optional[List[Volume]]:
"""Fetch brain meshes for given species.
Parameters
----------
    species :       str | int
                    Species for which to fetch brain volumes. Strings are
                    interpreted as names (scientific or common), integers
                    as IDs.
    combine :       bool, optional
                    If True, will combine subvolumes (i.e. neuropils) into a
                    single navis.Volume. If False, returns a list of volumes.
Returns
-------
list of navis.Volume
Examples
--------
>>> import navis
>>> import navis.interfaces.insectbrain_db as ibdb
    >>> v = ibdb.get_brain_meshes('Desert Locust', combine=True)
>>> navis.plot3d(v)
"""
obj_url = 'https://s3.eu-central-1.amazonaws.com/ibdb-file-storage/'
# Get info with all available neuropils
sp_info = get_species_info(species)
# Go over all brains
n_brains = len(sp_info.reconstructions) # type: ignore
n_reconstr = len([r for r in sp_info.reconstructions if r.get('viewer_files')]) # type: ignore
logger.info(f'{n_reconstr} reconstruction(s) from {n_brains} brain(s) found')
volumes: List[Volume] = []
for brain in config.tqdm(sp_info.reconstructions,
disable=config.pbar_hide,
leave=config.pbar_leave,
desc='Brains'): # type: ignore
this_v = []
# If no reconstructions, continue
if not brain.get('viewer_files'): # type: ignore
continue
for file in config.tqdm(brain['viewer_files'],
desc='Neuropils',
disable=config.pbar_hide,
leave=config.pbar_leave):
filename = file['p_file']['file_name']
path = file['p_file']['path']
url = make_url(obj_url, path)
resp = requests.get(url)
resp.raise_for_status()
f = io.BytesIO(resp.content)
mesh = tm.load_mesh(f, file_type='obj')
            # Fall back to filename and a light grey if no structure info is available
            name = filename
            color = (.85, .85, .85, .5)
            structures = file.get('structures')
            if structures:
                structure = structures[0].get('structure')
                hemisphere = structures[0].get('hemisphere')
                if structure:
                    name = structure.get('name', filename)
                    if structure.get('color'):
                        color = mcl.to_rgba(structure['color'], alpha=.5)
                if hemisphere:
                    name = f'{name} ({hemisphere})'
v = Volume(mesh, name=name, color=color)
this_v.append(v)
# Combine all volumes in this brain
if combine:
this_v = [Volume.combine(this_v)]
this_v[0].color = (.85, .85, .85, .5)
volumes += this_v
return volumes
@lru_cache()
def get_species_info(species: Union[str, int]) -> pd.Series:
"""Get all info for given species.
Parameters
----------
species : str | int
Species to get info for.
Returns
-------
pandas.Series
Pandas Series with info on given species.
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
    >>> info = ibdb.get_species_info('Desert Locust')
"""
# First get species ID
if isinstance(species, str):
species = _get_species_id(species)
url = make_url(baseurl, '/archive/species/most_current_permitted/',
species_id=species)
resp = requests.get(url)
resp.raise_for_status()
return pd.Series(resp.json())
@lru_cache()
def get_available_species() -> pd.DataFrame:
"""Get all info for given species.
Returns
-------
pandas.DataFrame
DataFrame with available species.
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
>>> species = ibdb.get_available_species()
"""
url = make_url(baseurl, 'api', 'v2', 'species')
return _sort_columns(pd.DataFrame.from_records(session.get(url)))
# Note to self: do not cache as the URLs expire eventually
def list_experiment_files(id) -> pd.DataFrame:
"""List files associated with given experiment.
Parameters
----------
id : int
The experiment ID. See e.g. ``list_datasets``.
Returns
-------
pandas.DataFrame
DataFrame with files.
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
>>> files = ibdb.list_experiment_files(61)
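
    Experiment IDs can be looked up via `list_datasets()`:

    >>> datasets = ibdb.list_datasets()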
"""
url = make_url(baseurl, 'api', 'v2', 'experiment', id, 'file')
return _sort_columns(pd.DataFrame.from_records(session.get(url)))
def list_datasets() -> pd.DataFrame:
"""List publication datasets and associated experiments.
Returns
-------
pandas.DataFrame
DataFrame with available datasets.
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
>>> datasets = ibdb.list_datasets()
"""
url = make_url(baseurl, 'api', 'publications', 'experiments?offset=0&limit=500')
return _sort_columns(pd.DataFrame.from_records(session.get(url)['results']))
def get_skeletons_experiment(id) -> 'NeuronList':
"""Fetch all skeletons for given experiment.
Parameters
----------
id : int
The experiment ID. See e.g. ``list_datasets``.
Returns
-------
NeuronList
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
>>> nl = ibdb.get_skeletons_experiment(61)
"""
# Make sure ID is integer
id = int(id)
# Get files associated with experiment
files = list_experiment_files(id)
# Figure out which files are skeletons
sk_files = files[files.file_name.str.contains('skeleton') | files.file_name.str.endswith('.gz')]
if sk_files.empty:
raise ValueError('Did not find any skeleton files associated with '
f'experiment {id}')
skeletons = []
for f in sk_files.itertuples():
logger.info(f'Downloading {f.file_name}')
# Load the file
r = requests.get(f.url)
r.raise_for_status()
# Files appear to be json-formatted and not compressed
data = r.json()
for i, neuron in enumerate(data['data']):
for sk in neuron['skeletons']:
# Load SWC table
swc = pd.DataFrame(sk['data'],
columns=['node_id', 'skeleton_id',
'x', 'y', 'z', 'radius',
'parent_id'])
# Some cleaning up
swc.drop('skeleton_id', axis=1, inplace=True)
swc['parent_id'] = swc.parent_id.fillna(-1).astype(int)
# Create neuron
tn = TreeNeuron(swc,
id=sk.get('id', 1),
name=neuron.get('name', 'NA'),
annotations=neuron.get('annotations', []),
soma=None)
skeletons.append(tn)
logger.info(f'Done! Found {len(skeletons)} skeletons.')
return NeuronList(skeletons)
def get_meshes_experiment(id) -> List[Volume]:
"""Fetch volumes associated with given experiment.
Parameters
----------
id : int
The experiment ID. See e.g. ``list_datasets``.
Returns
-------
    list of navis.Volume
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
>>> vols = ibdb.get_meshes_experiment(61)
"""
# Make sure ID is integer
id = int(id)
# Get files associated with experiment
files = list_experiment_files(id)
    # Figure out which files are meshes
me_files = files[files.file_name.str.endswith('.glb')]
if me_files.empty:
raise ValueError('Did not find any meshes associated with '
f'experiment {id}')
volumes = []
for f in config.tqdm(me_files.itertuples(),
desc='Downloading',
total=me_files.shape[0]):
# Load the file
r = requests.get(f.url)
r.raise_for_status()
name = '.'.join(f.file_name.split('.')[:-1])
ext = f.file_name.split('.')[-1]
file = io.BytesIO(r.content)
scene = tm.load(file, file_type=ext)
for obj in scene.geometry.values():
v = Volume(obj.vertices, obj.faces, name=name)
volumes.append(v)
logger.info(f'Done! Found {len(volumes)} meshes.')
return volumes
def get_skeletons_species(species, max_threads=4):
"""Fetch all skeletons for given species.
    Note that some neurons might have multiple reconstructions. These will
    show up as separate neurons with the same ID but different names.
Parameters
----------
species : str | int
Name or ID of a species to fetch skeletons for.
max_threads : int
Number of parallel threads to use for fetching skeletons.
Returns
-------
navis.NeuronList
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
>>> neurons = ibdb.get_skeletons_species('Desert Locust')
"""
if isinstance(species, str):
species = _get_species_id(species)
# First fetch URLs for all neurons
url = make_url(baseurl, 'api', 'v2', 'neuron', 'reconstruction',
neuron__species=species)
meta = session.get(url)
meta = [e for e in meta if e['viewer_files']]
return _get_skeletons(meta, max_threads=max_threads)
def get_skeletons(x, max_threads=4):
"""Fetch skeletons for given neuron(s).
Parameters
----------
x : str | int | list thereof
Name(s) or ID(s) of neurons you want to fetch.
max_threads : int
Number of parallel threads to use for fetching skeletons.
Returns
-------
navis.NeuronList
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
>>> neurons = ibdb.get_skeletons('TUps2-2')
"""
if isinstance(x, (int, str, np.int32, np.int64)):
neurons = [x]
else:
neurons = x
# First fetch URLs for all neurons
meta = []
for x in neurons:
if isinstance(x, str):
q = search_neurons(name=x, partial_match=False)
if q.empty:
raise ValueError(f'No neuron with name "{x}" found')
ids = q.id.values
else:
ids = x
for i, id in enumerate(make_iterable(ids)):
url = make_url(baseurl, 'api', 'v2', 'neuron', 'reconstruction',
neuron=id)
info = session.get(url)
if (not info
or 'viewer_files' not in info[0]
or not info[0]['viewer_files']):
raise ValueError(f'Neuron {x} ({id}) has no skeleton.')
meta.append(info[0])
return _get_skeletons(meta, max_threads=max_threads)
def _get_skeletons(meta, max_threads=4):
"""Fetch skeleton(s) from info."""
nl = []
with ThreadPoolExecutor(max_workers=max_threads) as executor:
futures = {}
for inf in meta:
id = inf['neuron']
desc = inf.get('description', '')
for file in inf['viewer_files']:
url = file['url']
fn = file['file_name']
f = executor.submit(_fetch_single_neuron,
url,
name=fn,
description=desc,
id=id)
futures[f] = fn
with config.tqdm(desc='Fetching',
total=len(futures),
leave=config.pbar_leave,
disable=len(futures) == 1 or config.pbar_hide) as pbar:
for f in as_completed(futures):
name = futures[f]
pbar.update(1)
try:
nl.append(f.result())
except Exception as exc:
                    logger.error(f'{name} generated an exception: {exc}')
return NeuronList(nl)
def _fetch_single_neuron(url, **kwargs):
"""Load and parse SWC from given URL."""
resp = requests.get(url)
resp.raise_for_status()
s = io.StringIO(resp.content.decode())
swc = pd.read_csv(s,
delimiter=' ', comment='#',
header=None, skipinitialspace=True)
swc.columns = ['node_id', 'label', 'x', 'y', 'z', 'radius', 'parent_id']
    # The radius column appears to store diameters, hence halve it
    swc['radius'] /= 2
return TreeNeuron(swc, units='um', soma=None, **kwargs)
def search_neurons(name=None, short_name=None, species=None, sex=None,
arborization=None, partial_match=True) -> pd.DataFrame:
"""Search for neurons matching given parameters.
Parameters
----------
name : str, optional
Name of the neuron.
short_name : str, optional
Short name of the neuron.
species : str | int, optional
Name or ID of the species. Can be common or scientific name.
sex : "FEMALE" | "MALE" | "UNKNOWN", optional
Sex of the neuron.
arborization : str, optional
Restrict to neurons having arborizations in given neuropil.
partial_match : bool
Whether to allow partial matches (does not apply for species).
Returns
-------
pandas.DataFrame
Examples
--------
>>> import navis.interfaces.insectbrain_db as ibdb
>>> neurons = ibdb.search_neurons(species='Desert Locust')
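
    Partial matches against the name also work (the prefix below is borrowed
    from the `get_skeletons` example and may not match your permissions):

    >>> tu_neurons = ibdb.search_neurons(name='TUps', partial_match=True)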
"""
# Construct query
options = {}
if species:
if not isinstance(species, int):
species = _get_species_id(species)
options['species'] = species
for key, value in zip(['name', 'short_name', 'sex',
'arborization_region__structure'],
[name, short_name, sex, arborization]):
if not value:
continue
if partial_match:
key += '__icontains'
options[key] = value
url = make_url(baseurl, 'api', 'v2', 'neuron', **options)
resp = requests.get(url)
resp.raise_for_status()
return _sort_columns(pd.DataFrame.from_records(resp.json()))
def _get_species_id(species):
"""Map species name to its ID."""
spec = get_available_species()
if species in spec.scientific_name.values:
id = spec.set_index('scientific_name').loc[species, 'id']
elif species in spec.common_name.values:
id = spec.set_index('common_name').loc[species, 'id']
else:
raise ValueError(f'Unable to find an ID for species "{species}"')
return id
def _sort_columns(df):
"""Sort DataFrame columns such that irrelevant columns are in the middle."""
# Some hard-coded priorities
prio = {}
prio['id'] = 0
prio.update({c: 2 for c in df.columns if 'name' in c})
prio['name'] = 1
prio['description'] = 100
cols = sorted(df.columns, key=lambda x: prio.get(x, 10))
return df[cols]
session = Session()