Source code for pumpp.task.tags

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Tag task transformers'''

import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

from librosa import time_to_frames
from librosa.sequence import transition_loop

import jams

from .base import BaseTaskTransformer
from ..exceptions import ParameterError

__all__ = ['DynamicLabelTransformer', 'StaticLabelTransformer']


[docs]class DynamicLabelTransformer(BaseTaskTransformer): '''Time-series label transformer. Attributes ---------- name : str The name of this transformer object namespace : str The JAMS namespace for this task labels : list of str [optional] The list of labels for this task. If not provided, it will attempt to infer the label set from the namespace definition. sr : number > 0 The audio sampling rate hop_length : int > 0 The hop length for annotation frames p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)] Optional self-loop probability(ies), used for Viterbi decoding p_state : None or np.ndarray [shape=(n_labels,)] Optional marginal probability for each class p_init : None or np.ndarray [shape=(n_labels,)] Optional initial probability for each class See Also -------- StaticLabelTransformer '''
[docs] def __init__(self, name, namespace, labels=None, sr=22050, hop_length=512, p_self=None, p_init=None, p_state=None): super(DynamicLabelTransformer, self).__init__(name=name, namespace=namespace, sr=sr, hop_length=hop_length) if labels is None: labels = jams.schema.values(namespace) self.encoder = MultiLabelBinarizer() self.encoder.fit([labels]) self._classes = set(self.encoder.classes_) self.set_transition(p_self) if p_init is not None: if len(p_init) != len(self._classes): raise ParameterError('Invalid p_init.shape={} for vocabulary size={}'.format(p_init.shape, len(self._classes))) self.p_init = p_init if p_state is not None: if len(p_state) != len(self._classes): raise ParameterError('Invalid p_state.shape={} for vocabulary size={}'.format(p_state.shape, len(self._classes))) self.p_state = p_state self.register('tags', [None, len(self._classes)], np.bool)
def set_transition(self, p_self): '''Set the transition matrix according to self-loop probabilities. Parameters ---------- p_self : None, float in (0, 1), or np.ndarray [shape=(n_labels,)] Optional self-loop probability(ies), used for Viterbi decoding ''' if p_self is None: self.transition = None else: self.transition = np.empty((len(self._classes), 2, 2)) if np.isscalar(p_self): self.transition = transition_loop(2, p_self) elif len(p_self) != len(self._classes): raise ParameterError('Invalid p_self.shape={} for vocabulary size={}'.format(p_self.shape, len(self._classes))) else: for i in range(len(self._classes)): self.transition[i] = transition_loop(2, p_self[i]) def empty(self, duration): '''Empty label annotations. Constructs a single observation with an empty value (None). Parameters ---------- duration : number > 0 The duration of the annotation ''' ann = super(DynamicLabelTransformer, self).empty(duration) ann.append(time=0, duration=duration, value=None) return ann def transform_annotation(self, ann, duration): '''Transform an annotation to dynamic label encoding. Parameters ---------- ann : jams.Annotation The annotation to convert duration : number > 0 The duration of the track Returns ------- data : dict data['tags'] : np.ndarray, shape=(n, n_labels) A time-varying binary encoding of the labels ''' intervals, values = ann.to_interval_values() # Suppress all intervals not in the encoder tags = [] for v in values: if v in self._classes: tags.extend(self.encoder.transform([[v]])) else: tags.extend(self.encoder.transform([[]])) tags = np.asarray(tags) target = self.encode_intervals(duration, intervals, tags) return {'tags': target} def inverse(self, encoded, duration=None): '''Inverse transformation''' ann = jams.Annotation(namespace=self.namespace, duration=duration) for start, end, value in self.decode_intervals(encoded, duration=duration, transition=self.transition, p_init=self.p_init, p_state=self.p_state): # Map start:end to frames f_start, f_end = time_to_frames([start, end], sr=self.sr, hop_length=self.hop_length) confidence = np.mean(encoded[f_start:f_end+1, value]) value_dec = self.encoder.inverse_transform(np.atleast_2d(value))[0] for vd in value_dec: ann.append(time=start, duration=end-start, value=vd, confidence=confidence) return ann
[docs]class StaticLabelTransformer(BaseTaskTransformer): '''Static label transformer. Attributes ---------- name : str The name of this transformer object namespace : str The JAMS namespace for this task labels : list of str [optional] The list of labels for this task. If not provided, it will attempt to infer the label set from the namespace definition. See Also -------- DynamicLabelTransformer '''
[docs] def __init__(self, name, namespace, labels=None): super(StaticLabelTransformer, self).__init__(name=name, namespace=namespace, sr=1, hop_length=1) if labels is None: labels = jams.schema.values(namespace) self.encoder = MultiLabelBinarizer() self.encoder.fit([labels]) self._classes = set(self.encoder.classes_) self.register('tags', [len(self._classes)], np.bool)
def transform_annotation(self, ann, duration): '''Transform an annotation to static label encoding. Parameters ---------- ann : jams.Annotation The annotation to convert duration : number > 0 The duration of the track Returns ------- data : dict data['tags'] : np.ndarray, shape=(n_labels,) A static binary encoding of the labels ''' intervals = np.asarray([[0, 1]]) values = list([obs.value for obs in ann]) intervals = np.tile(intervals, [len(values), 1]) # Suppress all intervals not in the encoder tags = [v for v in values if v in self._classes] if len(tags): target = self.encoder.transform([tags]).astype(np.bool).max(axis=0) else: target = np.zeros(len(self._classes), dtype=np.bool) return {'tags': target} def inverse(self, encoded, duration=None): '''Inverse static tag transformation''' ann = jams.Annotation(namespace=self.namespace, duration=duration) if np.isrealobj(encoded): detected = (encoded >= 0.5) else: detected = encoded for vd in self.encoder.inverse_transform(np.atleast_2d(detected))[0]: vid = np.flatnonzero(self.encoder.transform(np.atleast_2d(vd))) ann.append(time=0, duration=duration, value=vd, confidence=encoded[vid]) return ann