Source code for pumpp.feature.base

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''Feature extraction base class'''

import numpy as np
from librosa import resample, time_to_frames

from ..base import Scope
from ..exceptions import ParameterError


class FeatureExtractor(Scope):
    '''The base feature extractor class.

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio for analysis

    hop_length : int > 0
        The hop length between analysis frames

    conv : {'tf', 'th', 'channels_last', 'channels_first', None}
        convolution dimension ordering:

            - 'channels_last' for tensorflow-style 2D convolution
            - 'tf' equivalent to 'channels_last'
            - 'channels_first' for theano-style 2D convolution
            - 'th' equivalent to 'channels_first'
            - None for 1D or non-convolutional representations
    '''

    def __init__(self, name, sr, hop_length, conv=None):
        '''Initialize the extractor and validate the `conv` mode.

        Raises
        ------
        ParameterError
            If `conv` is not one of the supported dimension orderings.
        '''

        super(FeatureExtractor, self).__init__(name)

        if conv not in ('tf', 'th', 'channels_last', 'channels_first', None):
            raise ParameterError('conv="{}", must be one of '
                                 '("channels_last", "tf", '
                                 '"channels_first", "th", None)'.format(conv))

        self.sr = sr
        self.hop_length = hop_length
        self.conv = conv

    def register(self, key, dimension, dtype, channels=1):
        '''Register a field, adding a channel axis according to `conv`.

        The base shape is ``[None, dimension]``.  For channels-last modes
        `channels` is appended to the shape; for channels-first modes it is
        inserted at the front.  The resulting shape is forwarded to the
        parent class's `register`.

        Parameters
        ----------
        key : str
            The field key, forwarded to the parent `register`

        dimension : int
            The size of the second entry of the base shape

        dtype
            The field data type, forwarded to the parent `register`

        channels : int
            The size of the channel axis (only used when `conv` is not None)
        '''

        shape = [None, dimension]

        if self.conv in ('channels_last', 'tf'):
            shape.append(channels)

        elif self.conv in ('channels_first', 'th'):
            shape.insert(0, channels)

        super(FeatureExtractor, self).register(key, shape, dtype)

    @property
    def idx(self):
        '''Index expression that inserts the channel axis for `conv`.

        Returns
        -------
        Ellipsis or tuple
            - `Ellipsis` when `conv` is None (no axis is added)
            - ``(:, :, np.newaxis)`` for channels-last modes
            - ``(np.newaxis, :, :)`` for channels-first modes
        '''
        if self.conv is None:
            return Ellipsis

        elif self.conv in ('channels_last', 'tf'):
            return (slice(None), slice(None), np.newaxis)

        elif self.conv in ('channels_first', 'th'):
            return (np.newaxis, slice(None), slice(None))

    def transform(self, y, sr):
        '''Transform an audio signal

        Parameters
        ----------
        y : np.ndarray
            The audio signal

        sr : number > 0
            The native sampling rate of y

        Returns
        -------
        dict
            Data dictionary containing features extracted from y

        See Also
        --------
        transform_audio
        '''
        if sr != self.sr:
            # Pass the rates by keyword: recent librosa releases make
            # `orig_sr` and `target_sr` keyword-only, and older releases
            # accept the same keyword names.
            y = resample(y, orig_sr=sr, target_sr=self.sr)

        return self.merge([self.transform_audio(y)])

    def transform_audio(self, y):
        '''Extract features from audio at this extractor's sampling rate.

        Not implemented in the base class; `transform` calls this method
        after resampling `y` to `self.sr`.

        Raises
        ------
        NotImplementedError
            Always, in the base class.
        '''
        raise NotImplementedError

    def phase_diff(self, phase):
        '''Compute the phase differential along the axis selected by `conv`.

        The differential is taken along axis 1 for channels-first modes
        and along axis 0 otherwise.

        Parameters
        ----------
        phase : np.ndarray
            Input phase (in radians)

        Returns
        -------
        dphase : np.ndarray like `phase`
            The phase differential.  The first entry along the
            differencing axis is copied from `phase` unchanged; the
            remaining entries hold the first difference of the unwrapped
            phase.
        '''

        # Channels-first layouts carry the channel on axis 0, so the
        # differencing axis moves to 1.
        axis = 1 if self.conv in ('channels_first', 'th') else 0

        # Index tuples selecting the first entry and all later entries
        # along `axis`.  These must be tuples: indexing an ndarray with a
        # list of slices is not supported by current NumPy.
        head = [slice(None)] * phase.ndim
        head[axis] = slice(1)
        head = tuple(head)

        tail = [slice(None)] * phase.ndim
        tail[axis] = slice(1, None)
        tail = tuple(tail)

        # Compute the phase differential
        dphase = np.empty(phase.shape, dtype=phase.dtype)
        dphase[head] = phase[head]
        dphase[tail] = np.diff(np.unwrap(phase, axis=axis), axis=axis)
        return dphase

    def layers(self):
        '''Construct Keras input layers for the given transformer

        Returns
        -------
        layers : {field: keras.layers.Input}
            A dictionary of keras input layers, keyed by the corresponding
            field keys.
        '''
        # Imported lazily so keras is only needed when layers are requested.
        from keras.layers import Input

        inputs = dict()
        for key in self.fields:
            field = self.fields[key]
            inputs[key] = Input(name=key,
                                shape=field.shape,
                                dtype=field.dtype)

        return inputs

    def n_frames(self, duration):
        '''Get the number of frames for a given duration

        Parameters
        ----------
        duration : number >= 0
            The duration, in seconds

        Returns
        -------
        n_frames : int >= 0
            The number of frames at this extractor's sampling rate and
            hop length
        '''

        return int(time_to_frames(duration, sr=self.sr,
                                  hop_length=self.hop_length))