Source code for pumpp.feature.cqt

#!/usr/bin/env python
'''CQT features'''

import numpy as np
from librosa import cqt, magphase, note_to_hz
from librosa import amplitude_to_db, get_duration
from librosa.util import fix_length

from .base import FeatureExtractor
from ..exceptions import ParameterError

__all__ = ['CQT', 'CQTMag', 'CQTPhaseDiff',
           'HCQT', 'HCQTMag', 'HCQTPhaseDiff']


class CQT(FeatureExtractor):
    '''Constant-Q transform

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between CQT frames

    n_octaves : int > 0
        The number of octaves in the CQT

    over_sample : int > 0
        The amount of frequency oversampling (bins per semitone)

    fmin : float > 0
        The minimum frequency of the CQT.

        If `None`, the frequency of note `C1` is used.

    log : boolean
        If `True`, scale the magnitude to decibels

        Otherwise, use linear magnitude

    conv : str or None
        Convolution mode, forwarded unchanged to the `FeatureExtractor`
        constructor.

    '''
    def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
                 fmin=None, log=False, conv=None):
        super(CQT, self).__init__(name, sr, hop_length, conv=conv)

        if fmin is None:
            fmin = note_to_hz('C1')

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin
        self.log = log

        # 12 semitones per octave, `over_sample` bins per semitone
        n_bins = n_octaves * 12 * over_sample
        self.register('mag', n_bins, np.float32)
        self.register('phase', n_bins, np.float32)

    def transform_audio(self, y):
        '''Compute the CQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude (in decibels relative to the peak
                if `self.log` is set)

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase angle

            Both arrays are cast to `float32` and indexed with `self.idx`
            (provided by the base class) before being returned.
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        bins_per_octave = self.over_sample * 12

        C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                fmin=self.fmin,
                n_bins=self.n_octaves * bins_per_octave,
                bins_per_octave=bins_per_octave)

        # Pad or trim along the time axis so that the frame count matches
        # `n_frames`.  `size` is given by keyword because newer librosa
        # releases make it keyword-only; older releases accept the same name.
        C = fix_length(C, size=n_frames)

        cqtm, phase = magphase(C)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        # Transpose from (bins, frames) to (frames, bins)
        return {'mag': cqtm.T.astype(np.float32)[self.idx],
                'phase': np.angle(phase).T.astype(np.float32)[self.idx]}


class CQTMag(CQT):
    '''Magnitude CQT

    Identical to `CQT`, except that the `phase` field is neither
    registered nor returned.

    See Also
    --------
    CQT
    '''

    def __init__(self, *args, **kwargs):
        super(CQTMag, self).__init__(*args, **kwargs)
        # The parent registers both 'mag' and 'phase'; drop the latter
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute CQT magnitude.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
                The CQT magnitude
        '''
        data = super(CQTMag, self).transform_audio(y)
        # Discard the phase computed by the parent transform
        data.pop('phase')
        return data


class CQTPhaseDiff(CQT):
    '''CQT with unwrapped phase differentials

    Identical to `CQT`, except that the `phase` field is replaced by
    a `dphase` field with the same number of bins and the same dtype.

    See Also
    --------
    CQT
    '''
    def __init__(self, *args, **kwargs):
        super(CQTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the parent's 'phase' field for 'dphase', reusing its dtype
        phase_field = self.pop('phase')

        self.register('dphase',
                      self.n_octaves * 12 * self.over_sample,
                      phase_field.dtype)

    def transform_audio(self, y):
        '''Compute the CQT with unwrapped phase

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_bins)
                CQT magnitude

            data['dphase'] : np.ndarray, shape=(n_frames, n_bins)
                Unwrapped phase differential, computed from the CQT
                phase by `self.phase_diff`
        '''
        data = super(CQTPhaseDiff, self).transform_audio(y)
        # Replace the raw phase with its differential
        data['dphase'] = self.phase_diff(data.pop('phase'))
        return data


class HCQT(FeatureExtractor):
    '''Harmonic Constant-Q transform

    One CQT is computed per requested harmonic `h`, each starting at
    frequency `fmin * h`, and the results are stacked along a channel axis.

    Attributes
    ----------
    name : str
        The name for this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The number of samples between CQT frames

    n_octaves : int > 0
        The number of octaves in the CQT

    over_sample : int > 0
        The amount of frequency oversampling (bins per semitone)

    fmin : float > 0
        The minimum frequency of the CQT.

        If `None`, the frequency of note `C1` is used.

    harmonics : list of int >= 1
        The list of harmonics to compute.

        If `None`, only the first harmonic (`[1]`) is computed.

    log : boolean
        If `True`, scale the magnitude to decibels

        Otherwise, use linear magnitude

    conv : {'tf', 'th', 'channels_last', 'channels_first'}
        convolution dimension ordering:

            - 'channels_last' for tensorflow-style 2D convolution
            - 'tf' equivalent to 'channels_last'
            - 'channels_first' for theano-style 2D convolution
            - 'th' equivalent to 'channels_first'

        `None` is not a valid value for this extractor.

    Raises
    ------
    ParameterError
        If `conv` is not one of the orderings listed above, or if
        `harmonics` is empty or contains anything other than
        positive integers.

    '''
    def __init__(self, name, sr, hop_length, n_octaves=8, over_sample=3,
                 fmin=None, harmonics=None, log=False, conv='channels_last'):

        if conv not in ('channels_last', 'tf', 'channels_first', 'th'):
            raise ParameterError('Invalid conv={}'.format(conv))

        super(HCQT, self).__init__(name, sr, hop_length, conv=conv)

        if fmin is None:
            fmin = note_to_hz('C1')

        if harmonics is None:
            harmonics = [1]
        else:
            harmonics = list(harmonics)
            # An empty list would register zero-channel fields and only
            # fail later inside `transform_audio`; reject it up front.
            if not harmonics or not all(isinstance(h, int) and h > 0
                                        for h in harmonics):
                raise ParameterError('Invalid harmonics={}'.format(harmonics))

        self.n_octaves = n_octaves
        self.over_sample = over_sample
        self.fmin = fmin
        self.log = log
        self.harmonics = harmonics

        # 12 semitones per octave, `over_sample` bins per semitone
        n_bins = n_octaves * 12 * over_sample
        self.register('mag', n_bins, np.float32, channels=len(harmonics))
        self.register('phase', n_bins, np.float32, channels=len(harmonics))

    def transform_audio(self, y):
        '''Compute the HCQT

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray
                The CQT magnitude.

                Shape is (n_frames, n_bins, n_harmonics) for
                'channels_last' / 'tf', and
                (n_harmonics, n_frames, n_bins) for
                'channels_first' / 'th'.

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase angle
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        # These do not depend on the harmonic, so compute them once
        bins_per_octave = self.over_sample * 12
        n_bins = self.n_octaves * bins_per_octave

        cqtm, phase = [], []

        for h in self.harmonics:
            C = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin * h,
                    n_bins=n_bins,
                    bins_per_octave=bins_per_octave)

            # Pad or trim the time axis to `n_frames`.  `size` is given by
            # keyword because newer librosa releases make it keyword-only;
            # older releases accept the same name.
            C = fix_length(C, size=n_frames)

            C, P = magphase(C)
            if self.log:
                # The decibel reference is the peak of this harmonic's CQT
                C = amplitude_to_db(C, ref=np.max)
            cqtm.append(C)
            phase.append(P)

        # Stack to (channels, bins, time) before reordering
        cqtm = np.asarray(cqtm).astype(np.float32)
        phase = np.angle(np.asarray(phase)).astype(np.float32)

        return {'mag': self._index(cqtm),
                'phase': self._index(phase)}

    def _index(self, value):
        '''Rearrange a tensor according to the convolution mode

        Input is assumed to be in (channels, bins, time) format.

        Parameters
        ----------
        value : np.ndarray, shape=(channels, bins, time)
            The tensor to rearrange

        Returns
        -------
        value : np.ndarray
            (time, bins, channels) if `self.conv` is 'channels_last' or 'tf';
            (channels, time, bins) otherwise.
        '''

        if self.conv in ('channels_last', 'tf'):
            return np.transpose(value, (2, 1, 0))

        else:  # self.conv in ('channels_first', 'th')
            return np.transpose(value, (0, 2, 1))


class HCQTMag(HCQT):
    '''Magnitude HCQT

    Identical to `HCQT`, except that the `phase` field is neither
    registered nor returned.

    See Also
    --------
    HCQT
    '''

    def __init__(self, *args, **kwargs):
        super(HCQTMag, self).__init__(*args, **kwargs)
        # The parent registers both 'mag' and 'phase'; drop the latter
        self.pop('phase')

    def transform_audio(self, y):
        '''Compute HCQT magnitude.

        Parameters
        ----------
        y : np.ndarray
            the audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray
                The CQT magnitude.

                Shape is (n_frames, n_bins, n_harmonics) for
                'channels_last' / 'tf', and
                (n_harmonics, n_frames, n_bins) for
                'channels_first' / 'th'.
        '''
        data = super(HCQTMag, self).transform_audio(y)
        # Discard the phase computed by the parent transform
        data.pop('phase')
        return data


class HCQTPhaseDiff(HCQT):
    '''HCQT with unwrapped phase differentials

    Identical to `HCQT`, except that the `phase` field is replaced by
    a `dphase` field with the same bins, dtype and channel count.

    See Also
    --------
    HCQT
    '''
    def __init__(self, *args, **kwargs):
        super(HCQTPhaseDiff, self).__init__(*args, **kwargs)
        # Swap the parent's 'phase' field for 'dphase', reusing its dtype
        phase_field = self.pop('phase')

        self.register('dphase',
                      self.n_octaves * 12 * self.over_sample,
                      phase_field.dtype,
                      channels=len(self.harmonics))

    def transform_audio(self, y):
        '''Compute the HCQT with unwrapped phase

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray
                CQT magnitude.

                Shape is (n_frames, n_bins, n_harmonics) for
                'channels_last' / 'tf', and
                (n_harmonics, n_frames, n_bins) for
                'channels_first' / 'th'.

            data['dphase'] : np.ndarray
                Unwrapped phase differential, computed from the HCQT
                phase by `self.phase_diff`
        '''
        data = super(HCQTPhaseDiff, self).transform_audio(y)
        # Replace the raw phase with its differential
        data['dphase'] = self.phase_diff(data.pop('phase'))
        return data