Source code for pumpp.feature.mel

#!/usr/bin/env python
"""Mel spectrogram"""

import numpy as np
from librosa.feature import melspectrogram

from .base import FeatureExtractor

__all__ = ['Mel']


[docs]class Mel(FeatureExtractor):
    '''Mel spectra feature extraction

    Attributes
    ----------
    name : str or None
        naming scope for this feature extractor

    sr : number > 0
        Sampling rate of the audio (in Hz)

    hop_length : int > 0
        Number of samples to advance between frames

    n_fft :  int > 0
        Number of samples per frame

    n_mels : int > 0
        Number of Mel frequency bins

    fmax : number > 0
        The maximum frequency bin.
        Defaults to `0.5 * sr`

    log : bool
        If `True`, scale magnitude in decibels.

        Otherwise, use a linear amplitude scale.
    '''
[docs]    def __init__(self, name, sr, hop_length, n_fft, n_mels, fmax=None,
                 log=False, conv=None):
        super(Mel, self).__init__(name, sr, hop_length, conv=conv)

        self.n_fft = n_fft
        self.n_mels = n_mels
        self.fmax = fmax
        self.log = log

        self.register('mag', n_mels, np.float32)

    def transform_audio(self, y):
        '''Compute the Mel spectrogram

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, n_mels)
                The Mel spectrogram
        '''
        mel = np.sqrt(melspectrogram(y=y, sr=self.sr,
                                     n_fft=self.n_fft,
                                     hop_length=self.hop_length,
                                     n_mels=self.n_mels,
                                     fmax=self.fmax)).astype(np.float32)
        if self.log:
            mel = librosa.amplitude_to_db(mel, ref=np.max)

        return {'mag': mel.T[self.idx]}