Source code for pumpp.feature.rhythm

#!/usr/bin/env python
'''Rhythm analysis features'''

import numpy as np
from librosa import fmt
from librosa.feature import tempogram
from librosa import get_duration
from librosa.util import fix_length

from .base import FeatureExtractor

__all__ = ['Tempogram', 'TempoScale']


[docs]class Tempogram(FeatureExtractor): '''Tempogram: the short-time autocorrelation of the accent signal Attributes ---------- name : str The name of this feature extractor sr : number > 0 The sampling rate of audio hop_length : int > 0 The hop length of analysis windows win_length : int > 0 The length of the analysis window (in frames) '''
[docs] def __init__(self, name, sr, hop_length, win_length, conv=None): super(Tempogram, self).__init__(name, sr, hop_length, conv=conv) self.win_length = win_length self.register('tempogram', win_length, np.float32)
def transform_audio(self, y): '''Compute the tempogram Parameters ---------- y : np.ndarray Audio buffer Returns ------- data : dict data['tempogram'] : np.ndarray, shape=(n_frames, win_length) The tempogram ''' n_frames = self.n_frames(get_duration(y=y, sr=self.sr)) tgram = tempogram(y=y, sr=self.sr, hop_length=self.hop_length, win_length=self.win_length).astype(np.float32) tgram = fix_length(tgram, n_frames) return {'tempogram': tgram.T[self.idx]}
[docs]class TempoScale(Tempogram): '''Tempogram scale transform. Mellin scale transform magnitude of the Tempogram. Attributes ---------- name : str Name of this extractor sr : number > 0 Sampling rate of audio hop_length : int > 0 Hop length for analysis frames win_length : int > 0 Number of frames per analysis window n_fmt : int > 0 Number of scale coefficients to retain '''
[docs] def __init__(self, name, sr, hop_length, win_length, n_fmt=128, conv=None): super(TempoScale, self).__init__(name, sr, hop_length, win_length, conv=conv) self.n_fmt = n_fmt self.pop('tempogram') self.register('temposcale', 1 + n_fmt // 2, np.float32)
def transform_audio(self, y): '''Apply the scale transform to the tempogram Parameters ---------- y : np.ndarray The audio buffer Returns ------- data : dict data['temposcale'] : np.ndarray, shape=(n_frames, n_fmt) The scale transform magnitude coefficients ''' data = super(TempoScale, self).transform_audio(y) data['temposcale'] = np.abs(fmt(data.pop('tempogram'), axis=1, n_fmt=self.n_fmt)).astype(np.float32)[self.idx] return data