#!/usr/bin/env python
'''Rhythm analysis features'''
import numpy as np
from librosa import fmt
from librosa.feature import tempogram
from librosa import get_duration
from librosa.util import fix_length
from .base import FeatureExtractor
# Public names exported by this module.
__all__ = ['Tempogram', 'TempoScale']
class Tempogram(FeatureExtractor):
    '''Tempogram: the short-time autocorrelation of the accent signal

    Attributes
    ----------
    name : str
        The name of this feature extractor

    sr : number > 0
        The sampling rate of audio

    hop_length : int > 0
        The hop length of analysis windows

    win_length : int > 0
        The length of the analysis window (in frames)

    conv : optional
        Forwarded unchanged to the ``FeatureExtractor`` constructor.
    '''

    def __init__(self, name, sr, hop_length, win_length, conv=None):
        super(Tempogram, self).__init__(name, sr, hop_length, conv=conv)

        self.win_length = win_length

        # One output field, `win_length` values per frame.
        self.register('tempogram', win_length, np.float32)

    def transform_audio(self, y):
        '''Compute the tempogram

        Parameters
        ----------
        y : np.ndarray
            Audio buffer

        Returns
        -------
        data : dict
            data['tempogram'] : np.ndarray, shape=(n_frames, win_length)
                The tempogram
        '''
        n_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        tgram = tempogram(y=y, sr=self.sr,
                          hop_length=self.hop_length,
                          win_length=self.win_length).astype(np.float32)

        # Force exactly `n_frames` entries along the last axis.  `size` is
        # passed by keyword because recent librosa releases reject it as a
        # positional argument; the keyword form works on older ones too.
        tgram = fix_length(tgram, size=n_frames)

        # Transpose so that frames come first, then apply the
        # extractor's index (`self.idx`, defined by the base class).
        return {'tempogram': tgram.T[self.idx]}
class TempoScale(Tempogram):
    '''Tempogram scale transform.

    Mellin scale transform magnitude of the Tempogram.

    Attributes
    ----------
    name : str
        Name of this extractor

    sr : number > 0
        Sampling rate of audio

    hop_length : int > 0
        Hop length for analysis frames

    win_length : int > 0
        Number of frames per analysis window

    n_fmt : int > 0
        Number of scale transform samples; the output field keeps
        ``1 + n_fmt // 2`` coefficients per frame

    conv : optional
        Forwarded unchanged to the ``Tempogram`` constructor.
    '''

    def __init__(self, name, sr, hop_length, win_length, n_fmt=128, conv=None):
        super(TempoScale, self).__init__(name, sr, hop_length, win_length,
                                         conv=conv)

        self.n_fmt = n_fmt

        # The parent constructor registers a 'tempogram' field; this
        # extractor only outputs 'temposcale', so swap the fields.
        self.pop('tempogram')
        self.register('temposcale', 1 + n_fmt // 2, np.float32)

    def transform_audio(self, y):
        '''Apply the scale transform to the tempogram

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['temposcale'] : np.ndarray, shape=(n_frames, 1 + n_fmt // 2)
                The scale transform magnitude coefficients
        '''
        data = super(TempoScale, self).transform_audio(y)

        # Remove the intermediate tempogram so only 'temposcale' is returned.
        tgram = data.pop('tempogram')

        magnitude = np.abs(fmt(tgram, axis=1, n_fmt=self.n_fmt))

        # NOTE(review): the parent already indexes its output with
        # `self.idx`, and it is applied a second time here -- confirm this
        # is intended when `conv` is not None.
        data['temposcale'] = magnitude.astype(np.float32)[self.idx]

        return data