% usage tsm_op = tsm_mstft(input_signal, tsm_factor, win_len, num_iterations) % win_len is typically about 20-30ms and num_terations set to 50 normally returns good results but in general the more the better function synthSignal = tsm_mstft(signal, tsm_factor, win_len, num_iterations) % Time-scaling implementation based on the magnitude only reconstruction % algorithm of Griffen and Lim (IEEE Transactions on Acoustics, Speech and Signal Processing, % vol. ASSP-32(2), pp.236-243, April 1984.) % % David Dorran, Audio Research Group, Dublin Institute of Technology % david.dorran@dit.ie % http://eleceng.dit.ie/dorran % http://eleceng.dit.ie/arg % anHopSamps = win_len/4; %use a one quarter step size win = hanning(win_len); dummy_freq = 44100; % frequency variable for specgram (doesn't matter for our purposes) but will set it to 44100 so that specgram function works X = specgram(signal, win_len, dummy_freq,win, win_len - anHopSamps); moduli = abs(X); [numBins, numFrames ] = size(moduli); mapping_indices = round([1:round(numFrames*tsm_factor)]/tsm_factor); %provides mapping for L output samples to N input samples where L/N = tsm_factor syn_moduli = moduli(:,[mapping_indices]); %this line generates the magnitude estimates of the new STFT representationof the time-scaled output [rows, cols] = size(syn_moduli); synthSignal = rand((cols*anHopSamps) + win_len*2, 1); % provides a random signal from which random phases can be obtained during the first run of the for loop disp(strcat('Number of iterations: ', num2str(num_iterations))); for k = 1: num_iterations disp(strcat('iteration number :', num2str(k))); X = specgram(synthSignal, win_len, dummy_freq,win, win_len - anHopSamps); Z = syn_moduli.*exp(i*angle(X(:,1:cols))); % next three lines generate the remainder of the fft values i.e. a fliped version of the first half of the fft obtained from specgram Z = Z(1:(numBins),:); conj_Z = conj(flipud(Z(2:size(Z,1) -1,:))); Z = [Z;conj_Z]; synthSignal = zeros((cols*anHopSamps) + win_len*2, 1); %reinitalize the vector that holds the op curStart = 1; for idx = 1:cols-1 curEnd = curStart + win_len - 1; rIFFT = real(ifft(Z(:,idx))); synthSignal([curStart:curEnd]) = synthSignal([curStart:curEnd]) + rIFFT.*win; curStart = curStart + anHopSamps; end %if the analysis hop wasn't 1/4 the window length then a division of win^2 would be required at this point end