www.gusucode.com > 声音的处理有:LPC,FFT,共振峰,频谱源码程序 > siganlandsystemusingMatlab/SSUM/speech/vq/vqexpofn.m
% function vqexpofn(action, datastruct) if nargin < 1 action='init'; end name = mfilename; figname = [name(1:end-2) '_fig']; f=findobj('Tag',figname); handles = get(f,'UserData'); switch action case 'help' display_help(figname); case 'init' setdefaults; movegui(f,'center'); reset(handles.vectorplot); set(handles.info,'Enable','inactive'); set(handles.show_errors,'Visible','off'); case 'resize' movegui(f,'onscreen'); case 'load' set(handles.show_codebook,'Value',0); handles.audiodata = load_audiodata; if ~isfield(handles.audiodata, 'filenamepath') return; end if (size(handles.audiodata.data,2) > 1) handles.audiodata.data = to_mono(handles.audiodata.data); end handles.audiodata.data = normalize(handles.audiodata.data); if handles.audiodata.Fs > 8000 handles.audiodata.data = ... resample(handles.audiodata.data, 8000, ... handles.audiodata.Fs); handles.audiodata.Fs = 8000; end % Pre-emphasize handles.audiodata.data = filter([1 -.9], 1, handles.audiodata.data); windowsize = str2num(get(handles.windowsize,'String'))* ... handles.audiodata.Fs/1000; windowskip = str2num(get(handles.windowskip,'String'))* ... handles.audiodata.Fs/1000; handles.audiodata = endpoint(handles.audiodata, windowsize, windowskip); handles.audiodata = getStatistics(handles.audiodata, ... windowsize, windowskip); cla(handles.vectorplot); updateVectorPlot(handles); updateInfo(handles); drawnow; handles = createCodebooks(handles); set(handles.show_errors,'Visible','on'); case 'readinput' case 'play' if isfield(handles,'audiodata') audiodata = handles.audiodata; button = handles.play; play_audiodata(audiodata, button); end case 'updatePlot' if isfield(handles, 'audiodata') cla(handles.vectorplot); drawnow; updateVectorPlot(handles); updateInfo(handles); end case 'analyze' if isfield(handles, 'audiodata') set(handles.show_errors,'Visible','off'); cla(handles.vectorplot); windowsize = str2num(get(handles.windowsize,'String'))* ... handles.audiodata.Fs/1000; windowskip = str2num(get(handles.windowskip,'String'))* ... handles.audiodata.Fs/1000; handles.audiodata = getStatistics(handles.audiodata, ... windowsize, windowskip); updateVectorPlot(handles); updateInfo(handles); drawnow; handles = createCodebooks(handles); set(handles.show_errors,'Visible','on'); end case 'show_errors' f = figure('Position',[500 300 700 700],'Units','Normalized'); width = 0.9; h = 0.89; top = 0.065; Caxes1 = axes('position', [0.07 top width h]); hold on; for mnum=1:3 switch mnum case 1 y = handles.audiodata.cepstrum.codebook.distortion; sig1 = 'ko--'; legtext = 'CC'; l = 2.^[0:size(y,2)-1]; case 2 y = handles.audiodata.MFCC.codebook.distortion; sig1 = 'bo--'; legtext = 'MFCC'; l = 2.^[0:size(y,2)-1]; case 3 y = handles.audiodata.LPC.codebook.distortion; sig1 = 'ro--'; legtext = 'LPC'; l = 2.^[0:size(y,2)-1]; end p(mnum) = semilogx(l,y./max(y),sig1); end ylabel('Normalized Total Average Distortion'); xlabel('Codebook Length'); set(Caxes1,'XScale','log','XLim',[l(1) l(end)]); set(Caxes1,'XTick',l,'XTickLabel',l,'XMinorTick','off'); axis([l(1) l(end) 0 1.05]); legend(p,'CC','MFCC','LPC','Location','NorthWest'); tt = title('Distortion Values of Codebook Sizes'); set(tt,'FontSize',14); grid on; set(Caxes1,'XMinorGrid','off'); case 'analyze' if isfield(handles, 'audiodata_ref') & isfield(handles, 'audiodata') windowsize = str2num(get(handles.windowsize,'String'))* ... handles.audiodata.Fs/1000; windowskip = str2num(get(handles.windowskip,'String'))* ... handles.audiodata.Fs/1000; % test handles.audiodata = getStatistics(handles.audiodata, ... windowsize, windowskip); updateInfo(handles,'test'); handles.audiodata_ref = getStatistics(handles.audiodata_ref, ... windowsize, windowskip); updateInfo(handles,'ref'); cla(handles.dtwplot); set(handles.datatype,'Value',1); handles = findPath(handles,'Cepstrum'); updateDTWPlot(handles); drawnow; handles = findPath(handles,'LPC'); end case {'fourier', 'alias', 'feature', 'firexpo', 'iirexpo', 'formantexpo', 'lpcexpo'} if isfield(handles,'audiodata'), audiodata = handles.audiodata; switch action case 'fourier' fourierexpogui(audiodata); case 'alias' aliasexpogui(audiodata); case 'feature' featurexpogui(audiodata); case 'firexpo' firexpogui(audiodata); case 'iirexpo' iirexpogui(audiodata); case 'formantexpo' formantexpogui(audiodata); case 'lpcexpo' lpcexpogui(audiodata); end end case 'print' print_figure(f); case 'close' close_figure(f,figname(1:end-4)); return; end set(f,'UserData',handles); % -------------------------------------------------------------------- %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function updateInfo(ud) box = ud.info; data = ud.audiodata; windowskip = str2num(get(ud.windowskip,'String'))*data.Fs/1000; filename = data.filenamepath; [t,r] = strtok(filename,'/'); while ~isempty(r) [t,r] = strtok(r,'/'); end info = {... ['Name: ' t]; ... ['Fs: ' num2str(data.Fs)]; ... ['Length: ' num2str(size(data.data,1))]; ... ['Data Frames: ' num2str(floor(size(data.data,1)/windowskip))]}; set(box,'String', info); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function updateVectorPlot(ud) axes(ud.vectorplot); hold on; contents = get(ud.datatype,'String'); cbcontents = get(ud.codebooksize,'String'); distortion = 0; colors=['b' 'g' 'r' 'c' 'm' 'y']; switch contents{get(ud.datatype,'Value')} case 'Cepstrum' data = ud.audiodata.cepstrum.cc; codebooksize = log2(str2num(cbcontents{get(ud.codebooksize,'Value')}))+1; if isfield(ud.audiodata.cepstrum,'codebook'); codebook = ud.audiodata.cepstrum.codebook.codewords{codebooksize}; distortion = ud.audiodata.cepstrum.codebook.distortion(codebooksize); end case 'MFCC' data = ud.audiodata.MFCC.cc; codebooksize = log2(str2num(cbcontents{get(ud.codebooksize,'Value')}))+1; if isfield(ud.audiodata.MFCC,'codebook'); codebook = ud.audiodata.MFCC.codebook.codewords{codebooksize}; distortion = ud.audiodata.MFCC.codebook.distortion(codebooksize); end case 'LPC' data = ud.audiodata.LPC.a; codebooksize = log2(str2num(cbcontents{get(ud.codebooksize,'Value')}))+1; if isfield(ud.audiodata.LPC,'codebook'); codebook = ud.audiodata.LPC.codebook.codewords{codebooksize}; distortion = ud.audiodata.LPC.codebook.distortion(codebooksize); end end l = [0:size(data,2)-1]; switch contents{get(ud.datatype,'Value')} case 'LPC' if get(ud.original_vectors,'Value') if get(ud.show_codebook,'Value') for i=1:size(data,1) [ha,f]=freqz(1,data(i,:),512,ud.audiodata.Fs); plot(f,20*log10(abs(ha)./max(abs(ha))),'k'); end else for i=1:size(data,1) [ha,f]=freqz(1,data(i,:),512,ud.audiodata.Fs); plot(f,20*log10(abs(ha)./max(abs(ha))),colors(mod(i,length(colors))+1)); end end end if get(ud.show_codebook,'Value') for i=1:size(codebook,1) [a,b] = parcor_to_lpc(codebook(i,:)); [ha,f]=freqz(1,a,512,ud.audiodata.Fs); p = plot(f,20*log10(abs(ha)./max(abs(ha))), ... colors(mod(i,length(colors))+1),'LineWidth',2); end tt = text(0.01*ud.audiodata.Fs,-47, ... ['Distortion: ' num2str(distortion)], 'BackgroundColor','white'); set(tt,'FontSize',12,'FontWeight','bold'); end ylabel('Magnitude (dB)'); axis([0 ud.audiodata.Fs/2 -50 4.8]); grid on; otherwise if get(ud.original_vectors,'Value') if get(ud.show_codebook,'Value') for i=1:size(data,1) plot(l,data(i,:),'k'); end else for i=1:size(data,1) plot(l,data(i,:),colors(mod(i,length(colors))+1)); end end end if get(ud.show_codebook,'Value') for i=1:size(codebook,1) p = plot(l,codebook(i,:),colors(mod(i,length(colors))+1), ... 'LineWidth',2); end tt = text(0.5*size(data,2),0.9*max(max(data)), ... ['Distortion: ' num2str(distortion)], 'BackgroundColor','white'); set(tt,'FontSize',12,'FontWeight','bold'); end ylabel('Magnitude'); axis([0 size(data,2)-1 min(min(data)) max(max(data))]); grid on; end switch contents{get(ud.datatype,'Value')} case 'Cepstrum' xlabel('Cepstral Coefficient'); case 'MFCC' xlabel('Mel-Frequency Cepstral Coefficient'); case 'LPC' xlabel('Frequency (Hz)'); end function ud = createCodebooks(ud) e = str2num(get(ud.epsilon,'String')); distortion_threshold = str2num(get(ud.error_thresh,'String')); %distortion_threshold = 200; % First for cepstrum h = waitbar(0,'Creating Codebooks'); for mnum=1:3 clear y; switch mnum case 1 y = ud.audiodata.cepstrum.cc; disp('Creating codebooks for cepstrum vectors.'); mssg = 'Cepstral Codebook'; case 2 y = ud.audiodata.MFCC.cc; disp('Creating codebooks for mel-frequency cepstrum vectors.'); mssg = 'MF Cepstral Codebook'; case 3 R = ud.audiodata.LPC.R; %a = ud.audiodata.LPC.a; %b = ud.audiodata.LPC.b; sigma_p2 = ud.audiodata.LPC.sigma_p2; % Normalize autocorrelations for i=1:size(R,1) y(i,:) = R(i,:)./sigma_p2(i); end disp('Creating codebooks for LPC vectors.'); mssg = 'LPC Codebook'; end [numvec, numcc] = size(y); % Step 1: determine centroid vector centroid = zeros(1,numcc); for j=1:numcc centroid(j) = sum(y(:,j))/numvec; end codebook = centroid; % 1 element codebook dist = [0 0]; % Find initial distortion if mnum == 3 % LPC % Convert R-centroid to b for distance b = R_to_b(centroid); for j=1:numvec dist(1) = dist(1) + 2*b*y(j,:)' - 1; end codebook = R_to_parcor(centroid); else for j=1:numvec dist(1) = dist(1) + sum((y(j,:)-centroid).^2); end end dist(1) = dist(1)/numvec; disp(['Initial distortion = ' num2str(dist(1))]); % for each codebook size, using the previous designed codebook MAX_ITERATIONS = 100; k = 1; while (2^(k-1) < 256) num_codewords = 2^(k-1); % Must be 2^i, where i E Z disp(['Determining ' num2str(num_codewords) ' codewords...']); clear tempbook; tempbook = codebook; % Step 2: split centroid vectors into two codewords each for j=1:2^(k-1) codebook(2*j-1,:) = tempbook(j,:).*(1-e); codebook(2*j,:) = tempbook(j,:).*(1+e); end % Step 3: map each vector to these codewords matches = zeros(1,numvec); testortion(1:numvec) = 1.e15; for l=1:2^(k-1) for j=1:numvec if mnum == 3 [a,b] = parcor_to_lpc(codebook(l,:)); d = 2*b*y(j,:)' - 1; else d = sum((y(j,:)-codebook(l,:)).^2); end if d < testortion(j) testortion(j) = d; matches(j) = l; end end end dist(2) = 1.e15; count = 1; while (abs(diff(dist)) > distortion_threshold & count < MAX_ITERATIONS) % Step 4: Now find centroids of these matches and match dist(2) = 0; for j=1:2^k ind{j} = find(matches == j); if ~isempty(ind{j}) centroid = zeros(1,numcc); for m=1:numcc centroid(m) = sum(y(ind{j},m))/numel(ind{j}); end % Find new distortion for set of quantized vectors if mnum == 3 % Convert centroid R to parcor [a,b] = parcor_to_lpc(codebook(j,:)); for m=1:numel(ind{j}) dist(2) = dist(2) + 2*b*y(ind{j}(m),:)' - 1; end codebook(j,:) = R_to_parcor(centroid); else for m=1:numel(ind{j}) dist(2) = dist(2) + ... sum((y(ind{j}(m),:)-codebook(j,:)).^2); end codebook(j,:) = centroid; end end end dist(2) = dist(2)/numvec; % map each cc vector to these codewords matches = zeros(1,numvec); testortion(1:numvec) = 1.e15; for l=1:2^(k-1) for j=1:numvec if mnum == 3 [a,b] = parcor_to_lpc(codebook(l,:)); d = 2*b*y(j,:)' - 1; else d = sum((y(j,:)-codebook(l,:)).^2); end if d < testortion(j) testortion(j) = d; matches(j) = l; end end end dist(1) = sum(testortion)/numvec; count = count + 1; end % Find final distortion dist(2) = 0; for j=1:2^k ind{j} = find(matches == j); if ~isempty(ind{j}) % Find new distortion for set of quantized vectors if mnum == 3 [a,b] = parcor_to_lpc(codebook(j,:)); for m=1:numel(ind{j}) dist(2) = dist(2) + 2*b*y(ind{j}(m),:)' - 1; end else for m=1:numel(ind{j}) dist(2) = dist(2) + sum((y(ind{j}(m),:)-codebook(j,:)).^2); end end end end dist(2) = dist(2)/numvec; if count >= MAX_ITERATIONS disp('Maximum iterations exceeded!'); end % Place codebook into array disp(['Distortion = ' num2str(dist(2))]); contents{k} = num2str(2^(k-1)); if num_codewords > numvec disp(['Codebook size exceeds number of vectors (' num2str(numvec) ')!']); end tempbook = []; % Get rid of unused codewords for j=1:2^k ind{j} = find(matches == j); if ~isempty(ind{j}) tempbook = [tempbook; codebook(j,:)]; end end switch mnum case 1 ud.audiodata.cepstrum.codebook.codewords{k} = tempbook; ud.audiodata.cepstrum.codebook.distortion(k) = dist(2); case 2 ud.audiodata.MFCC.codebook.codewords{k} = tempbook; ud.audiodata.MFCC.codebook.distortion(k) = dist(2); case 3 ud.audiodata.LPC.codebook.codewords{k} = tempbook; ud.audiodata.LPC.codebook.distortion(k) = dist(2); end %tempbook k = k+1; waitbar((mnum-1)/3,h,['Creating ' mssg]); end end set(ud.codebooksize,'String',contents); waitbar(1,h); close(h); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function audiodata = endpoint(audiodata, window_size, window_skip) dBthreshold = 15; zcthreshold_low = 28; zcthreshold = 30; shape = 'Hamming'; y = audiodata.data; win = get_windowdata(window_size, shape); numwin = floor(size(y,1)/window_skip); % Make sure we have an integer number of windows if size(y,1) < ((numwin-1)*window_skip + window_size) diff = numwin*window_size + window_size - window_skip - length(y); y = [y; zeros(diff,1)]; end % Get data % Gather log energy and zerocrossings maxlogen = -1000; for j = 1:numwin, start = (j-1)*window_skip+1; stop = start+window_size-1; frame = y(start:stop); zc(j) = zero_crossings(frame); logen(j) = logenergy(frame,win); if logen(j) > maxlogen maxlogen = logen(j); end end % Normalize logen logen = (logen - maxlogen)'; % Avg number per 10 ms interval zc = zc'*window_skip/(2*window_size); tz = [1:size(zc,1)].*window_skip/audiodata.Fs; logen_i = find(logen > -dBthreshold); if numel(logen_i) > 0 % Add two frames to compensate for window size lower1 = logen_i(1)-1; higher1 = logen_i(end)+1; else lower1 = 1; higher1 = numel(tz); end % Revise with zerocrossings zc_i = find(zc(1:lower1) > zcthreshold); if ~isempty(zc_i) lower2 = zc_i(end); % Now find where it drops below zcthreshold_low zc_i2 = find(zc(1:lower2) < zcthreshold_low); if ~isempty(zc_i2) lower2 = zc_i2(end); end else lower2 = lower1; end zc_i = find(zc(higher1:end) > zcthreshold); if ~isempty(zc_i) higher2 = zc_i(1)+higher1; % Now find where it drops below zcthreshold_low zc_i2 = find(zc(higher2:end) < zcthreshold_low); if ~isempty(zc_i2) higher2 = zc_i2(1)+higher2; end else higher2 = higher1; end if higher2 > numel(tz) higher2 = numel(tz); end if lower2 == 0 lower2 = 1; end startpt = ceil(tz(lower2)*audiodata.Fs); endpt = floor(tz(higher2)*audiodata.Fs); audiodata.data = y(startpt:endpt,1); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function audiodata = getStatistics(audiodata, window_size, window_skip) audiodata.cepstrum.cc = cepstrum(audiodata, window_size, window_skip); audiodata.MFCC.cc = getmfcc(audiodata, window_size, window_skip); audiodata.LPC = getlpc(audiodata, window_size, window_skip); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function Nzeros = zero_crossings(signal) Nzeros = 0; for i=2:length(signal), if (sign(signal(i)) ~= sign(signal(i-1))) Nzeros = Nzeros + 1; end end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function logenergy_value = logenergy(signal,window) logenergy_value = 0; N = length(signal); signal = window.*signal; logenergy_value = 10*log10(1e-10+sum(signal.^2)); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function cepdata = cepstrum(audiodata, window_size, window_skip) y = audiodata.data; shape = 'Hamming'; win = get_windowdata(window_size, shape); numwin = floor(size(y,1)/window_skip); % Make sure we have an integer number of windows if size(y,1) < ((numwin-1)*window_skip + window_size) diff = numwin*window_size + window_size - window_skip - length(y); y = [y; zeros(diff,1)]; end N = window_size*4; % to avoid aliasing for i=1:numwin startN = (i-1)*window_skip+1; s = win.*y(startN:startN+window_size-1); S = fft(s,N); SC = log(abs(S));% + j*angle(S); sc = (ifft(SC)); % take first 16 coefficients except for first one cepdata(i,:) = real(sc(2:17)); end %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function mfccvals = getmfcc(audiodata, windowsize, windowskip) mfccvals = mfcc(audiodata.data, audiodata.Fs, windowsize, windowskip)'; mfccvals = mfccvals(:,2:end); %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% function lpcdata = getlpc(audiodata, window_size, window_skip) y = audiodata.data; shape = 'Hamming'; win = get_windowdata(window_size, shape); numwin = floor(size(y,1)/window_skip); % Make sure we have an integer number of windows if size(y,1) < ((numwin-1)*window_skip + window_size) diff = numwin*window_size + window_size - window_skip - length(y); y = [y; zeros(diff,1)]; end p = 16; for i=1:numwin startN = (i-1)*window_skip+1; signal = win.*y(startN:startN+window_size-1); R = xcorr(signal); R = R(ceil(end/2):end); R = R(1:p+1); Rt = toeplitz(R(1:end-1)); lpcdata.a(i,:) = [ -1; inv(Rt)*R(2:end)]'; %a = lpc(signal,p)'; % For likelihood-ratio distance calculation lpcdata.R(i,:) = R'; b = xcorr(lpcdata.a(i,:)); b = b(ceil(end/2):end); b = [b(1)/2 b(2:p+1)]; lpcdata.b(i,:) = b; %[temp lpcdata.sigma_p3(i)] = lpc(signal,p); %lpcdata.sigma_p(i) = a(2:end)'*Rt*a(2:end); lpcdata.sigma_p2(i) = 2*b*R(:); end function k = R_to_parcor(R) p = 16; E=zeros(1,p); k=zeros(1,p); alpha=zeros(p,p); E(1)=R(1); ind=1; k(ind)=R(ind+1)/E(ind); alpha(ind,ind)=k(ind); E(ind+1)=(1-k(ind).^2)*E(ind); for ind=2:p k(ind)=(R(ind+1)-sum(alpha(1:ind-1,ind-1)'.*R(ind:-1:2)))/E(ind); alpha(ind,ind)=k(ind); for jnd=1:ind-1 alpha(jnd,ind)=alpha(jnd,ind-1)-k(ind)*alpha(ind-jnd,ind-1); end E(ind+1)=(1-k(ind).^2)*E(ind); end %G=sqrt(E(p+1)); function [a,b] = parcor_to_lpc(k) p = 16; alpha(1:p,1:p)=0; for i=1:p alpha(i,i)=k(i); if (i > 1) for j=1:i-1 alpha(j,i)=alpha(j,i-1)-k(i)*alpha(i-j,i-1); end end end a = [-1; alpha(1:p,p)]; b = xcorr(a); b = b(ceil(end/2):end)'; b = [b(1)/2 b(2:p+1)]; function b = R_to_b(R) p = 16; Rt = toeplitz(R(1:end-1)); a = [ -1; inv(Rt)*R(2:end)']'; b = xcorr(a); b = b(ceil(end/2):end); b = [b(1)/2 b(2:end)];