www.gusucode.com > 声音的处理有:LPC,FFT,共振峰,频谱源码程序 > siganlandsystemusingMatlab/SSUM/speech/endpoint/endpointexpofn.m
function endpointexpofn(action,datastruct) if nargin < 1 action='init'; end name = mfilename; figname = [name(1:end-2) '_fig']; f=findobj('Tag',figname); handles = get(f,'UserData'); switch action case 'help' display_help(figname); case 'init' setdefaults; reset(handles.specplot); reset(handles.rmsplot); reset(handles.zcplot); reset(handles.timeplot); set(handles.rmsplot,'XTickLabel',''); set(handles.timeplot,'XTickLabel',''); case 'loadsound' handles.audiodata = load_audiodata; if ~isfield(handles.audiodata, 'filenamepath') return; end if (size(handles.audiodata.data,2) > 1) handles.audiodata.data = to_mono(handles.audiodata.data); end % Pre-emphasize handles.audiodata.data = filter([1 -.9], 1, handles.audiodata.data); if (handles.audiodata.Fs > 8000) handles.audiodata.data = resample(handles.audiodata.data, ... 8000, handles.audiodata.Fs); handles.audiodata.Fs = 8000; end handles.stats = getStatistics(handles); handles = makeDataPlot(handles,'time'); handles = makeDataPlot(handles,'spectrum'); handles = makeDataPlot(handles,'rms'); handles = makeDataPlot(handles,'zc'); handles = findEndpoints(handles); place_header(f, handles); case 'readinput' handles.audiodata = datastruct; clear datastruct; handles.audiodata.filenamepath = ''; handles.audiodata.nBits = 16; handles.audiodata; handles.stats = getStatistics(handles); handles = makeDataPlot(handles,'time'); handles = makeDataPlot(handles,'spectrum'); handles = makeDataPlot(handles,'rms'); handles = makeDataPlot(handles,'zc'); handles = findEndpoints(handles); place_header(f, handles); case 'playsound' if isfield(handles, 'audiodata') audiodata = handles.audiodata; if (max(abs(audiodata.data)) > 1.0) audiodata.data = normalize(audiodata.data); end play_audiodata(audiodata, handles.play); end case 'play_endpoint' if isfield(handles, 'audiodata') audiodata = handles.audiodata; samples = floor(handles.endpoints*handles.audiodata.Fs+1); audiodata.data = audiodata.data(samples(1):samples(2),1); if (max(abs(audiodata.data)) > 1.0) audiodata.data = normalize(audiodata.data); end play_audiodata(audiodata, handles.play); end case 'updateanalysis' handles.stats = getStatistics(handles); handles = makeDataPlot(handles,'time'); handles = makeDataPlot(handles,'rms'); handles = makeDataPlot(handles,'zc'); handles = findEndpoints(handles); case 'updateendpt' handles = makeDataPlot(handles,'time'); handles = makeDataPlot(handles,'rms'); handles = makeDataPlot(handles,'zc'); handles = findEndpoints(handles); case 'updatelpc' handles = makeDataPlot(handles,'spectrum'); case {'fftsize','window'} if isfield(handles, 'audiodata') contents = get(handles.fftsize,'String'); fftsize = contents{get(handles.fftsize,'Value')}; if strncmp(fftsize,'Entire',6) fftsize = length(handles.audiodata.data); else fftsize = str2num(fftsize); end if (fftsize > length(handles.audiodata.data)) h = errordlg('FFTSize larger than data size!'); uiwait(h); set(handles.fftsize,'Value',1); fftsize = str2num(contents{get(handles.fftsize,'Value')}); end wintime = fftsize/handles.audiodata.Fs; handles.right = handles.left + wintime; if handles.right > handles.maxtime handles.right = handles.maxtime; handles.left = handles.maxtime - wintime; end set(handles.l,'XData',[handles.left handles.left]); set(handles.r,'XData',[handles.right handles.right]); handles = makeDataPlot(handles,'time'); handles = makeDataPlot(handles,'spectrum'); handles = findEndpoints(handles); end case 'normalize' if isfield(handles,'audiodata') handles.audiodata.data = normalize(handles.audiodata.data); handles = makeDataPlot(handles,'time'); % handles = makeDataPlot(handles,'spectrum'); % handles = makeDataPlot(handles,'rms'); % handles = makeDataPlot(handles,'zc'); handles = findEndpoints(handles); end % The following taken in large part from MAD. case 'left' set(gcf,'WindowButtonMotionFcn','endpointexpofn moveleft'); set(gcf,'WindowButtonUpFcn','endpointexpofn release'); case 'right' set(gcf,'WindowButtonMotionFcn','endpointexpofn moveright'); set(gcf,'WindowButtonUpFcn','endpointexpofn release'); case 'moveleft' handles = moveleft(handles); updateSpecPlot(handles); case 'moveright' handles = moveright(handles); updateSpecPlot(handles); case 'release' set(gcf,'WindowButtonMotionFcn',''); set(gcf,'WindowButtonUpFcn',''); refresh(f); case {'sonogram', 'alias'} if isfield(handles,'audiodata'), audiodata = handles.audiodata; switch action case 'sonogram' sonoexpogui(audiodata); case 'alias' aliasexpogui(audiodata); end end case 'print' print_figure(f); case 'close' close_figure(f,figname(1:end-4)); return; end set(f,'UserData',handles); % -------------------------------------------------------------------- function handles = makeTimePlot(handles); contents = get(handles.fftsize,'String'); fftsize = contents{get(handles.fftsize,'Value')}; if strncmp(fftsize,'Entire',6) fftsize = length(handles.audiodata.data); else fftsize = str2num(fftsize); end axes(handles.timeplot); cla; handles.t = [0:1/handles.audiodata.Fs:(length(handles.audiodata.data)-1)/... handles.audiodata.Fs]; maxtime = length(handles.t)/handles.audiodata.Fs; handles.maxtime = maxtime; handles.winleft = 0; handles.winright = maxtime; if max(handles.audiodata.data) > 1 handles.audiodata.data = normalize(handles.audiodata.data); end plot(handles.t,handles.audiodata.data) set(handles.timeplot,'NextPlot','replacechildren','Drawmode','fast',... 'XLim',[0 maxtime], 'YLim', [-1.1 1.1],'YTickLabel',''); grid on; hold on; handles.left = 0.1*maxtime; handles.delta = fftsize/handles.audiodata.Fs; handles.right = handles.left + handles.delta; handles.l=line([handles.left handles.left],get(gca,'YLim'),'Color','r',... 'ButtonDownFcn','endpointexpofn left','LineWidth', 2,'EraseMode','xor'); handles.r=line([handles.right handles.right],get(gca,'YLim'),'Color','r',... 'ButtonDownFcn','endpointexpofn right','LineWidth', 2,'EraseMode','xor'); handles.step=0.2*maxtime; function ud = makeDataPlot(ud,type) t = linspace(0,size(ud.audiodata.data,1)/ud.audiodata.Fs,... size(ud.stats.zc,1)); signal = ud.audiodata.data; switch type case 'time' cla(ud.timeplot); ud = makeTimePlot(ud); set(ud.timeplot,'XTickLabel',''); grid on; case 'spectrum' cla(ud.specplot); ud = makeSpecPlot(ud); grid on; case 'rms' cla(ud.rmsplot); axes(ud.rmsplot); plot(t,ud.stats.logen); set(ud.rmsplot,'XTickLabel',''); ylabel('Log RMS'); grid on; axis([0 max(t) min(ud.stats.logen)*1.1 5]); case 'zc' cla(ud.zcplot); axes(ud.zcplot); plot(t,ud.stats.zc); ylabel('Mean Zero Crossings'); xlabel('Time (s)'); grid on; axis([0 max(t) 0 max(ud.stats.zc)*1.1]); end function handles = makeSpecPlot(handles); contents = get(handles.fftsize,'String'); fftsize = contents{get(handles.fftsize,'Value')}; if strncmp(fftsize,'Entire',6) fftsize = length(handles.audiodata.data); else fftsize = str2num(fftsize); end contents = get(handles.Window,'String'); shape = contents{get(handles.Window,'Value')}; axes(handles.specplot); % Get x-position of samples startpos = floor(handles.left*handles.audiodata.Fs+1); signal = handles.audiodata.data(startpos:startpos+fftsize-1).*... get_windowdata(fftsize, shape); freqs = linspace(-handles.audiodata.Fs/2,handles.audiodata.Fs/2,fftsize); % find and plot formant p = str2num(get(handles.lpcorder,'String')); R = xcorr(signal,'coeff'); R = R(fftsize:end); R = R(1:p+1); Rt = toeplitz(R(1:end-1)); alpha = inv(Rt)*R(2:end); H = freqz(1,[1 -alpha'],fftsize/2); Hmag = 20*log10(1e-10+abs(H)); maxXmag = max(Hmag); Hmag = Hmag - maxXmag; axes(handles.specplot); handles.formant = plot(freqs',[flipud(Hmag); Hmag]); set(handles.formant,'LineStyle','--','LineWidth',3,... 'Color','g','EraseMode','background'); if fftsize < 2048 fftsize = 2048; % Force zeropadding end fftsignal = fft(signal,fftsize); f = fftshift(fftsignal); f = 20*log10(1e-10+abs(f)); fmax = max(f); f = f - fmax; freqs = linspace(-handles.audiodata.Fs/2,handles.audiodata.Fs/2,fftsize); axes(handles.specplot); hold on; handles.sp = plot(freqs,f); set(handles.specplot,'XLim',[0 handles.audiodata.Fs/2], ... 'YLim',[-100 5]); axis xy; grid; ylabel('Log Magnitude (dB)'); set(handles.sp, 'EraseMode','background','LineWidth',1); xlabel('Frequency (Hz)'); % -------------------------------------------------------------------- function updateTimePlot(handles) axes(handles.timeplot) plot(handles.t,handles.audiodata) xlabel('time (s)'); grid on; function updateSpecPlot(handles) contents = get(handles.fftsize,'String'); fftsize = contents{get(handles.fftsize,'Value')}; if strncmp(fftsize,'Entire',6) fftsize = length(handles.audiodata.data); else fftsize = str2num(fftsize); end contents = get(handles.Window,'String'); shape = contents{get(handles.Window,'Value')}; % Get x-position of samples startpos = floor(handles.left*handles.audiodata.Fs+1); signal = handles.audiodata.data(startpos:startpos+fftsize-1).*... get_windowdata(fftsize, shape); freqs = linspace(-handles.audiodata.Fs/2,handles.audiodata.Fs/2,fftsize); % find and plot formant p = str2num(get(handles.lpcorder,'String')); R = xcorr(signal,'coeff'); R = R(fftsize:end); R = R(1:p+1); Rt = toeplitz(R(1:end-1)); alpha = inv(Rt)*R(2:end); H = freqz(1,[1 -alpha'],fftsize/2); Hmag = 20*log10(1e-10+abs(H)); maxXmag = max(Hmag); Hmag = Hmag - maxXmag; set(handles.formant,'YData', [flipud(Hmag); Hmag]); if fftsize < 2048 fftsize = 2048; % Force zeropadding end fftsignal = fft(signal,fftsize); f = fftshift(fftsignal); f = 20*log10(1e-10+abs(f)); fmax = max(f); f = f - fmax; set(handles.sp, 'YData', f); % -------------------------------------------------------------------- function ud = moveleft(ud) contents = get(ud.fftsize,'String'); fftsize = contents{get(ud.fftsize,'Value')}; if strncmp(fftsize,'Entire',6) fftsize = length(ud.audiodata.data); else fftsize = str2num(fftsize); end currentPoint=get(gca,'CurrentPoint'); ud.left=currentPoint(1); wintime = fftsize/ud.audiodata.Fs; if ud.left <= ud.winleft ud.left=0; end if ud.left >= (ud.winright-wintime) ud.left=ud.winright-wintime; end set(ud.l,'XData',[ud.left ud.left]); set(ud.r,'XData',[ud.left+wintime ud.left+wintime]); function ud = moveright(ud) contents = get(ud.fftsize,'String'); fftsize = contents{get(ud.fftsize,'Value')}; if strncmp(fftsize,'Entire',6) fftsize = length(ud.audiodata.data); else fftsize = str2num(fftsize); end currentPoint=get(gca,'CurrentPoint'); ud.right=currentPoint(1); wintime = fftsize/ud.audiodata.Fs; if ud.right <= (ud.winleft+wintime) ud.right=wintime; end if ud.right >= (ud.winright) ud.right=ud.winright; end ud.left = ud.right - wintime; set(ud.r,'XData',[ud.right ud.right]); set(ud.l,'XData',[ud.right-wintime ud.right-wintime]); function stats = getStatistics(ud) y = ud.audiodata.data; shape = 'Hamming'; anal_window_size = floor(str2num(get(ud.analysis_wsize,'String'))*ud.audiodata.Fs/1000); anal_window_skip = floor(str2num(get(ud.analysis_wskip,'String'))*ud.audiodata.Fs/1000); win = get_windowdata(anal_window_size, shape); numwin = floor(size(y,1)/anal_window_skip); % Make sure we have an integer number of windows if size(y,1) < ((numwin-1)*anal_window_skip + anal_window_size) diff = numwin*anal_window_size + anal_window_size - anal_window_skip - length(y); y = [y; zeros(diff,1)]; end % Get data % Gather log energy and zerocrossings maxlogen = -1000; for j = 1:numwin, start = (j-1)*anal_window_skip+1; stop = start+anal_window_size-1; frame = y(start:stop); stats.zc(j) = zero_crossings(frame); stats.logen(j) = logenergy(frame,win); if stats.logen(j) > maxlogen maxlogen = stats.logen(j); end end % Normalize logen stats.logen = (stats.logen - maxlogen)'; % Avg number per 10 ms interval stats.zc = stats.zc'*anal_window_skip/(2*anal_window_size); function Nzeros = zero_crossings(signal) Nzeros = 0; for i=2:length(signal), if (sign(signal(i)) ~= sign(signal(i-1))) Nzeros = Nzeros + 1; end end function logenergy_value = logenergy(signal,window) logenergy_value = 0; N = length(signal); signal = window.*signal; logenergy_value = 10*log10(1e-10+sum(signal.^2)); function ud = findEndpoints(ud) anal_window_skip = floor(str2num(get(ud.analysis_wskip,'String'))* ... ud.audiodata.Fs/1000); dBthreshold = str2num(get(ud.logrms_threshold,'String')); zcthreshold_low = str2num(get(ud.zcthresh_low,'String')); zcthreshold = str2num(get(ud.zcthresh_high,'String')); y = ud.stats; tz = [1:size(y.zc,1)].*anal_window_skip/ud.audiodata.Fs; logen_i = find(y.logen > -dBthreshold); if numel(logen_i) > 0 % Add two frames to compensate for window size lower1 = logen_i(1)-1; higher1 = logen_i(end)+1; else lower1 = 1; higher1 = numel(tz); end if higher1 > numel(tz) higher1 = numel(tz); end if lower1 == 0 lower1 = 1; end % Now plot these initial lines faxes = [ud.timeplot ud.rmsplot ud.zcplot]; for i=1:3 axes(faxes(i)); yLim = get(faxes(i),'YLim'); hold on; plot([tz(lower1) tz(lower1)], yLim,'r--'); plot([tz(higher1) tz(higher1)], yLim,'r--'); end % Revise with zerocrossings zc_i = find(y.zc(1:lower1) > zcthreshold); if ~isempty(zc_i) lower2 = zc_i(end); % Now find where it drops below zcthreshold_low zc_i2 = find(y.zc(1:lower2) < zcthreshold_low); if ~isempty(zc_i2) lower2 = zc_i2(end); end else lower2 = lower1; end zc_i = find(y.zc(higher1:end) > zcthreshold); if ~isempty(zc_i) higher2 = zc_i(1)+higher1; % Now find where it drops below zcthreshold_low zc_i2 = find(y.zc(higher2:end) < zcthreshold_low); if ~isempty(zc_i2) higher2 = zc_i2(1)+higher2; end else higher2 = higher1; end if higher2 > numel(tz) higher2 = numel(tz); end if lower2 == 0 lower2 = 1; end for i=1:3 axes(faxes(i)); yLim = get(faxes(i),'YLim'); hold on; plot([tz(lower2) tz(lower2)], yLim,'k--'); plot([tz(higher2) tz(higher2)], yLim,'k--'); end ud.endpoints = [tz(lower2) tz(higher2)];