% www.gusucode.com > 图像FFT_DCT源码程序 > 图像FFT_DCT源码程序/spectral_saliency_matlab/benchmark_gpu_convolution.m
% A simple spectral saliency benchmark on the GPU.
%
% Question: At which resolution may it become beneficial to switch from a
% CPU to a GPU implementation?
%
% Notes:
% - This is very simple code to check whether it makes sense to use the GPU
%   or not. Of course a specialized implementation may be faster.
% - You can optionally exclude the memory transfer from the basic benchmark
% - Since the DCT code in principle relies on the FFT, the most costly
%   functions are equivalent and thus this code is more or less
%   representative for PFT/SW as well
%
% @author B. Schauerte
% @date 2012
% Copyright 2012 B. Schauerte. All rights reserved.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are
% met:
%
% 1. Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
%
% 2. Redistributions in binary form must reproduce the above copyright
% notice, this list of conditions and the following disclaimer in
% the documentation and/or other materials provided with the
% distribution.
%
% THIS SOFTWARE IS PROVIDED BY B. SCHAUERTE ''AS IS'' AND ANY EXPRESS OR
% IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
% WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
% DISCLAIMED. IN NO EVENT SHALL B. SCHAUERTE OR CONTRIBUTORS BE LIABLE
% FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
% CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
% SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
% BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
% WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
% OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
% ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
%
% The views and conclusions contained in the software and documentation
% are those of the authors and should not be interpreted as representing
% official policies, either expressed or implied, of B. Schauerte.

%%
% Select the inverse 2-D DCT that is applied to gpuArray data: use a
% 'gpu_idct2' found on the path if there is one, otherwise fall back to
% idct2.
if exist('gpu_idct2','file')
    gpu_idct2_func=@gpu_idct2;
else
    gpu_idct2_func=@idct2; % see error check below
end

%%
% A simple test to find a possible problem/bug: run the selected inverse
% DCT once on a small gpuArray and explain the required patch if the call
% fails with 'MATLAB:UnableToConvert'.
try
    foo=gpu_idct2_func(gpuArray(rand(64,64)));
catch err
    if strcmp(err.identifier,'MATLAB:UnableToConvert')
        fprintf(['You need to patch Matlab''s idct function. I.e., you need\n' ...
                 'to change this:\n' ...
                 ' %% Re-order elements of each column according to equations (5.93) and\n' ...
                 ' %% (5.94) in Jain\n' ...
                 ' a = zeros(n,m);\n' ...
                 ' a(1:2:n,:) = y(1:n/2,:);\n' ...
                 ' a(2:2:n,:) = y(n:-1:n/2+1,:);\n' ...
                 'to:\n' ...
                 ' %% Re-order elements of each column according to equations (5.93) and\n' ...
                 ' %% (5.94) in Jain\n' ...
                 ' a = parallel.gpu.GPUArray.zeros(n,m);\n' ...
                 ' a(1:2:n,:) = y(1:n/2,:);\n' ...
                 ' a(2:2:n,:) = y(n:-1:n/2+1,:);\n' ...
                 'Unfortunately, since this is copyrighted code by Mathworks, I\n' ...
                 'can not include an already patched version with the package.\n' ...
                 'You best save the patched file as ''gpu_idct'' and also create\n' ...
                 'a patched ''gpu_idct2'' that calls ''gpu_idct''.\n' ...
                 ]);
        % Abort with a descriptive error (instead of a bare assertion
        % failure) so the reason is visible in the error message itself.
        error('benchmark_gpu_convolution:unpatchedIdct', ...
              'The selected inverse DCT cannot process gpuArray input; see the patch instructions printed above.');
    else
        % any other failure is unrelated to the known idct issue
        rethrow(err);
    end
end

%%
% Print information about the GPU on which the benchmark runs
gdev=gpuDevice();
fprintf('Using %s as GPU ...\n', gdev.Name);

%%
% Number of benchmark runs to get more stable results
num_runs=10;
% incorporate the memory transfer in the benchmark?
% (true: add the measured host->device / device->host copy time to the
%  GPU total of each run)
benchmark_with_gpu_memory_in=false;
benchmark_with_gpu_memory_out=false;

%resolutions=[128,128;256,256;512,512;1024,1024];
resolutions=[64,48;128,96;256,192;320,240;640,480;1280,960;1600,1200];
num_resolutions=size(resolutions,1);

% times(i,:) = [CPU GPU] time for resolution i, summed over all num_runs runs
times=zeros(num_resolutions,2);

% NOTE: GPU operations may return before the device has finished the work,
% so every GPU timing below calls wait(gdev) before toc. gdev is the
% gpuDevice handle obtained earlier in this script. Without the
% synchronisation, unfinished compute time would be attributed to the
% subsequent gather (i.e. to the transfer-out time) instead of the operation.
for r=1:num_runs
    for i=1:num_resolutions
        in=rand(resolutions(i,:));

        % memory transfer (host -> device)
        in_cpu=in;
        tic;
        in_gpu=gpuArray(in);
        wait(gdev);
        t_gpu_transfer_in=toc;

        % No pre-allocation of the result arrays: each result is assigned
        % as a whole below, so pre-allocated arrays would only be discarded.
        total_t_cpu=0;
        total_t_gpu=0;

        % forward 2-D DCT
        tic;
        dct_cpu=dct2(in_cpu);
        t_cpu=toc;
        tic;
        dct_gpu=dct2(in_gpu);
        wait(gdev);
        t_gpu=toc;
        fprintf('OP=DCT2: size=[%4d %4d] GPU=%f CPU=%f\n',resolutions(i,1),resolutions(i,2),t_gpu,t_cpu);
        total_t_cpu=total_t_cpu+t_cpu;
        total_t_gpu=total_t_gpu+t_gpu;

        % element-wise sign of the DCT coefficients
        tic;
        sign_dct_cpu=sign(dct_cpu);
        t_cpu=toc;
        tic;
        sign_dct_gpu=sign(dct_gpu);
        wait(gdev);
        t_gpu=toc;
        fprintf('OP=SIGN: size=[%4d %4d] GPU=%f CPU=%f\n',resolutions(i,1),resolutions(i,2),t_gpu,t_cpu);
        total_t_cpu=total_t_cpu+t_cpu;
        total_t_gpu=total_t_gpu+t_gpu;

        % inverse 2-D DCT followed by element-wise squaring
        tic;
        idct_sign_dct_cpu=idct2(sign_dct_cpu).^2;
        t_cpu=toc;
        tic;
        idct_sign_dct_gpu=gpu_idct2_func(sign_dct_gpu).^2;
        wait(gdev);
        t_gpu=toc;
        fprintf('OP=DCTI: size=[%4d %4d] GPU=%f CPU=%f\n',resolutions(i,1),resolutions(i,2),t_gpu,t_cpu);
        total_t_cpu=total_t_cpu+t_cpu;
        total_t_gpu=total_t_gpu+t_gpu;

        % @note: we don't do filtering here (but you can add it, if you wish)

        % memory transfer (device -> host)
        tic;
        out_gpu=gather(idct_sign_dct_gpu);
        t_gpu_transfer_out=toc;

        if benchmark_with_gpu_memory_in
            total_t_gpu = total_t_gpu + t_gpu_transfer_in;
        end
        if benchmark_with_gpu_memory_out
            total_t_gpu = total_t_gpu + t_gpu_transfer_out;
        end

        times(i,:) = times(i,:) + [total_t_cpu total_t_gpu];
    end
end

%%
% Grouped bar chart: one group per resolution, bars = [CPU GPU] summed time
figure
bar(times); %plot(times);
xlabel('resolution');
ylabel('time');
% Collect the labels in a cell array: unlike a vertically concatenated char
% matrix this does not require all labels to have the same length, and the
% local name does not shadow MATLAB's xticklabels function.
tick_labels=cell(size(resolutions,1),1);
for i=1:size(resolutions,1)
    tick_labels{i}=sprintf('[%4d %4d]',resolutions(i,1),resolutions(i,2));
end
% fix the tick positions so that every bar group gets exactly one label
set(gca,'XTick',1:size(resolutions,1),'XTickLabel',tick_labels)
legend('CPU','GPU');

% %%
% figure
% subplot(1,2,1); imshow(mat2gray(idct_sign_dct_cpu)); colormap('jet');
% subplot(1,2,2); imshow(mat2gray(gather(idct_sign_dct_gpu))); colormap('jet');