www.gusucode.com > nnet 工具箱 matlab 源码程序 > nnet/nnderivative/+nnGPU/codeHints.m
function hints = codeHints(hints) % Copyright 2012-2014 The MathWorks, Inc. MAX_MASKS = 3; % C Precision switch hints.precision case 'single', cPrecision = 'float'; case 'double', cPrecision = 'double'; end % Y KERNEL yProto = [ ... 'PRECISION *, PRECISION *,'... 'const PRECISION *,' ... 'const PRECISION *, const PRECISION *, const PRECISION *, const PRECISION *,' ... 'const long long, const long long, const long long']; yProto = strrep(yProto,'PRECISION',cPrecision); hints.yKernel = nnGPU.getKernel(['yy_' hints.precision],yProto); if (hints.yKernel.MaxThreadsPerBlock >= 1024) hints.yBlockWidth = 32; elseif (hints.yKernel.MaxThreadsPerBlock >= 256) hints.yBlockWidth = 16; else hints.yBlockWidth = 8; end hints.yGridSize = ceil(hints.Q / hints.yBlockWidth); hints.yKernel.ThreadBlockSize = [hints.yBlockWidth hints.yBlockWidth]; hints.yKernel.GridSize = hints.yGridSize; hints.yKernel.SharedMemorySize = 2*(hints.yBlockWidth^2)*hints.valSize; setConstantMemory(hints.yKernel,'hintsD',hints.double); setConstantMemory(hints.yKernel,'hintsL',hints.long); % PERFORMANCE KERNEL perfsProto = [ ... 'PRECISION *,' ... 'const PRECISION *,' ... 'const PRECISION *, const PRECISION *, const PRECISION *, const PRECISION *, PRECISION *,' ... 'const PRECISION *, const PRECISION *, const signed char * const,' ... 'const long long, const long long, const long long,const long long']; perfsProto = strrep(perfsProto,'PRECISION',cPrecision); hints.perfsKernel = nnGPU.getKernel(['perfs_' hints.precision],perfsProto); if (hints.perfsKernel.MaxThreadsPerBlock >= 1024) hints.perfsBlockWidth = 32; elseif (hints.perfsKernel.MaxThreadsPerBlock >= 256) hints.perfsBlockWidth = 16; else hints.perfsBlockWidth = 8; end hints.perfsGridSize = ceil(hints.Q / hints.perfsBlockWidth); hints.perfsKernel.ThreadBlockSize = [hints.perfsBlockWidth hints.perfsBlockWidth]; hints.perfsKernel.GridSize = hints.perfsGridSize; hints.perfsKernel.SharedMemorySize = 2*(hints.perfsBlockWidth^2)*hints.valSize; setConstantMemory(hints.perfsKernel,'hintsD',hints.double); setConstantMemory(hints.perfsKernel,'hintsL',hints.long); % ALLOCATE PERFORMANCE RESULT Perfs_and_N = zeros(2*MAX_MASKS,hints.perfsGridSize,hints.precision); if isempty(Perfs_and_N) % GPUs do not like empty matrices Perfs_and_N = zeros(2*MAX_MASKS,1,hints.precision); end hints.Perfs_and_N = gpuArray(Perfs_and_N); % BG KERNEL bgProto = [ ... 'PRECISION * const,' ... 'PRECISION * const,' ... 'PRECISION * const,' ... 'const PRECISION * const,' ... 'const PRECISION * const, const PRECISION * const, const PRECISION * const, const PRECISION * const, PRECISION * const,' ... 'const PRECISION * const, const PRECISION * const, const signed char * const,' ... 'const long long, const long long, const long long, const long long']; bgProto = strrep(bgProto,'PRECISION',cPrecision); hints.bgKernel = nnGPU.getKernel(['bg_' hints.precision],bgProto); if (hints.bgKernel.MaxThreadsPerBlock >= 1024) hints.bgBlockWidth = 32; elseif (hints.bgKernel.MaxThreadsPerBlock >= 256) hints.bgBlockWidth = 16; else hints.bgBlockWidth = 8; end hints.bgGridSize = ceil(hints.Q / hints.bgBlockWidth); hints.bgKernel.ThreadBlockSize = [hints.bgBlockWidth hints.bgBlockWidth]; hints.bgKernel.GridSize = hints.bgGridSize; hints.bgKernel.SharedMemorySize = 2*(hints.bgBlockWidth^2)*hints.valSize; setConstantMemory(hints.bgKernel,'hintsD',hints.double); setConstantMemory(hints.bgKernel,'hintsL',hints.long); % ALLOCATE BG OUPUTS hints.dWB = gpuArray(zeros(hints.gpuLearnWB.wbLen,hints.perfsGridSize,hints.precision)); % ALLOCATE TEMPORARY BG STORAGE N_size = hints.QAligned * sum(hints.layer_sizes) * hints.TS; dAc_size = hints.QAligned * sum(hints.layer_sizes) * (hints.numLayerDelays + hints.TS); TEMP_size = N_size + dAc_size; hints.TEMP = gpuArray(zeros(1,TEMP_size,hints.precision)); hints.emptyD = gpuArray(ones(0,0,'double')); hints.emptyL = gpuArray(ones(0,0,'int64'));