www.gusucode.com > 3-D Deep Learning - Lung Tumor Segmentation > 3-D Deep Learning - Lung Tumor Segmentation/packages/3DVNet/preprocessLungTSdataset.m
function preprocessLungTSdataset(destination,source)
% preprocessLungTSdataset Crop, normalize and split the Task06_Lung data.
%
%   preprocessLungTSdataset(DESTINATION,SOURCE) reads the NIfTI volumes in
%   SOURCE/imagesTr and the matching label volumes in SOURCE/labelsTr, and
%   for every volume:
%     1. crops a region around the labelled tumor (bounding box of the
%        first labelled component, padded by 64 voxels on every side),
%     2. trims the crop so each spatial size is a multiple of 8,
%     3. normalizes the cropped volume by subtracting its mean and
%        dividing by its standard deviation, then rescales it to [0, 1],
%     4. saves the result as LungTS###.mat in the training, validation or
%        test subfolder of DESTINATION.
%
%   SOURCE may be an absolute path or a path relative to the current
%   folder. Preprocessing is skipped when DESTINATION already holds the
%   expected number of preprocessed image files.
%
% Copyright 2019 The MathWorks, Inc.

%% Load data
sourceDir = resolveFolder(source);
volLoc = fullfile(sourceDir,'imagesTr');
lblLoc = fullfile(sourceDir,'labelsTr');
if ~isfolder(volLoc) || ~isfolder(lblLoc)
    error('Please unzip Task06_Lung file to %s',source);
end

moveHiddenFiles(sourceDir,volLoc,lblLoc);

% If the directory for preprocessed data does not exist, or only a partial
% set of the data files have been processed, process the data.
if isfolder(destination) && ~proceedWithPreprocessing(destination)
    return
end

subsetNames = {'Tr','Val','Test'};
for k = 1:numel(subsetNames)
    ensureFolder(fullfile(destination,['images' subsetNames{k}]));
    ensureFolder(fullfile(destination,['labels' subsetNames{k}]));
end

labelReader = @(x) (niftiread(x) > 0);
volReader = @(x) niftiread(x);
volds = imageDatastore(volLoc, ...
    'FileExtensions','.gz','ReadFcn',volReader);

classNames = ["background","tumor"];
pixelLabelID = [0 1];
pxds = pixelLabelDatastore(lblLoc,classNames,pixelLabelID, ...
    'FileExtensions','.gz','ReadFcn',labelReader);

% The two datastores are read in lockstep, so they must list the same
% number of files.
NumFiles = numel(pxds.Files);
if numel(volds.Files) ~= NumFiles
    error('preprocessLungTSdataset:fileCountMismatch', ...
        'Found %d volumes in %s but %d label files in %s.', ...
        numel(volds.Files),volLoc,NumFiles,lblLoc);
end

reset(volds);
reset(pxds);

%% Crop relevant region
tol = 64;   % Padding, in voxels, added around the tumor bounding box
id = 1;
while hasdata(pxds)
    outL = readNumeric(pxds);
    outV = read(volds);

    % Size of the label volume as [rows cols planes]
    sz = [size(outL,1) size(outL,2) size(outL,3)];

    reg = regionprops3(outL > 0,'BoundingBox');
    if isempty(reg)
        error('preprocessLungTSdataset:noTumorLabel', ...
            'Label volume %s contains no tumor voxels to crop around.', ...
            pxds.Files{id});
    end

    % BoundingBox is [x y z widthX widthY widthZ]; x indexes columns and
    % y indexes rows.
    % NOTE(review): only the first connected component is used.
    ROI = ceil(reg.BoundingBox(1,:));
    ROIst = ROI(1:3) - tol;
    ROIend = ROI(1:3) + ROI(4:6) + tol;

    % Clamp to the volume. The limits are reordered to [cols rows planes]
    % so that they match the [x y z] order of the bounding box.
    limXYZ = sz([2 1 3]);
    ROIst(ROIst < 1) = 1;
    tooLarge = ROIend > limXYZ;
    ROIend(tooLarge) = limXYZ(tooLarge);

    tumorRows = ROIst(2):ROIend(2);
    tumorCols = ROIst(1):ROIend(1);
    tumorPlanes = ROIst(3):ROIend(3);

    tcropVol = outV(tumorRows,tumorCols,tumorPlanes,:);
    tcropLabel = outL(tumorRows,tumorCols,tumorPlanes);

    % Data set with a valid size for V-Net (multiple of 8)
    ind = floor([size(tcropVol,1) size(tcropVol,2) size(tcropVol,3)]/8)*8;
    incropVol = tcropVol(1:ind(1),1:ind(2),1:ind(3),:);
    mask = incropVol == 0;
    cropVol = channelWisePreProcess(incropVol);

    % Voxels that were exactly 0 before normalization are reset to 0.
    % NOTE(review): the original comment calls this the non-lung region;
    % confirm that a raw value of 0 really marks background in this data.
    cropVol(mask) = 0;
    cropLabel = tcropLabel(1:ind(1),1:ind(2),1:ind(3));

    % Split data into training, validation and test sets. Roughly 82%
    % are training, 6% are validation, and 12% are test
    subset = splitSubsetFor(id,NumFiles);
    imDir = fullfile(destination,['images' subset],'LungTS');
    labelDir = fullfile(destination,['labels' subset],'LungTS');

    % The variable names stored in the MAT-files are cropVol and cropLabel.
    save([imDir num2str(id,'%.3d') '.mat'],'cropVol');
    save([labelDir num2str(id,'%.3d') '.mat'],'cropLabel');
    id = id + 1;
end

end

function subset = splitSubsetFor(id,numFiles)
% Return the folder suffix ('Tr', 'Val' or 'Test') for the id-th file.
if id < floor(0.83*numFiles)
    subset = 'Tr';
elseif id < floor(0.89*numFiles)
    subset = 'Val';
else
    subset = 'Test';
end
end

function folder = resolveFolder(folder)
% Return FOLDER unchanged when it is absolute, otherwise prefix it with the
% current folder.
folder = char(folder);
if ispc
    % Drive-letter paths (C:\...) and UNC paths (\\server\share)
    isAbsolute = ~isempty(regexp(folder,'^([A-Za-z]:[\\/]|[\\/]{2})','once'));
else
    isAbsolute = strncmp(folder,'/',1);
end
if ~isAbsolute
    folder = fullfile(pwd,folder);
end
end

function ensureFolder(folder)
% Create FOLDER only when it is missing, so mkdir does not warn on reruns.
if ~isfolder(folder)
    mkdir(folder);
end
end

function out = channelWisePreProcess(in)
% Normalize each channel (4th dimension) of IN independently: subtract the
% mean and divide by the standard deviation taken over the three spatial
% dimensions, clip the result to [-5, 5] and rescale it to [0, 1].

% std does not accept integer classes, and integer arithmetic would round
% the normalized values, so work in floating point.
if ~isfloat(in)
    in = single(in);
end

chn_Mean = mean(in,[1 2 3]);
chn_Std = std(in,0,[1 2 3]);

% A constant channel has zero standard deviation; divide by 1 instead so
% the output does not become NaN or Inf.
chn_Std(chn_Std == 0) = 1;

out = (in - chn_Mean)./chn_Std;

rangeMin = -5;
rangeMax = 5;

% Remove outliers
out(out > rangeMax) = rangeMax;
out(out < rangeMin) = rangeMin;

% Rescale the data to the range [0, 1]
out = (out - rangeMin) / (rangeMax - rangeMin);
end

function moveHiddenFiles(source,volLoc,lblLoc)
% The original data set includes hidden files whose filenames begin with
% "._". Move these files out of the image and label folders into
% SOURCE/HiddenFiles. Nothing is done when that folder already exists.
hiddenDir = fullfile(source,'HiddenFiles');
if isfolder(hiddenDir)
    return
end
mkdir(hiddenDir);
moveMatchingFiles(volLoc,hiddenDir);
moveMatchingFiles(lblLoc,hiddenDir);
end

function moveMatchingFiles(srcDir,dstDir)
% Move every file in SRCDIR whose name starts with "._" into DSTDIR. This
% uses movefile rather than a shell command, so it works on any platform
% and leaves the current folder unchanged.
listing = dir(fullfile(srcDir,'._*'));
listing = listing(~[listing.isdir]);
for k = 1:numel(listing)
    movefile(fullfile(srcDir,listing(k).name), ...
        fullfile(dstDir,listing(k).name));
end
end

function out = proceedWithPreprocessing(destination)
% Return true when the number of files found in the imagesTr, imagesVal
% and imagesTest subfolders of DESTINATION differs from the expected
% total, meaning preprocessing still has to run.
totalNumFiles = 63;
numFiles = 0;

imageFolders = {'imagesTr','imagesVal','imagesTest'};
for k = 1:numel(imageFolders)
    folder = fullfile(destination,imageFolders{k});
    if isfolder(folder)
        listing = dir(folder);
        numFiles = numFiles + nnz(~[listing.isdir]);
    end
end

% If total number of preprocessed files is not equal to the number of
% files in the dataset, perform preprocessing. Otherwise, preprocessing has
% already been completed and can be skipped.
out = (numFiles ~= totalNumFiles);
end