www.gusucode.com > datastoreio工具箱 matlab源码程序 > datastoreio/+matlab/+io/+datastore/MatSeqDatastore.m

    classdef (Hidden) MatSeqDatastore < ...
                  matlab.io.datastore.FileBasedDatastore & ...
                  matlab.io.datastore.mixin.HadoopFileBasedSupport & ...
                  matlab.io.datastore.internal.ScalarBase
%MATSEQDATASTORE Datastore for use with MAT-Files or Sequence files.
%   This class inherits from FileBasedDatastore and uses the mixin
%   HadoopFileBasedSupport. This will be the superclass for all MAT-file or
%   Sequence file supporting datastore(s), eg., TallDatastore and,
%   KeyValueDatastore, etc.
%
%   See also tall, matlab.io.datastore.TallDatastore, mapreduce, datastore.

%   Copyright 2016 The MathWorks, Inc.

    properties (Abstract, Dependent)
        %Files -
        % MAT files or SEQUENCE files in the datastore.
        Files;
        %ReadSize -
        % Maximum number of data rows to read.
        ReadSize;
    end

    properties (Abstract, SetAccess = protected)
        %FileType -
        % The type of file supported by the Datastore. FileType
        % must be 'mat' or 'seq'. By default, FileType is determined
        % by the type of file in the location provided.
        FileType;
    end

    properties (Abstract, Access = protected)
        % deployment needs a way to get files before resolving them
        UnresolvedFiles;
    end

    properties (Abstract, Access = protected)
        %ErrorCatalogName - Error catalog name for error handling
        ErrorCatalogName;
        %MFilename - mfilename of the subclasses for error handling
        MFilename;
        %ValuesOnly- Only values are supported from the MAT-files or Sequence files 
        ValuesOnly;
    end

    properties (Constant, Access = protected)
       DEFAULT_FILE_TYPE = 'mat';
       DEFAULT_READ_SIZE = 1;
       DEFAULT_INCLUDE_SUBFOLDERS = false;
       DEFAULT_FILE_EXTENSIONS = -1;
       ALLOWED_FILE_TYPES = {'mat', 'seq'};
       SEQUENCE_FILE_TYPE = 'seq';
       FILETYPE_PROP_NAME = 'FileType';
       READSIZE_PROP_NAME = 'ReadSize';
       INCLUDE_SUBFOLDERS_NV_NAME = 'IncludeSubfolders';
       FILE_EXTENSIONS_NV_NAME = 'FileExtensions';
       FILE_TYPE_MAP = containers.Map({'seq', 'mat'}, {'Sequence ', 'MAT-'});
       MAT_KV_SPLITTER_NAME = 'matlab.io.datastore.splitter.MatKVFileSplitter';
       SEQ_KV_SPLITTER_NAME = 'matlab.io.datastore.splitter.SequenceFileSplitter';
    end

    methods (Access = protected)
        % Setup input files and info for initializing the datastore
        function [files, info] = preambleSetFiles(ds, files)
            import matlab.io.datastore.internal.validators.validatePaths;
            import matlab.io.datastore.internal.indexOfFirstFolderOrWildCard;

            % ensure the given paths are valid strings or cell array of strings
            paths = validatePaths(files);

            % get the appended or modified file list
            appendedPaths = setdiff(paths, ds.Files, 'stable');

            % get the index of the first string which is a folder or
            % contains a wildcard
            idx = indexOfFirstFolderOrWildCard(appendedPaths);

            % error for folder or wild card inputs
            if (-1 ~= idx)
                error(message('MATLAB:datastoreio:filebaseddatastore:nonFilePaths', appendedPaths{idx}));
            end

            import matlab.io.datastore.MatSeqDatastore;
            info.FromConstruction = false;
            info.FileType = ds.FileType;
            info.ReadSize = ds.ReadSize;
            info.ValuesOnly = ds.ValuesOnly;
            info.UsingDefaults = {MatSeqDatastore.FILE_EXTENSIONS_NV_NAME,...
                MatSeqDatastore.INCLUDE_SUBFOLDERS_NV_NAME};
            info.FileExtensions = -1;
            info.IncludeSubfolders = false;
        end
    end

    methods (Access = protected)

        % Initialization of datastore. Used by set.Files method and the constructor
        % This resolves the file paths, checks for supported MAT-files or Sequence
        % files, sets up the splitter for the datastore.
        function initDatastore(ds, files, nvStruct)
            import matlab.io.datastore.splitter.*;
            import matlab.io.datastore.MatSeqDatastore;
            fileType = nvStruct.FileType;
            fileType = validateFileTypeStr(ds, fileType);
            userPassedFileType = true;
            checkMatOrSeq = strcmp(fileType, MatSeqDatastore.ALLOWED_FILE_TYPES);
            if ismember(MatSeqDatastore.FILETYPE_PROP_NAME, nvStruct.UsingDefaults)
                checkMatOrSeq = [true, true];
                userPassedFileType = false;
            end
            % Check the first file
            [~, info, files, fileSizes] = ...
                MatSeqDatastore.supportsLocation(files, nvStruct, checkMatOrSeq, userPassedFileType);
            if userPassedFileType && nvStruct.FromConstruction
                % Filter all unsupported files and if empty error.
                [splitterName, fileInfo] = filterAndGetInfo(ds, fileType, files, fileSizes);
            else
                % Check if all files are supported, otherwise error.
                info.FileType = fileType;
                info.FromConstruction = nvStruct.FromConstruction;
                [splitterName, fileInfo] = checkSupportAndGetInfo(ds, info, files, fileSizes);
            end
            ds.Splitter = feval([splitterName '.create'], fileInfo);
            % reset the datastore so setting the readsize after is a valid call.
            reset(ds);
            ds.ReadSize = nvStruct.ReadSize;
            if ds.Splitter.NumSplits == 0
                % Empty datastore defaults to mat type.
                ds.FileType = 'mat';
            else
                % Pick the fileType from the SplitReader.
                ds.FileType = ds.SplitReader.FileType;
            end
        end

        % Throws error that the given file is unsupported
        function unsupportedFilesError(ds, errorThrown, fname)
            if ~isempty(errorThrown)
                throw(errorThrown);
            elseif ~isempty(fname)
                error(message(['MATLAB:datastoreio:' ds.ErrorCatalogName ':unsupportedFiles'], fname));
            else
                error(message(['MATLAB:datastoreio:' ds.ErrorCatalogName ':unsupportedFiles'], ''));
            end
        end

        % When FileType is given as 'mat', when the file is Sequence and vice-versa
        function unexpectedFileTypeError(ds, fileType, fname)
            import matlab.io.datastore.MatSeqDatastore;
            error(message(['MATLAB:datastoreio:' ds.ErrorCatalogName ':unexpectedFileType'], ...
                            MatSeqDatastore.FILE_TYPE_MAP(fileType), fname));
        end

        % When FileType is given and none of the files are of that type.
        function noFileTypeEmptyError(ds, fileType)
            import matlab.io.datastore.MatSeqDatastore;
            fileType = MatSeqDatastore.FILE_TYPE_MAP(fileType);
            error(message(['MATLAB:datastoreio:' ds.ErrorCatalogName ':noFileTypeEmptyError'], ...
                fileType));
        end

        % When a filetype is given filter only that filetype supported files.
        function [splitterName, fileInfo] = filterAndGetInfo(ds, fileType, files, fileSizes)
            import matlab.io.datastore.splitter.MatKVFileSplitter;
            import matlab.io.datastore.splitter.SequenceFileSplitter;
            import matlab.io.datastore.MatSeqDatastore;
            splitterName = '';
            fileInfo = [];
            prevNumFiles = numel(files);
            switch fileType
                case 'mat'
                    splitterName = MatSeqDatastore.MAT_KV_SPLITTER_NAME;
                    fileInfo = MatKVFileSplitter.filterMatFiles(files, ds.ValuesOnly);
                case 'seq'
                    splitterName = MatSeqDatastore.SEQ_KV_SPLITTER_NAME;
                    tfArr = SequenceFileSplitter.filterSeqFiles(files, ds.ValuesOnly);
                    files = files(tfArr);
                    fileSizes = fileSizes(tfArr);
                    fileInfo.Files = files;
                    fileInfo.FileSizes = fileSizes;
            end
            if prevNumFiles ~= 0 && numel(fileInfo.Files) == 0
                noFileTypeEmptyError(ds, fileType);
            end
            fileInfo.ValuesOnly = ds.ValuesOnly;
        end

        % When Filetype is not given check if all files in the resolved-files
        % are supported; throw an error otherwise.
        function [splitterName, fileInfo] = checkSupportAndGetInfo(ds, info, files, fileSizes)
            import matlab.io.datastore.splitter.MatKVFileSplitter;
            import matlab.io.datastore.splitter.SequenceFileSplitter;
            import matlab.io.datastore.MatSeqDatastore;
            splitterName = '';
            fileInfo = [];
            switch info.Support
                case 'MATSupport'
                    splitterName = MatSeqDatastore.MAT_KV_SPLITTER_NAME;
                    % Check if all files are supported MAT-files
                    [fileInfo, areMat, idx] = MatKVFileSplitter.filterMatFiles(files, ds.ValuesOnly);
                    if ~areMat
                        if info.FromConstruction
                            unsupportedFilesError(ds, [], files{idx});
                        else
                            unexpectedFileTypeError(ds, info.FileType, files{idx});
                        end
                    end
                case 'SEQSupport'
                    % Check if all files are sequence files.
                    if numel(files) > 1
                        [areSeq, idx] = SequenceFileSplitter.areSeqFilesSupported(files(2:end), ds.ValuesOnly);
                        if ~areSeq
                            if info.FromConstruction
                                unsupportedFilesError(ds, [], files{idx + 1});
                            else
                                unexpectedFileTypeError(ds, info.FileType, files{idx + 1});
                            end
                        end
                    end
                    fileInfo.Files = files;
                    fileInfo.FileSizes = fileSizes;
                    splitterName = MatSeqDatastore.SEQ_KV_SPLITTER_NAME;
                case 'UnexpectedFileType'
                    unexpectedFileTypeError(ds, info.FileType, info.Filename);
                case 'Unsupported'
                    unsupportedFilesError(ds, info.ErrorThrown, info.Filename);
            end
            fileInfo.ValuesOnly = ds.ValuesOnly;
        end

        %Validate the given filetype option string
        function fileType = validateFileTypeStr(ds, fileType)
            import matlab.io.datastore.MatSeqDatastore;
            fileType = validatestring(fileType, ...
                MatSeqDatastore.ALLOWED_FILE_TYPES, ...
                ds.MFilename, ...
                MatSeqDatastore.FILETYPE_PROP_NAME);
        end

        %Validate the given readsize option
        function validateReadSize(ds, readSize)
            try
                validateattributes(readSize, {'numeric'}, ...
                    {'scalar', 'positive', 'integer'});
            catch
                error(message(['MATLAB:datastoreio:' ds.ErrorCatalogName ':invalidReadSize']))
            end
        end
    end

    methods (Static = true, Hidden = true)

        % This function is responsible for determining whether a given
        % location is supported by a MatSeqDatastore. It also returns a
        % resolved filelist and the corresponding file sizes.
        function [tf, info, files, fileSizes] = supportsLocation(files, nvStruct, checkMatOrSeq, userPassedFileType)
            info.Filename = '';
            info.ErrorThrown = [];
            tf = false;
            if iscell(files) && isempty(files)
                % MATSuppport by default for empty datastore.
                tf = true;
                fileSizes = 0;
                info.Support = 'MATSupport';
                return;
            end
            import matlab.io.datastore.internal.validators.validateFileExtensions;
            import matlab.io.datastore.FileBasedDatastore;

            isDefaultExts = validateFileExtensions(nvStruct.FileExtensions, nvStruct.UsingDefaults);
            % This validates the paths and does a pathlookup of the location input
            [~, files, fileSizes] = matlab.io.datastore.FileBasedDatastore.supportsLocation(files, nvStruct, {}, ~isDefaultExts);
            [~, ~, exts] = cellfun(@fileparts, files, 'UniformOutput', false);
            isCrcFile = strcmp(exts, '.crc');
            files(isCrcFile) = [];
            fileSizes(isCrcFile) = [];
            
            checkForMat = true;
            checkForSeq = true;
            if nargin == 4
                checkForMat = checkMatOrSeq(1);
                checkForSeq = checkMatOrSeq(2);
            end
            info.Support = 'Unsupported';
            try
                info.Filename = files{1};
                import matlab.io.datastore.splitter.MatKVFileSplitter;
                if checkForMat && MatKVFileSplitter.isMatSupported(info.Filename, nvStruct.ValuesOnly)
                    tf = true;
                    info.Support = 'MATSupport';
                    return;
                end
                import matlab.io.datastore.internal.SequenceFileReader;
                if ~tf && checkForSeq && SequenceFileReader.isSeqSupported(info.Filename, nvStruct.ValuesOnly)
                    tf = true;
                    info.Support = 'SEQSupport';
                    return;
                end
            catch e
                info.ErrorThrown = e;
                tf = false;
                return;
            end
            if nargin == 4 && userPassedFileType && ~tf
                % Used by initDatastore method to validate
                info.Support = 'UnexpectedFileType';
            end
        end

    end

    methods (Hidden = true, Access = 'public')
        % return true if the splits of this datastore are file at a time
        function tf = areSplitsWholeFile(ds)
            tf = ds.Splitter.isFullFileSplitter();
        end

        % return true if the splits of this datastore span the all files
        % in the Files property in their entirety (non-partitioned)
        function tf = areSplitsOverCompleteFiles(ds)
            tf = ds.Splitter.isSplitsOverAllOfFiles();
        end

        %PROGRESS   Percentage of completed splits between 0.0 and 1.0.
        %   Return fraction between 0.0 and 1.0 indicating progress. Does
        %   not count unfinished splits
        function frac = progress(ds)
            frac = ds.SplitIdx-hasNext(ds.SplitReader) + progress(ds.SplitReader);
            frac = min(frac/numel(ds.Splitter.Splits), 1.0);
        end

        % HadoopFileBasedSupport: initialize this datastore given filename, offset
        % and size to read
        function initFromFileSplit(ds, filename, offset, len)
            import matlab.io.datastore.splitter.SequenceFileSplitter;
            ds.Splitter = ds.Splitter.createCopyWithSplits(...
                            SequenceFileSplitter.createBasicSplit(filename, offset, len));
            reset(ds);
        end

        % Deployment needs a way to get unresolved files.
        function files = getUnresolvedFiles(ds)
            files = ds.UnresolvedFiles;
        end
    end
end