% Source: www.gusucode.com > datastoreio toolbox MATLAB source code > datastoreio/+matlab/+io/+datastore/@TallDatastore/TallDatastore.m

    classdef (Sealed) TallDatastore < ...
                  matlab.io.datastore.MatSeqDatastore & ...
                  matlab.mixin.CustomDisplay
%TALLDATASTORE Datastore for use with files produced by write method of tall.
%   TDS = datastore(LOCATION)
%   TDS = datastore(LOCATION,'Type','tall') creates a TALLDATASTORE if a
%   file or a collection of files that were produced by the write method of
%   tall is present in LOCATION. LOCATION has the following properties:
%      - Can be a filename or a folder name
%      - Can be a cell array of multiple file or folder names
%      - Can contain a relative path (HDFS requires full paths)
%      - Can contain a wildcard (*) character.
%      - All the files in LOCATION must be MAT-Files (or Sequence files)
%        containing data, typically produced by write method of tall.
%
%   TDS = datastore(_,'ReadSize',readSize) specifies the maximum number of
%   data rows returned by read. By default, ReadSize is determined by the
%   datastore. If the values are small, increase ReadSize.
%
%   TDS = datastore(_,'FileType',fileType) specifies the type of files in
%   LOCATION. The default FileType is 'mat', for data stored in MAT-files,
%   typically produced by tall write method. FileType can also be 'seq',
%   for data stored in one or more sequence files, typically produced by
%   write method of tall.
%
%
%   TallDatastore Methods:
%
%      preview       - Read a small amount of data from the start of the
%                      datastore.
%      read          - Read some data from the datastore.
%      readall       - Read all of the data from the datastore.
%      hasdata       - Returns true if there is more data in the datastore.
%      reset         - Reset the datastore to the start of the data.
%      partition     - Return a new datastore that represents a single
%                      partitioned part of the original datastore.
%      numpartitions - Return an estimate for a reasonable number of
%                      partitions to use with the partition function for
%                      the given information.
%
%   TallDatastore Properties:
%
%      Files           - Cell array of filenames.
%      FileType        - The type of file supported by this datastore.
%      ReadSize        - Maximum number of data rows to read.
%
%   Example:
%   --------
%      % Create a simple tall double.
%      t = tall(rand(500,1))
%      % Write to a new folder.
%      newFolder = fullfile(pwd, 'myTest');
%      write(newFolder, t)
%      % Create a TallDatastore from newFolder
%      tds = datastore(newFolder)
%      % Create a new tall using TallDatastore
%      t1 = tall(tds)
%
%   See also tall, tall/write, matlab.io.datastore.TallDatastore, mapreduce, datastore.

%   Copyright 2016 The MathWorks, Inc.

    properties (Dependent)
        %Files -
        % MAT-files or sequence files that make up the TallDatastore.
        % The value is computed by get.Files and assigned through
        % set.Files rather than being stored directly.
        Files
    end

    properties
        %ReadSize -
        % Maximum number of data rows to read. Assignments are checked
        % by set.ReadSize before being stored.
        ReadSize
    end

    properties (SetAccess = protected)
        %FileType -
        % Kind of file this TallDatastore supports, either 'mat' or
        % 'seq'. When not given explicitly, FileType is determined by
        % the type of file found in the location provided.
        FileType
    end

    properties (Access = protected)
        %UnresolvedFiles - Files exactly as passed to the constructor;
        % deployment needs a way to get the files before resolving them
        UnresolvedFiles
        %ErrorCatalogName - Error catalog name used for error handling
        ErrorCatalogName
        %MFilename - mfilename of the subclass, used for error handling
        MFilename
        %ValuesOnly - Only values are supported from the MAT-files or
        % Sequence files
        ValuesOnly
    end

    properties (Access = private)
        %BufferedData - Data held back to support a variable ReadSize
        BufferedData
        %BufferedSize - Size of the data currently buffered
        BufferedSize
        %BufferedInfo - Info of the data currently buffered
        BufferedInfo
        %BufferedSubstruct - Substruct used to subsref into BufferedData
        BufferedSubstruct
    end

    properties (Constant, Access = protected)
        %M_FILENAME - mfilename of this class file
        M_FILENAME = mfilename;
        %ERROR_CATALOG_NAME - Error catalog name for this datastore
        ERROR_CATALOG_NAME = 'talldatastore';
    end
    % Constructor
    methods
        % TallDatastore can be constructed with files argument,
        % optionally providing FileType and ReadSize Name-Value pairs.
        function tds = TallDatastore(files, varargin)
            %TallDatastore Construct a TallDatastore for FILES.
            %   TDS = TallDatastore(FILES, Name, Value, ...) parses the
            %   optional Name-Value pairs with parseNameValues and hands
            %   the result to initializeForConstruction.
            %
            %   Any error raised along the way is rethrown with
            %   throwAsCaller, so it is reported from the caller's
            %   location rather than from inside this constructor.
            try
                parsedArgs = matlab.io.datastore.TallDatastore.parseNameValues(varargin{:});
                initializeForConstruction(tds, files, parsedArgs);
            catch constructionError
                throwAsCaller(constructionError);
            end
        end
    end

    % Set and Get methods for properties
    methods
        % Set method for Files
        function set.Files(tds, files)
            %set.Files Re-initialize the datastore for a new set of files.
            %   The incoming FILES are first passed through
            %   preambleSetFiles (defined outside this file), which
            %   returns the files plus the info struct that initialize
            %   expects. Errors are rethrown with throw.
            try
                [preparedFiles, info] = preambleSetFiles(tds, files);
                initialize(tds, preparedFiles, info);
            catch setFilesError
                throw(setFilesError)
            end
        end

        % Set method for ReadSize
        function set.ReadSize(tds, readSize)
            %set.ReadSize Validate READSIZE, then store it.
            %   validateReadSize (defined outside this file) is expected
            %   to throw for an unacceptable value; that error is rethrown
            %   with throw and the property is left untouched.
            try
                validateReadSize(tds, readSize);
            catch validationError
                throw(validationError);
            end
            % Only reached when validation did not throw.
            tds.ReadSize = readSize;
        end

        % Get Files
        function files = get.Files(tds)
            %get.Files Return the Files held by the datastore's Splitter.
            splitter = tds.Splitter;
            files = splitter.Files;
        end
    end

    methods (Access = private)
        % Initialize datastore values before passing to initDatastore of the superclass
        % MatSeqDatastore
        function initializeForConstruction(ds, files, info)
            %initializeForConstruction Construction-time setup for the datastore.
            %   Records the files as given, fills in the error-handling
            %   properties, completes INFO with the fields that this class
            %   fixes, and then calls initialize.

            % State kept on the datastore itself.
            ds.UnresolvedFiles = files;
            ds.ErrorCatalogName = matlab.io.datastore.TallDatastore.ERROR_CATALOG_NAME;
            ds.MFilename = matlab.io.datastore.TallDatastore.M_FILENAME;
            ds.ValuesOnly = true;

            % Flags passed along in the info struct.
            info.ValuesOnly = true;
            info.FromConstruction = true;

            % IncludeSubfolders and FileExtensions are not supported
            % currently. Mark both as defaulted and give them default
            % values so the underlying superclass code can be used.
            defaultedNames = { ...
                matlab.io.datastore.MatSeqDatastore.FILE_EXTENSIONS_NV_NAME, ...
                matlab.io.datastore.MatSeqDatastore.INCLUDE_SUBFOLDERS_NV_NAME};
            info.UsingDefaults = [info.UsingDefaults, defaultedNames];
            info.FileExtensions = -1;
            info.IncludeSubfolders = false;

            initialize(ds, files, info);
        end

        % Initialize using superclass common code and set private
        % properties after reset
        function initialize(tds, files, info)
            %initialize Run the common initialization and reset private state.
            %   Steps, in order:
            %     1. initDatastore - the superclass common code.
            %     2. setKeyValueLimit - must come after the splitter (and
            %        split reader, if any) have been initialized.
            %     3. getBestReadSize - its ReadSize is applied only when
            %        ReadSize is listed in info.UsingDefaults.
            %     4. setBufferedDataInfo - resets the private buffer.
            initDatastore(tds, files, info);
            setKeyValueLimit(tds);

            [bestReadSize, numDims] = getBestReadSize(tds);
            readSizeIsDefault = ismember( ...
                matlab.io.datastore.MatSeqDatastore.READSIZE_PROP_NAME, ...
                info.UsingDefaults);
            if readSizeIsDefault
                tds.ReadSize = bestReadSize;
            end

            setBufferedDataInfo(tds, numDims);
        end

        % Set the KeyValueLimit for Splitter and SplitReader during initialization
        function setKeyValueLimit(tds)
            %setKeyValueLimit Apply the default read size as KeyValueLimit.
            %   The Splitter always receives the limit. The SplitReader
            %   receives it only when the Splitter has at least one split,
            %   because a non-empty splitter guarantees a reader.
            limit = matlab.io.datastore.MatSeqDatastore.DEFAULT_READ_SIZE;
            tds.Splitter.KeyValueLimit = limit;
            if tds.Splitter.NumSplits ~= 0
                tds.SplitReader.KeyValueLimit = limit;
            end
        end

        % Set the private buffered substruct and buffered size values
        % This is called after setting BufferedData
        %
        % See also read, getDataUsingSubstructInfo
        function setBufferedDataInfo(tds, numDims)
            %setBufferedDataInfo Reset the buffer and build its indexing substruct.
            %   setBufferedDataInfo(TDS, NUMDIMS) stores in BufferedSubstruct
            %   a '()' substruct with NUMDIMS subscripts: an empty first
            %   subscript (the ReadSize dimension, filled in later by the
            %   code that indexes the buffer) followed by ':' for each
            %   remaining dimension. BufferedSize is set to 0 and
            %   BufferedData to [].

            % One ':' per non-ReadSize dimension, as a 1-by-(numDims-1) row.
            % The explicit leading 1 is required: repmat(C, n) tiles to
            % n-by-n, which cannot be concatenated with the 1-by-1 {[]}
            % below once numDims is 3 or more.
            trailingColons = repmat({':'}, 1, numDims - 1);
            tds.BufferedSubstruct = substruct('()', [{[]}, trailingColons]);
            tds.BufferedSize = 0;
            tds.BufferedData = [];
        end

        % Get data using the private buffered substruct and buffered size values
        % This is called after setting BufferedSubstruct
        %
        % See also read, setBufferedDataInfo 
        function data = getDataUsingSubstructInfo(tds, readSize)
            %getDataUsingSubstructInfo Remove the first readSize entries from the buffer.
            %   DATA = getDataUsingSubstructInfo(TDS, READSIZE) returns
            %   entries 1:READSIZE along the first dimension of
            %   BufferedData, keeps the rest as the new BufferedData and
            %   lowers BufferedSize by READSIZE.
            %
            %   BufferedSubstruct is modified in place; on return its first
            %   subscript holds the index range used to select the rest.
            %   NOTE(review): presumably callers ensure READSIZE does not
            %   exceed BufferedSize - confirm against read.

            % First subscript selects the leading readSize entries
            tds.BufferedSubstruct.subs{1} = 1:readSize;
            data = subsref(tds.BufferedData, tds.BufferedSubstruct);
            % First subscript now selects everything after those entries
            tds.BufferedSubstruct.subs{1} = readSize+1:tds.BufferedSize;
            % Reset the buffered data to the remaining data
            tds.BufferedData = subsref(tds.BufferedData, tds.BufferedSubstruct);
            tds.BufferedSize = tds.BufferedSize - readSize;
        end

        % This gets the best readsize based on the values
        % in the underlying file container - MAT-Files or Sequence Files
        function [readSize, numDims] = getBestReadSize(tds)
            %getBestReadSize Derive a default ReadSize from the buffered value.
            %   [READSIZE, NUMDIMS] = getBestReadSize(TDS) returns 1 and 2
            %   unless the SplitReader is non-empty and the value returned
            %   by getBufferedValue is a non-empty, non-cell array. In that
            %   case READSIZE is the size of its first dimension (the
            %   ReadSize dimension) and NUMDIMS is its ndims.

            % Fallbacks. They cover an uninitialized SplitReader (empty
            % [0x1] matrix), an empty value, and cell values: if the first
            % value is a cell the rest are cells too, so ReadSize = 1 is a
            % safe default.
            readSize = 1;
            numDims = 2;

            if ~isempty(tds.SplitReader)
                value = getBufferedValue(tds.SplitReader);
                if ~isempty(value) && ~iscell(value)
                    readSize = size(value, 1);
                    numDims = ndims(value);
                end
            end
        end

        % Used by preview
        % This subsrefs the value from the underlying container - MAT-Files or Sequence Files,
        % using the stored substruct, with zero first dimension.
        function data = getZeroFirstDimData(tds)
            %getZeroFirstDimData Buffered value indexed with an empty first subscript.
            %   Used by preview. When the SplitReader is empty, which is
            %   the uninitialized case and not the case of an empty
            %   datastore partitioned from a non-empty one, zeros(0,1) is
            %   returned. Otherwise the value from getBufferedValue is
            %   indexed with BufferedSubstruct after its first subscript
            %   has been set to []. That change to BufferedSubstruct is
            %   kept on the object.
            if isempty(tds.SplitReader)
                data = zeros(0,1);
            else
                bufferedValue = getBufferedValue(tds.SplitReader);
                tds.BufferedSubstruct.subs{1} = [];
                data = subsref(bufferedValue, tds.BufferedSubstruct);
            end
        end
    end

    methods (Static, Access = private)

        % Parse the Name-Value pairs for TallDatastore
        function parsedStruct = parseNameValues(varargin)
            %parseNameValues Parse the Name-Value pairs accepted by TallDatastore.
            %   PARSEDSTRUCT = parseNameValues(Name, Value, ...) returns the
            %   inputParser Results struct with one extra field,
            %   UsingDefaults, listing the parameters that were not supplied.
            %
            %   The inputParser is built once and kept in a persistent
            %   variable for later calls.
            persistent nvParser;
            if isempty(nvParser)
                nvParser = inputParser;
                nvParser.addParameter( ...
                    matlab.io.datastore.MatSeqDatastore.FILETYPE_PROP_NAME, ...
                    matlab.io.datastore.MatSeqDatastore.DEFAULT_FILE_TYPE);
                nvParser.addParameter( ...
                    matlab.io.datastore.MatSeqDatastore.READSIZE_PROP_NAME, ...
                    matlab.io.datastore.MatSeqDatastore.DEFAULT_READ_SIZE);
                nvParser.FunctionName = matlab.io.datastore.TallDatastore.M_FILENAME;
            end
            nvParser.parse(varargin{:});
            parsedStruct = nvParser.Results;
            parsedStruct.UsingDefaults = nvParser.UsingDefaults;
        end

    end

    methods (Static, Hidden)

        % This function is responsible for determining whether a given
        % location is supported by a TallDatastore.
        function tf = supportsLocation(files, nvStruct)
            %supportsLocation True if a TallDatastore supports the given location.
            %   TF = supportsLocation(FILES, NVSTRUCT) is false for empty
            %   FILES. Otherwise it is the answer of
            %   MatSeqDatastore.supportsLocation for NVSTRUCT with its
            %   ValuesOnly field set to true.
            if isempty(files)
                % From the datastore gateway, an empty datastore cannot be
                % constructed except with a type Name-Value pair.
                tf = false;
                return;
            end
            nvStruct.ValuesOnly = true;
            tf = matlab.io.datastore.MatSeqDatastore.supportsLocation(files, nvStruct);
        end
    end

    methods (Access = protected)
        % matlab.mixin.CustomDisplay method.
        % Used for custom display the properties of the object.
        function displayScalarObject(tds)
            %displayScalarObject Custom scalar display (matlab.mixin.CustomDisplay).
            %   Prints the header, a hand-formatted Files entry, the
            %   remaining property group and the footer. The indentation
            %   for Files is copied from the output of details so that it
            %   lines up with the default property display.
            disp(getHeader(tds));
            group = getPropertyGroups(tds);

            % Capture the details output as text. The evaluated string
            % refers to the input by name, so the argument must stay "tds".
            detailsText = evalc('details(tds)');
            detailLines = strsplit(detailsText, '\n');
            mentionsFiles = ~cellfun(@isempty, strfind(detailLines, 'Files: '));
            filesLines = detailLines(mentionsFiles);

            % Number of characters in front of 'Files: ' in details.
            indentWidth = strfind(filesLines{1}, 'Files: ') - 1;
            if indentWidth > 0
                filesLabel = [sprintf(repmat(' ',1,indentWidth)) 'Files: '];
                % Padding handed to cellArrayDisp; none for an empty list.
                if isempty(tds.Files)
                    continuationPad = '';
                else
                    continuationPad = sprintf(repmat(' ',1,numel(filesLabel)));
                end
                filesText = matlab.io.datastore.internal.cellArrayDisp( ...
                    tds.Files, true, continuationPad);
                disp([filesLabel filesText]);
                % Files was displayed above, so drop it from the group
                % to avoid showing it twice.
                group.PropertyList = rmfield(group.PropertyList, 'Files');
            end

            matlab.mixin.CustomDisplay.displayPropertyGroups(tds, group);
            disp(getFooter(tds));
        end

    end
end