www.gusucode.com > datastoreio工具箱 matlab源码程序 > datastoreio/+matlab/+io/+datastore/@TallDatastore/TallDatastore.m
classdef (Sealed) TallDatastore < ...
        matlab.io.datastore.MatSeqDatastore & ...
        matlab.mixin.CustomDisplay
%TALLDATASTORE Datastore for use with files produced by write method of tall.
%   TDS = datastore(LOCATION)
%   TDS = datastore(LOCATION,'Type','tall') creates a TALLDATASTORE if a
%   file or a collection of files that were outputs of write method on tall,
%   is present in LOCATION. LOCATION has the following properties:
%      - Can be a filename or a folder name
%      - Can be a cell array of multiple file or folder names
%      - Can contain a relative path (HDFS requires full paths)
%      - Can contain a wildcard (*) character.
%      - All the files in LOCATION must be MAT-Files (or Sequence files)
%        containing data, typically produced by write method of tall.
%
%   TDS = datastore(_,'ReadSize',readSize) specifies the maximum number of
%   data rows returned by read. By default, ReadSize is determined by the
%   datastore. If the values are small, increase ReadSize.
%
%   TDS = datastore(_,'FileType',fileType) specifies the type of files in
%   LOCATION. The default FileType is 'mat', for data stored in MAT-files,
%   typically produced by tall write method. FileType can also be 'seq',
%   for data stored in one or more sequence files, typically produced by
%   write method of tall.
%
%
%   TallDatastore Methods:
%
%   preview         -    Read a small amount of data from the start of the
%                        datastore.
%   read            -    Read some data from the datastore.
%   readall         -    Read all of the data from the datastore.
%   hasdata         -    Returns true if there is more data in the datastore.
%   reset           -    Reset the datastore to the start of the data.
%   partition       -    Return a new datastore that represents a single
%                        partitioned part of the original datastore.
%   numpartitions   -    Return an estimate for a reasonable number of
%                        partitions to use with the partition function for
%                        the given information.
%
%   TallDatastore Properties:
%
%   Files           -    Cell array of filenames.
%   FileType        -    The type of file supported by this datastore.
%   ReadSize        -    Maximum number of data rows to read.
%
%   Example:
%   --------
%      % Create a simple tall double.
%      t = tall(rand(500,1))
%      % Write to a new folder.
%      newFolder = fullfile(pwd, 'myTest');
%      write(newFolder, t)
%      % Create a TallDatastore from newFolder
%      tds = datastore(newFolder)
%      % Create a new tall using TallDatastore
%      t1 = tall(tds)
%
%   See also tall, tall/write, matlab.io.datastore.TallDatastore, mapreduce, datastore.

%   Copyright 2016 The MathWorks, Inc.

    properties (Dependent = true)
        %Files -
        % MAT files or SEQUENCE files in the TallDatastore.
        Files;
    end

    properties
        %ReadSize -
        % Maximum number of data rows to read.
        ReadSize;
    end

    properties (SetAccess = protected)
        %FileType -
        % The type of file supported by the TallDatastore. FileType
        % must be 'mat' or 'seq'. By default, FileType is determined
        % by the type of file in the location provided.
        FileType;
    end

    properties (Access = protected)
        % deployment needs a way to get files before resolving them
        UnresolvedFiles
        %ErrorCatalogName - Error catalog name for error handling
        ErrorCatalogName;
        %MFilename - mfilename of the subclasses for error handling
        MFilename;
        %ValuesOnly - Only values are supported from the MAT-files or Sequence files
        ValuesOnly;
    end

    properties (Access = private)
        % Data buffered to support variable ReadSize
        BufferedData;
        % Size of the data buffered
        BufferedSize;
        % Info of the data buffered
        BufferedInfo;
        % Substruct used to subsref into BufferedData
        BufferedSubstruct;
    end

    properties (Constant, Access = protected)
        M_FILENAME = mfilename;
        ERROR_CATALOG_NAME = 'talldatastore';
    end

    % Constructor
    methods
        % TallDatastore can be constructed with files argument,
        % optionally providing FileType and ReadSize Name-Value pairs.
        % Any error raised while parsing or initializing is rethrown as
        % if it came from the caller.
        function tds = TallDatastore(files, varargin)
            try
                import matlab.io.datastore.TallDatastore;
                nameValues = TallDatastore.parseNameValues(varargin{:});
                initializeForConstruction(tds, files, nameValues);
            catch e
                throwAsCaller(e);
            end
        end
    end

    % Set and Get methods for properties
    methods
        % Set method for Files
        % Re-initializes the datastore with the new files.
        function set.Files(tds, files)
            try
                % initialize the datastore with files, current filetype and current readsize
                [files, info] = preambleSetFiles(tds, files);
                initialize(tds, files, info);
            catch e
                throw(e)
            end
        end

        % Set method for ReadSize
        % The value is stored only if validateReadSize does not error.
        function set.ReadSize(tds, readSize)
            try
                validateReadSize(tds, readSize);
            catch e
                throw(e);
            end
            tds.ReadSize = readSize;
        end

        % Get Files
        % Files is a dependent property backed by the Splitter.
        function files = get.Files(tds)
            files = tds.Splitter.Files;
        end
    end

    methods (Access = private)
        % Initialize datastore values before passing to initDatastore of the superclass
        % MatSeqDatastore
        function initializeForConstruction(ds, files, info)
            import matlab.io.datastore.TallDatastore;
            import matlab.io.datastore.MatSeqDatastore;
            ds.UnresolvedFiles = files;
            ds.ErrorCatalogName = TallDatastore.ERROR_CATALOG_NAME;
            ds.MFilename = TallDatastore.M_FILENAME;
            ds.ValuesOnly = true;
            info.ValuesOnly = true;
            info.FromConstruction = true;
            % IncludeSubfolders and FileExtensions are not supported currently
            % Assign default values, so to use the underlying superclass code.
            info.UsingDefaults = horzcat(info.UsingDefaults, {MatSeqDatastore.FILE_EXTENSIONS_NV_NAME,...
                MatSeqDatastore.INCLUDE_SUBFOLDERS_NV_NAME});
            info.FileExtensions = -1;
            info.IncludeSubfolders = false;
            initialize(ds, files, info);
        end

        % Initialize using superclass common code and set private
        % properties after reset
        function initialize(tds, files, info)
            import matlab.io.datastore.MatSeqDatastore;
            % initialize using superclass common code
            initDatastore(tds, files, info);
            % This needs to be called after splitter (and splitreader, if any)
            % are initialized.
            setKeyValueLimit(tds);
            % Get the correct ReadSize from the SplitReader, if any
            [readSize, numDims] = getBestReadSize(tds);
            % If ReadSize is to be default, use the correct one from the SplitReader
            if ismember(MatSeqDatastore.READSIZE_PROP_NAME, info.UsingDefaults)
                tds.ReadSize = readSize;
            end
            setBufferedDataInfo(tds, numDims);
        end

        % Set the KeyValueLimit for Splitter and SplitReader during initialization
        function setKeyValueLimit(tds)
            import matlab.io.datastore.MatSeqDatastore;
            tds.Splitter.KeyValueLimit = MatSeqDatastore.DEFAULT_READ_SIZE;
            % if we have a non empty splitter, then a reader is guaranteed.
            if tds.Splitter.NumSplits ~= 0
                tds.SplitReader.KeyValueLimit = MatSeqDatastore.DEFAULT_READ_SIZE;
            end
        end

        % Set the private buffered substruct and buffered size values.
        % Resets BufferedData and BufferedSize and builds a '()' substruct
        % with one subscript per dimension: an (initially empty) first
        % subscript for the ReadSize dimension and ':' for the others.
        %
        % See also read, getDataUsingSubstructInfo
        function setBufferedDataInfo(tds, numDims)
            % colon : for all non-ReadSize dimensions.
            % The explicit row count of 1 matters: repmat(C, n) with a
            % single scalar tiles n-by-n, which for numDims > 2 produced a
            % square cell that cannot be concatenated with {[]} below.
            col = repmat({':'}, 1, numDims - 1);
            tds.BufferedSubstruct = substruct('()', [{[]}, col]);
            tds.BufferedSize = 0;
            tds.BufferedData = [];
        end

        % Get data using the private buffered substruct and buffered size values
        % This is called after setting BufferedSubstruct
        % Returns the first readSize slices (along the first dimension)
        % of BufferedData and keeps the remainder buffered.
        %
        % See also read, setBufferedDataInfo
        function data = getDataUsingSubstructInfo(tds, readSize)
            % Set the subs field value to the readsize amount
            tds.BufferedSubstruct.subs{1} = 1:readSize;
            data = subsref(tds.BufferedData, tds.BufferedSubstruct);
            tds.BufferedSubstruct.subs{1} = readSize+1:tds.BufferedSize;
            % Reset the buffered data to the remaining data
            tds.BufferedData = subsref(tds.BufferedData, tds.BufferedSubstruct);
            tds.BufferedSize = tds.BufferedSize - readSize;
        end

        % This gets the best readsize based on the values
        % in the underlying file container - MAT-Files or Sequence Files
        % Defaults are readSize = 1 and numDims = 2; they are overridden
        % only when the buffered value is a non-empty, non-cell array.
        function [readSize, numDims] = getBestReadSize(tds)
            readSize = 1;
            numDims = 2;
            if isempty(tds.SplitReader)
                % empty matrix [0x1] for an uninitialized SplitReader
                % Not for empty datastores created from partition of non-empty datastore
                return;
            end
            data = getBufferedValue(tds.SplitReader);
            % if the first value is cell, rest of them are
            % cell as well. So ReadSize = 1, is safe for default.
            if isempty(data) || iscell(data)
                return;
            end
            % if the value is an N-D array, get the first dimension
            % which is the ReadSize dimension.
            readSize = size(data, 1);
            numDims = ndims(data);
        end

        % Used by preview
        % This subsrefs the value from the underlying container - MAT-Files or Sequence Files,
        % using the stored substruct, with zero first dimension.
        % Note: this overwrites the first subscript of BufferedSubstruct.
        function data = getZeroFirstDimData(tds)
            if isempty(tds.SplitReader)
                % empty matrix for an uninitialized SplitReader
                % This will not happen for empty datastores created from partition
                % of non-empty datastore
                data = zeros(0,1);
                return;
            end
            data = getBufferedValue(tds.SplitReader);
            tds.BufferedSubstruct.subs{1} = [];
            data = subsref(data, tds.BufferedSubstruct);
        end
    end

    methods (Static, Access = private)
        % Parse the Name-Value pairs for TallDatastore
        % Returns the inputParser Results struct with an extra
        % UsingDefaults field listing the parameters left at their defaults.
        function parsedStruct = parseNameValues(varargin)
            import matlab.io.datastore.MatSeqDatastore;
            import matlab.io.datastore.TallDatastore;
            % The parser is built once and reused across calls.
            persistent inpP;
            if isempty(inpP)
                inpP = inputParser;
                addParameter(inpP, MatSeqDatastore.FILETYPE_PROP_NAME, MatSeqDatastore.DEFAULT_FILE_TYPE);
                addParameter(inpP, MatSeqDatastore.READSIZE_PROP_NAME, MatSeqDatastore.DEFAULT_READ_SIZE);
                inpP.FunctionName = TallDatastore.M_FILENAME;
            end
            parse(inpP, varargin{:});
            parsedStruct = inpP.Results;
            parsedStruct.UsingDefaults = inpP.UsingDefaults;
        end
    end

    methods (Static, Hidden)
        % This function is responsible for determining whether a given
        % location is supported by a TallDatastore.
        function tf = supportsLocation(files, nvStruct)
            tf = false;
            if isempty(files)
                % From datastore gateway, one will not be able to construct an empty
                % datastore, except with a type Name-Value pair.
                return;
            end
            nvStruct.ValuesOnly = true;
            tf = matlab.io.datastore.MatSeqDatastore.supportsLocation(files, nvStruct);
        end
    end

    methods (Access = protected)
        % matlab.mixin.CustomDisplay method.
        % Used for custom display the properties of the object.
        function displayScalarObject(tds)
            % header
            disp(getHeader(tds));
            group = getPropertyGroups(tds);
            detailsStr = evalc('details(tds)');
            nsplits = strsplit(detailsStr, '\n');
            filesStr = nsplits(~cellfun(@isempty, strfind(nsplits, 'Files: ')));
            % Find the indent spaces from details
            if isempty(filesStr)
                % No 'Files: ' line in the details output: skip the custom
                % Files display and let the property groups show it.
                nFilesIndent = 0;
            else
                nFilesIndent = strfind(filesStr{1}, 'Files: ') - 1;
            end
            if nFilesIndent > 0
                % File Properties
                filesIndent = [sprintf(repmat(' ',1,nFilesIndent)) 'Files: '];
                nlspacing = sprintf(repmat(' ',1,numel(filesIndent)));
                if isempty(tds.Files)
                    nlspacing = '';
                end
                import matlab.io.datastore.internal.cellArrayDisp;
                filesStrDisp = cellArrayDisp(tds.Files, true, nlspacing);
                disp([filesIndent filesStrDisp]);
                % Remove Files property from the group, since custom
                % display is used for Files.
                group.PropertyList = rmfield(group.PropertyList, 'Files');
            end
            matlab.mixin.CustomDisplay.displayPropertyGroups(tds, group);
            disp(getFooter(tds));
        end
    end
end