% Source: www.gusucode.com > datastoreio toolbox, MATLAB source code > datastoreio/+matlab/+io/+datastore/@KeyValueDatastore/KeyValueDatastore.m
classdef (Sealed) KeyValueDatastore < ...
                  matlab.io.datastore.MatSeqDatastore & ...
                  matlab.io.datastore.TabularDatastore & ...
                  matlab.mixin.CustomDisplay
%KEYVALUEDATASTORE Datastore for key-value pairs for use with mapreduce.
%   KVDS = datastore(LOCATION)
%   KVDS = datastore(LOCATION,'Type','keyvalue') creates a
%   KEYVALUEDATASTORE if a key-value formatted file or a collection of
%   such files is present in LOCATION. LOCATION has the following
%   properties:
%      - Can be a filename or a folder name
%      - Can be a cell array of multiple file or folder names
%      - Can contain a relative path (HDFS requires full paths)
%      - Can contain a wildcard (*) character.
%      - All the files in LOCATION must be MAT-Files (or Sequence files)
%        containing key-value data, typically produced by mapreduce.
%
%   KVDS = datastore(__,'IncludeSubfolders',TF) specifies the logical
%   true or false to indicate whether the files in each folder and its
%   subfolders are included recursively or not.
%
%   KVDS = datastore(__,'FileExtensions',EXTENSIONS) specifies the
%   extensions of files to be included. Values for EXTENSIONS can be:
%      - A character vector, such as '.mat' or '.seq' (empty quotes '' are
%        allowed for files without extensions)
%      - A cell array of character vectors, such as {'.mat', ''}
%
%   KVDS = datastore(__,'ReadSize',readSize) specifies the maximum number
%   of key-value pairs returned by read or preview. By default, ReadSize is
%   1. If the values are small, increase ReadSize.
%
%   KVDS = datastore(__,'FileType',fileType) specifies the type of files in
%   LOCATION. The default FileType is 'mat', for key-value pairs stored in
%   MAT-files, typically produced by mapreduce. FileType can also be 'seq',
%   for key-value pairs stored in one or more sequence files, typically
%   produced by running mapreduce with Hadoop.
%
%
%   KeyValueDatastore Methods:
%
%   preview       - Read a small amount of data from the start of the
%                   datastore.
%   read          - Read some data from the datastore.
%   readall       - Read all of the data from the datastore.
%   hasdata       - Returns true if there is more data in the datastore.
%   reset         - Reset the datastore to the start of the data.
%   partition     - Return a new datastore that represents a single
%                   partitioned part of the original datastore.
%   numpartitions - Return an estimate for a reasonable number of
%                   partitions to use with the partition function for
%                   the given information.
%
%   KeyValueDatastore Properties:
%
%   Files    - Cell array of filenames.
%   FileType - The type of file supported by this datastore.
%   ReadSize - Upper limit for the number of key-value pairs to read.
%
%   Example:
%   --------
%      % 'mapredout.mat' is the output file of a mapreduce function.
%      kvds = datastore('mapredout.mat')
%      % Read the first key-value pair
%      kv1 = read(kvds)
%      % Set the ReadSize = 6, to read the next 6 key-value pairs.
%      kvds.ReadSize = 6;
%      % Read the next 6 key-value pairs
%      kv6 = read(kvds)
%      % Read all of the key-value pairs
%      kvall = readall(kvds)
%
%   See also matlab.io.datastore.TabularTextDatastore, mapreduce, datastore.

%   Copyright 2014-2016 The MathWorks, Inc.

    properties (Dependent)
        %Files -
        % MAT files or SEQUENCE files in the KeyValueDatastore.
        Files;
        %ReadSize -
        % Maximum number of key-value pairs to read. Default is 1.
        ReadSize;
    end

    properties (SetAccess = protected)
        %FileType -
        % The type of file supported by the KeyValueDatastore. FileType
        % must be 'mat' or 'seq'. By default, FileType is 'mat'.
        FileType;
    end

    properties (Access = protected)
        % Deployment needs a way to get files before resolving them
        UnresolvedFiles
        %ErrorCatalogName - Error catalog name for error handling
        ErrorCatalogName;
        %MFilename - mfilename of the subclasses for error handling
        MFilename;
        %ValuesOnly - Only values are supported from the MAT-files or Sequence files
        ValuesOnly;
    end

    properties (Access = private)
        % To help support future forward compatibility. The value
        % indicates the version of MATLAB.
        SchemaVersion;
    end

    properties (Constant, Access = private)
        M_FILENAME = mfilename;
        ERROR_CATALOG_NAME = 'keyvaluedatastore';
        TABLE_OUTPUT_VARIABLE_NAMES = {'Key', 'Value'};
    end

    % Constructor
    methods
        % KeyValueDatastore can be constructed with files argument,
        % optionally providing FileType and ReadSize Name-Value pairs.
        function kvds = KeyValueDatastore(files, varargin)
            try
                import matlab.io.datastore.KeyValueDatastore;
                nameValues = KeyValueDatastore.parseNameValues(varargin{:});
                % SchemaVersion indicates the release number of MATLAB. This will be empty in
                % 14b or the appropriate release, if we set it in the constructor.
                kvds.SchemaVersion = version('-release');
                initializeForConstruction(kvds, files, nameValues);
            catch e
                % Report the error as if it came from the caller, hiding
                % the constructor internals from the stack.
                throwAsCaller(e);
            end
        end
    end

    % Set and Get methods for properties
    methods
        % Set method for Files
        function set.Files(kvds, files)
            try
                % initialize the datastore with files, current filetype and current readsize
                [files, info] = preambleSetFiles(kvds, files);
                initDatastore(kvds, files, info);
            catch e
                throw(e)
            end
        end

        % Set method for ReadSize
        function set.ReadSize(kvds, readSize)
            try
                validateReadSize(kvds, readSize);
            catch e
                throw(e);
            end
            % ReadSize is stored on the splitter as KeyValueLimit (see
            % get.ReadSize), and mirrored onto the active reader.
            kvds.Splitter.KeyValueLimit = readSize;
            % if we have a non empty splitter, then a reader is guaranteed.
            if kvds.Splitter.NumSplits ~= 0
                kvds.SplitReader.KeyValueLimit = readSize;
            end
        end

        % Get Files
        function files = get.Files(kvds)
            files = kvds.Splitter.Files;
        end

        % Get ReadSize
        function readSize = get.ReadSize(kvds)
            readSize = kvds.Splitter.KeyValueLimit;
        end
    end

    methods (Access = private)
        % Initialize datastore values before passing to initDatastore of the superclass
        % MatSeqDatastore
        function initializeForConstruction(ds, files, info)
            import matlab.io.datastore.KeyValueDatastore;
            import matlab.io.datastore.MatSeqDatastore;
            ds.UnresolvedFiles = files;
            ds.ErrorCatalogName = KeyValueDatastore.ERROR_CATALOG_NAME;
            ds.MFilename = KeyValueDatastore.M_FILENAME;
            ds.ValuesOnly = false;
            info.ValuesOnly = false;
            info.FromConstruction = true;
            initDatastore(ds, files, info);
        end
    end

    methods (Static, Hidden)
        % This function is responsible for determining whether a given
        % location is supported by a KeyValueDatastore.
        function tf = supportsLocation(files, nvStruct)
            if isempty(files) && iscell(files)
                % From datastore gateway, one will be able to construct an empty
                % KeyValueDatastore.
                tf = true;
                return;
            end
            % Force ValuesOnly to false, then defer the decision to the
            % MatSeqDatastore implementation.
            nvStruct.ValuesOnly = false;
            tf = matlab.io.datastore.MatSeqDatastore.supportsLocation(files, nvStruct);
        end

        % Upgrade a saved datastore from an older release while loading,
        % then hand over to the FileBasedDatastore loadobj.
        function outds = loadobj(ds)
            if ~isempty(ds.Splitter.Splits)
                if ~isfield(ds.Splitter.Splits, 'SchemaAvailable')
                    % This must be a 14b datastore being loaded in 15a or 15b
                    % Add SchemaAvailable false field to all the Splits, if not available
                    setSchemaAvailable(ds.Splitter, false);
                end
                if ~isfield(ds.Splitter.Splits, 'ValuesOnly')
                    % This must be a 14b datastore being loaded in 15a or 15b
                    % Add ValuesOnly false field to all the Splits, if not available
                    setSplitsWithValuesOnly(ds.Splitter, false);
                end
            end
            if isprop(ds, 'SchemaVersion') && isequal(ds.SchemaVersion, [])
                % This must be a 14b datastore being loaded in 15a as
                % SchemaVersion was introduced only in 15a.
                matlab.io.datastore.KeyValueDatastore.load14bin15a(ds);
            elseif isa(ds, 'struct')
                % This must be a 14b datastore loaded in 15b
                ds = matlab.io.datastore.KeyValueDatastore.load14bIn15b(ds);
            end
            % At this point we have a KeyValueDatastore object which
            % calls the super class loadobj for safe loading.
            outds = loadobj@matlab.io.datastore.FileBasedDatastore(ds);
        end
    end

    methods (Access = protected)
        % Custom scalar display: Files and ReadSize are printed with their
        % own formatting, aligned to the indentation that details() uses;
        % the remaining properties use the default property-group display.
        function displayScalarObject(kvds)
            % header
            disp(getHeader(kvds));
            group = getPropertyGroups(kvds);
            % The evalc expression refers to the input by name, so the
            % parameter must stay named kvds.
            detailsStr = evalc('details(kvds)');
            nsplits = strsplit(detailsStr, '\n');

            filesStr = nsplits(~cellfun(@isempty, strfind(nsplits, 'Files: ')));
            % Find the indent spaces from details. Guard against a missing
            % line, and use only the first occurrence so the indent is
            % always a scalar.
            nFilesIndent = 0;
            if ~isempty(filesStr)
                filesPos = strfind(filesStr{1}, 'Files: ');
                nFilesIndent = filesPos(1) - 1;
            end
            if nFilesIndent > 0
                % File Properties
                filesIndent = [blanks(nFilesIndent) 'Files: '];
                nlspacing = blanks(numel(filesIndent));
                if isempty(kvds.Files)
                    nlspacing = '';
                end
                import matlab.io.datastore.internal.cellArrayDisp;
                filesStrDisp = cellArrayDisp(kvds.Files, true, nlspacing);
                disp([filesIndent filesStrDisp]);
                % Remove Files property from the group, since custom
                % display is used for Files.
                group.PropertyList = rmfield(group.PropertyList, 'Files');
            end

            readSizeStr = nsplits(~cellfun(@isempty, strfind(nsplits, 'ReadSize: ')));
            if ~isempty(readSizeStr)
                readSizePos = strfind(readSizeStr{1}, 'ReadSize: ');
                readSizeIndent = [blanks(readSizePos(1) - 1) 'ReadSize: '];
                disp([readSizeIndent getString(message('MATLAB:datastoreio:keyvaluedatastore:keyValueString', num2str(kvds.ReadSize)))]);
                % Remove ReadSize from the group only when the custom line
                % was printed; otherwise it falls through to the default
                % display below.
                group.PropertyList = rmfield(group.PropertyList, 'ReadSize');
            end

            matlab.mixin.CustomDisplay.displayPropertyGroups(kvds, group);
            disp(getFooter(kvds));
        end

        % readData method protected declaration.
        [data, info] = readData(obj);
        % readAllData method protected declaration.
        data = readAllData(obj);
    end

    methods (Static, Access = private)
        % Parse the constructor's Name-Value pairs. Returns the parser
        % Results struct with an extra UsingDefaults field listing the
        % parameters that were not supplied.
        function parsedStruct = parseNameValues(varargin)
            import matlab.io.datastore.MatSeqDatastore;
            import matlab.io.datastore.KeyValueDatastore;
            % The parser is persistent, so it is configured only on first use.
            persistent inpP;
            if isempty(inpP)
                inpP = inputParser;
                addParameter(inpP, MatSeqDatastore.FILETYPE_PROP_NAME, KeyValueDatastore.DEFAULT_FILE_TYPE);
                addParameter(inpP, MatSeqDatastore.READSIZE_PROP_NAME, KeyValueDatastore.DEFAULT_READ_SIZE);
                addParameter(inpP, MatSeqDatastore.INCLUDE_SUBFOLDERS_NV_NAME, KeyValueDatastore.DEFAULT_INCLUDE_SUBFOLDERS);
                addParameter(inpP, MatSeqDatastore.FILE_EXTENSIONS_NV_NAME, KeyValueDatastore.DEFAULT_FILE_EXTENSIONS);
                inpP.FunctionName = KeyValueDatastore.M_FILENAME;
            end
            parse(inpP, varargin{:});
            parsedStruct = inpP.Results;
            parsedStruct.UsingDefaults = inpP.UsingDefaults;
        end

        % Rebuild a KeyValueDatastore object from the struct form of a 14b
        % datastore loaded in 15b.
        function ds = load14bIn15b(dsStruct)
            import matlab.io.datastore.KeyValueDatastore;
            %empty datastore
            ds = KeyValueDatastore({});
            import matlab.io.datastore.splitter.*;
            switch dsStruct.SplitReader.FileType
                case 'mat'
                    ds.FileType = 'mat';
                    ds.Splitter = MatKVFileSplitter.createFromSplits(dsStruct.Splitter.Splits);
                    ds.Splitter.KeyValueLimit = dsStruct.SplitReader.KeyValueLimit;
                case 'seq'
                    ds.FileType = 'seq';
                    ds.Splitter = SequenceFileSplitter.createFromSplits(dsStruct.Splitter.Splits);
                    ds.Splitter.KeyValueLimit = dsStruct.SplitReader.KeyValueLimit;
            end
            if ds.Splitter.NumSplits ~= 0
                % create a stub reader so copy() works fine as it expects
                % a non empty datastore to have a reader.
                ds.SplitReader = ds.Splitter.createReader(1);
            end
        end

        % Set FileType on a 14b datastore object loaded in 15a.
        function load14bin15a(ds)
            import matlab.io.datastore.KeyValueDatastore;
            % NOTE(review): supportsLocation, as declared in this class,
            % takes (files, nvStruct) and returns a single output (tf).
            % Requesting a second output here looks like it would raise a
            % "too many output arguments" error -- confirm against the
            % superclass API and repair this legacy-load path.
            [~, support] = KeyValueDatastore.supportsLocation(ds.Splitter.Files);
            switch support
                % 14b version automatically sets the FileType based on
                % Files property.
                case 'MATSupport'
                    ds.FileType = 'mat';
                case 'SEQSupport'
                    ds.FileType = 'seq';
            end
        end
    end

    methods (Hidden)
        % method used by deployment to get KeyValueLimit.
        function kvlimit = getKeyValueLimit(kvds)
            kvlimit = kvds.Splitter.KeyValueLimit;
        end
    end
end