% Source: www.gusucode.com > datastoreio toolbox MATLAB source code > datastoreio/+matlab/+io/+datastore/+splitter/SequenceFileSplitter.m

    classdef (Sealed, Hidden) SequenceFileSplitter < matlab.io.datastore.splitter.FileSizeBasedSplitter
%SEQUENCEFILESPLITTER Splitter to handle Sequence files.
% Helper class that wraps around FileSizeBasedSplitter to adapt this to the
% interface expected by KeyValueDatastore.
%
% Instances are built through the static factories CREATE and
% CREATEFROMSPLITS; the constructor itself is private.

%   Copyright 2015-2016 The MathWorks, Inc.

    properties (Constant, Access = private)
        % KeyValueLimit assigned to the split reader used by readAllSplits.
        READALL_KEY_VALUE_LIMIT = 1000;
        % Split size analogous to hdfs block size
        DEFAULT_SEQ_SPLIT_SIZE = 64*1024*1024; % 64 MB
    end

    properties (Access = public)
        % KeyValueLimit for this Splitter. It is copied onto every reader
        % returned by createReader.
        KeyValueLimit;
    end

    methods (Access = public, Hidden)
        % Required function for KeyValueDatastore/readall. Reads all data
        % from all files.
        %
        % Every split in obj.Splits is read to exhaustion with a single
        % SequenceFileSplitReader and the per-split results are vertically
        % concatenated.
        %
        % Output:
        %   - When the splits are not "values only": the concatenation of
        %     everything read, or an empty table when nothing was read.
        %   - When the splits are "values only": a struct whose Value field
        %     holds that concatenation.
        %
        % Errors:
        %   If the concatenation fails because some splits hold their keys
        %   in a cell and others do not, an invalidKeyConversion error
        %   naming the two conflicting key classes and files is thrown when
        %   such a pair can be identified; otherwise the original error is
        %   thrown.
        function output = readAllSplits(obj)
            import matlab.io.datastore.splitreader.SequenceFileSplitReader;
            import matlab.io.datastore.splitter.SequenceFileSplitter;
            splitReader = SequenceFileSplitReader();
            splitReader.KeyValueLimit = SequenceFileSplitter.READALL_KEY_VALUE_LIMIT;
            splits = obj.Splits;
            % The ValuesOnly flag of the first split is taken as
            % representative for all splits.
            valuesOnly = false;
            if ~isempty(splits)
                valuesOnly = splits(1).ValuesOnly;
            end

            output = cell(numel(splits), 1);
            if ~valuesOnly
                % Key classes can be different. Values are always in a cell.
                % Used for throwing useful error messages.
                % Entries stay empty for splits that produce no data.
                splitKeyClasses = cell(numel(splits), 1);
            end
            for ii = 1:numel(splits)
                splitReader.Split = splits(ii);
                splitReader.reset;

                % Collect every chunk of this split, then stack them once.
                splitOutput = {};
                while hasNext(splitReader)
                    splitOutput{end + 1} = getNext(splitReader); %#ok<AGROW>
                end
                output{ii} = vertcat(splitOutput{:});
                if ~valuesOnly && ~isempty(output{ii})
                    splitKeyClasses{ii} = class(output{ii}.Key);
                end
            end
            try
                output = vertcat(output{:});
            catch e
                if ~valuesOnly && strcmp(e.identifier, 'MATLAB:table:vertcat:VertcatCellAndNonCell')
                    % Throws a more descriptive error when two splits with
                    % different key classes can be identified; returns
                    % normally otherwise, in which case the original error
                    % is thrown below.
                    SequenceFileSplitter.invalidKeyError(splitKeyClasses, splits);
                end
                throw(e);
            end
            % if empty, we do not want empty double array as output.
            if isempty(output)
                output = table;
            end

            if valuesOnly
                % output.Value is used in the readall of TallDatastore for both
                % MAT-files and Sequence files.
                % valuesOnly is true only for TallDatastore.
                data.Value = output;
                output = data;
            end
        end

        % Set the SchemaAvailable field of all splits to the given boolean.
        % NOTE(review): there is no output argument, so the change is only
        % visible to the caller if the superclass is a handle class --
        % confirm against FileSizeBasedSplitter.
        function setSchemaAvailable(splitter, tf)
            [splitter.Splits.SchemaAvailable] = deal(tf);
        end

        % Set the ValuesOnly field of all splits to the given boolean.
        % NOTE(review): same in-place update assumption as
        % setSchemaAvailable.
        function setSplitsWithValuesOnly(splitter, tf)
            [splitter.Splits.ValuesOnly] = deal(tf);
        end

    end

    methods (Static, Access = private)
        % Throw an invalidKeyConversion error for the first two splits
        % whose recorded key classes differ.
        %
        % Inputs:
        %   keyClasses - cell array with one entry per split: the class
        %                name of that split's keys, or empty when no class
        %                was recorded for the split (readAllSplits leaves
        %                the entry empty for splits that produced no data).
        %   splits     - struct array of splits, parallel to keyClasses;
        %                the Filename field is used in the error message.
        %
        % Returns without throwing when fewer than two splits have a
        % recorded key class, or when all recorded classes are identical.
        function invalidKeyError(keyClasses, splits)
            % Entries without a recorded class carry no information about
            % key types. Comparing them with strcmp would always report a
            % mismatch and produce an error naming an empty class and the
            % wrong file, so they are skipped.
            knownIdx = find(~cellfun(@isempty, keyClasses));
            if numel(knownIdx) < 2
                return;
            end
            firstIdx = knownIdx(1);
            c1 = keyClasses{firstIdx};
            % We just need the first 2 differing key classes.
            % The loop stops at the first class that differs from c1,
            % which is cheaper than [i, j, k] = unique(keyClasses).
            for kk = 2:numel(knownIdx)
                otherIdx = knownIdx(kk);
                c2 = keyClasses{otherIdx};
                if ~strcmp(c2, c1)
                    msgid = 'MATLAB:datastoreio:keyvaluedatastore:invalidKeyConversion';
                    msg = message(msgid, c1, c2, ...
                        splits(firstIdx).Filename, splits(otherIdx).Filename);
                    throw(MException(msg));
                end
            end
        end
    end

    methods (Static = true)
        % Create Splitter from appropriate arguments
        %
        % fileInfo must provide the Files and FileSizes properties/fields;
        % they are forwarded to FileSizeBasedSplitter.createArgs together
        % with the default sequence split size.
        function splitter = create(fileInfo)
            import matlab.io.datastore.splitter.SequenceFileSplitter;
            import matlab.io.datastore.splitter.FileSizeBasedSplitter;
            [splits, splitSize] = FileSizeBasedSplitter.createArgs(fileInfo.Files, ...
                                    SequenceFileSplitter.DEFAULT_SEQ_SPLIT_SIZE, fileInfo.FileSizes);
            splitter = SequenceFileSplitter(splits, splitSize);
        end

        % Create Splitter from existing Splits
        %
        % The split size returned by createFromSplitsArgs is discarded; the
        % new splitter always uses the default sequence split size.
        function splitter = createFromSplits(splits)
            import matlab.io.datastore.splitter.SequenceFileSplitter;
            import matlab.io.datastore.splitter.FileSizeBasedSplitter;
            [splits, ~] = FileSizeBasedSplitter.createFromSplitsArgs(splits);
            splitter = SequenceFileSplitter(splits, ...
                                SequenceFileSplitter.DEFAULT_SEQ_SPLIT_SIZE);
        end
    end

    methods (Static, Hidden)

        % Return a logical column vector with one element per file: the
        % result of SequenceFileReader.isSeqSupported for that file.
        % files is indexed as a cell array.
        function tfArr = filterSeqFiles(files, valuesOnly)
            import matlab.io.datastore.internal.SequenceFileReader;
            numFiles = numel(files);
            tfArr = true(numFiles, 1);
            for ii = 1:numFiles
                tfArr(ii) = SequenceFileReader.isSeqSupported(files{ii}, valuesOnly);
            end
        end

        % Check whether every file is supported according to
        % SequenceFileReader.isSeqSupported.
        %
        % Outputs:
        %   tf  - true when all files are supported (also for no files).
        %   idx - index of the first unsupported file, or -1 when tf is
        %         true. Checking stops at the first unsupported file.
        function [tf, idx] = areSeqFilesSupported(files, valuesOnly)
            import matlab.io.datastore.internal.SequenceFileReader;
            numFiles = numel(files);
            tf = true;
            idx = -1;
            for ii = 1:numFiles
                if ~SequenceFileReader.isSeqSupported(files{ii}, valuesOnly)
                    idx = ii;
                    tf = false;
                    break;
                end
            end
        end
    end

    methods (Access = private)
        % Private constructor for static build methods. Both arguments are
        % forwarded unchanged to the FileSizeBasedSplitter constructor.
        function splitter = SequenceFileSplitter(splits, splitSize)
            splitter@matlab.io.datastore.splitter.FileSizeBasedSplitter(splits, splitSize);
        end
    end

    methods (Access = 'public')
        % Create reader for the ii-th split. The reader receives the split
        % and this splitter's KeyValueLimit.
        function rdr = createReader(splitter, ii)
            rdr = matlab.io.datastore.splitreader.SequenceFileSplitReader;
            rdr.Split = splitter.Splits(ii);
            rdr.KeyValueLimit = splitter.KeyValueLimit;
        end

        % Create a copy of this Splitter that uses the given Splits
        %
        % Splits passed as input must be identical in structure to the
        % splits used by this Splitter class.
        function splitterCopy = createCopyWithSplits(splitter, splits)
            splitterCopy = copy(splitter);
            splitterCopy.Splits = splits;
        end
    end
end