www.gusucode.com > datastoreio工具箱 matlab源码程序 > datastoreio/+matlab/+io/+datastore/+internal/TextFileSplitReader.m

    classdef TextFileSplitReader < matlab.io.datastore.internal.SplitReader
%TextFileSplitReader   SplitReader for reading text file splits.
%   Allows the user to ensure the reader returns data ending at logical 
%   record boundaries as defined by the end of record (EOR) character 
%   sequence. One can customize the EOR sequence and the file encoding
%   used by this reader. The reader is assigned a split and iterates
%   through it using the hasSplitData / readSplitData paradigm, until it
%   runs out of data.
%
%   Typical call order, as implied by the methods below: construct the
%   reader, assign Split, call reset (which opens the stream), then loop
%   on hasSplitData / readSplitData.

%   Copyright 2014-2016 The MathWorks, Inc.

    properties (Access = 'public')
        Split;              % the split to read; [] until one is assigned
        Eor = [];           % the end of record for this reader
        FileEncoding = 'UTF-8'; % stored as the canonical encoding name
    end

    properties (Access = 'private', Transient = true)
        SizeRead = 0;       % size currently read from the split
        Stream;             % the stream to use for reading
    end

    properties (Constant = true, Access = 'private')
        DFLT_EOR = '\r\n';  % default end of record
        DEFAULT_FILE_ENCODING = 'UTF-8';
    end
    
    methods
        function set.Split(rdr, split)
            % Store the split as-is. The stream is not reopened here;
            % reset must be called to start reading the new split.
            rdr.Split = split;
        end
        
        function set.Eor(rdr, eor)
            % Validate and store the end-of-record sequence.
            % Accepted values are the default escaped sequence '\r\n', or
            % any string that sprintf expands to exactly one character
            % (for example '\n'). Anything else raises invalidStr.
            % NOTE(review): isString is an external validator; presumably
            % it checks for a char row vector - confirm.
            import matlab.io.internal.validators.isString;
            import matlab.io.datastore.internal.TextFileSplitReader;
            if ~isString(eor) || (~strcmp(eor, TextFileSplitReader.DFLT_EOR) && numel(sprintf(eor)) ~= 1)
                error(message('MATLAB:datastoreio:textfilesplitreader:invalidStr', ...
                    'RowDelimiter'));
            end
            rdr.Eor = eor;
        end
        
        function set.FileEncoding(rdr, fileEncoding)
            % Validate the encoding and store its canonical name rather
            % than the spelling supplied by the caller.
            encStats = ... % throws errors for invalid encoding
                  matlab.io.datastore.internal.encodingStats(fileEncoding);
            rdr.FileEncoding = encStats.CanonicalName;
        end
    end
    
    methods (Access = 'public')
        
        function rdr = TextFileSplitReader(eor, fileEncoding)
            % Create a TextFileSplitReader.
            %   eor          - end of record sequence (default '\r\n')
            %   fileEncoding - file encoding name (default 'UTF-8')
            % The reader is created without a split and without a stream.
            import matlab.io.datastore.internal.TextFileSplitReader;
            
            if nargin < 2
                fileEncoding = TextFileSplitReader.DEFAULT_FILE_ENCODING;
            end
            
            if nargin < 1
                eor = TextFileSplitReader.DFLT_EOR;
            end
            
            % reader properties must be initialized before use; these
            % assignments go through the validating set methods above
            rdr.FileEncoding = fileEncoding;
            rdr.Eor = eor;
            rdr.Split = [];
            rdr.Stream = [];
        end
        
        function tf = hasSplitData(rdr)
            % Return logical scalar indicating availability of data.
            % False when no split is assigned or when the number of bytes
            % read so far has reached the size of the split.
            tf = ~isempty(rdr.Split) && rdr.SizeRead < rdr.Split.Size;
        end
        
        function [data, info] = readSplitData(rdr)
            % Return "data" and "info" read while iterating over the split.
            %   data - text of the remainder of the split, extended up to
            %          the next end of record; '' if the split is consumed
            %   info - struct with fields Filename, FileSize, Offset and
            %          NumCharactersRead describing this read
            % Requires that a split has been assigned and that reset has
            % been called, since reset is what creates rdr.Stream.
            
            % local vars
            split = rdr.Split;
            eorStr = sprintf(rdr.Eor); % expand escapes such as '\r\n'
            if rdr.SizeRead < split.Size
                remainingSize = split.Size-rdr.SizeRead;
                % read what is left of the split, then continue past the
                % split boundary so the data ends on a record boundary
                % NOTE(review): readUpto presumably reads through the next
                % occurrence of eorStr - confirm against the stream class.
                [splitBody, numBytes] = readTextBytes(rdr.Stream, remainingSize);
                [splitEnd, numEndBytes] = readUpto(rdr.Stream, eorStr);
                data = [splitBody, splitEnd];
            else
                data = '';
                numBytes = 0;
                numEndBytes = 0;
            end

            % populate the info struct; Offset is computed before SizeRead
            % is updated, so it is the position at which this read started
            info = struct('Filename', split.Filename, ...
                          'FileSize', split.FileSize, ...
                          'Offset', split.Offset + rdr.SizeRead, ...
                          'NumCharactersRead', numel(data));

            % update the size read with the bytes consumed by both the
            % body read and the read up to the end of record
            rdr.SizeRead = rdr.SizeRead + numBytes + numEndBytes;
        end

        function reset(rdr)
            % Reset the reader to the beginning of its split.
            % Opens a fresh stream and, for a split that does not start at
            % offset 0, skips to the split start and then past the first
            % end of record. On failure SizeRead is restored and the error
            % is rethrown.
            
            % store the previous SizeRead
            prevSizeRead = rdr.SizeRead;
            
            % Reset the reader to the beginning of the split
            rdr.SizeRead = 0;
            
            try
                % any errors in initializing the stream must restore
                % SizeRead as that determines if a split has data.
                initStream(rdr);
                
                if ~isempty(rdr.Split) && (rdr.Split.Offset ~= 0)
                    % if the split does not start at 0, we must skip up to
                    % its start point, as channels by default are at
                    % position 0
                    % NOTE(review): skipBytes presumably returns the number
                    % of bytes actually skipped, so this difference is the
                    % overshoot past the requested offset - confirm.
                    rdr.SizeRead = skipBytes(rdr.Stream, rdr.Split.Offset) - ...
                                                          rdr.Split.Offset;
                    % we then skip the first record we see
                    eorStr = sprintf(rdr.Eor);
                    [~,numBytes] = readUpto(rdr.Stream, eorStr);
                    rdr.SizeRead = rdr.SizeRead + numBytes;
                end
            catch ME
                % restore the sizeRead as we do not want to change the
                % sizeRead on a failed reset
                rdr.SizeRead = prevSizeRead;
                
                % throw the pathlookup ID as stream ID has IRI information
                if strcmp(ME.identifier, ...
                    'MATLAB:datastoreio:stream:fileNotFound')
                    error(message('MATLAB:datastoreio:pathlookup:fileNotFound', ...
                                                      rdr.Split.Filename));
                end
                
                throw(ME);
            end
        end
        
        function frac = progress(rdr)
        % Fraction of read completion between 0.0 and 1.0 for the split.
        % Returns 1.0 when there is nothing to read: either no split is
        % assigned (the constructor leaves Split empty) or the split has
        % zero size.
            split = rdr.Split;
            if isempty(split) || split.Size == 0
                % Without this guard an unassigned split raised a
                % dot-indexing error. A zero-size split already produced
                % 1.0, but only because min ignores the NaN from 0/0; the
                % result is now explicit.
                frac = 1.0;
                return;
            end
            % SizeRead may exceed Size because reads run on to the next
            % end of record, hence the clamp to 1.0
            frac = min(rdr.SizeRead/split.Size, 1.0);
        end
        
        function delete(rdr)
        % Delete the reader, dropping its reference to the stream.
            rdr.Stream = [];
        end

    end
    
    methods (Access = 'protected')
        function rdrCopy = copyElement(rdr)
           % Copy the reader without sharing the underlying stream.
           % make a shallow copy of all properties
           rdrCopy = copyElement@matlab.mixin.Copyable(rdr);
           % unshare the shallow copy to the handle of the stream; the
           % copy has no stream until reset is called on it
           rdrCopy.Stream = [];
        end 
    end
    
    methods (Access = 'private')

        function initStream(rdr)
            % Create a new text stream for the current split's file using
            % the reader's encoding. Does nothing when no split is
            % assigned, leaving any existing stream in place.
            import matlab.io.datastore.internal.filesys.createStream;
            
            split = rdr.Split;
            if isempty(split)
                return;
            end

            stream = createStream(split.Filename,'rt', rdr.FileEncoding);

            % get rid of the old ref (self destructing Stream)
            % and take the new ref
            rdr.Stream = stream;
        end
        
    end
end