% Source: www.gusucode.com > MATLAB example source code, MATLAB code programs > matlab/SplitDataIntoGroupsAndCalculateStatisticsExample.m

%% Split Data into Groups and Calculate Statistics
% This example shows how to split data from the |patients.mat| data file into 
% groups. Then it shows how to calculate mean weights and body mass
% indices, and standard deviations in blood pressure readings, for the groups
% of patients. It also shows how to summarize the results in a table. 

% Copyright 2015 The MathWorks, Inc.


%% Load Patient Data
% Bring the sample data set, gathered from 100 patients, into the workspace.
load('patients')

%%
% Turn |SelfAssessedHealthStatus| and |Gender| into categorical arrays,
% then list the variables that are now in the workspace.
SelfAssessedHealthStatus = categorical(SelfAssessedHealthStatus);
Gender = categorical(Gender);
whos

%% Calculate Mean Weights
% Use the |Smoker| variable to separate the patients into nonsmokers and
% smokers, then compute the mean weight of each group.
[G,smoker] = findgroups(Smoker);
meanWeight = splitapply(@mean,Weight,G)

%% 
% |findgroups| builds |G|, a vector of group numbers derived from |Smoker|.
% |splitapply| relies on |G| to divide |Weight| into two groups, calls
% |mean| on each group, and concatenates the resulting mean weights into
% a vector.
%
% The second output of |findgroups| is a vector of group identifiers.
% Because |Smoker| holds logical values, the identifiers are logical too.
% The first group contains the nonsmokers and the second group contains
% the smokers.
smoker

%%
% Now group the patient weights by gender and smoking status together,
% and compute the mean weight of each combination.
G = findgroups(Gender,Smoker);
meanWeight = splitapply(@mean,Weight,G)

%% 
% The unique combinations of |Gender| and |Smoker| define four groups of
% patients: female nonsmokers, female smokers, male nonsmokers, and male
% smokers. Request the group identifiers as well and collect them in a
% table next to the mean weights.
[G,gender,smoker] = findgroups(Gender,Smoker);
T = table(gender,smoker,meanWeight)

%%
% |T.gender| holds categorical values and |T.smoker| holds logical values,
% matching the data types of |Gender| and |Smoker| respectively.
%
% Compute the mean body mass index (BMI) of the four patient groups. The
% anonymous function takes |Height| and |Weight| as its two inputs,
% evaluates |(w ./ h.^2) * 703| for every patient, and averages the result.
meanBMIfcn = @(h,w) mean(w ./ h.^2 * 703);
BMI = splitapply(meanBMIfcn,Height,Weight,G)

%% Group Patients Based on Self-Reports
% Calculate the fraction of patients who report their health as either |Poor| 
% or |Fair|. First, use |splitapply| to count the number of patients
% in each group: female nonsmokers, female smokers, male nonsmokers,
% and male smokers. Then, count only those patients who report their
% health as either |Poor| or |Fair| by counting the true entries of the
% logical mask |I| within each group.
% From these two sets of counts, calculate the fraction for each group.
[G,gender,smoker] = findgroups(Gender,Smoker);
S = SelfAssessedHealthStatus;
I = ismember(S,{'Poor','Fair'});
numPatients = splitapply(@numel,S,G);
% Pass the mask as data and keep the full grouping vector. Indexing |G|
% with |I| before the call would drop the group number of any group that
% has no |Poor| or |Fair| patients, and |splitapply| requires every integer
% from 1 to max(G) to occur in the grouping vector.
numPF = splitapply(@nnz,I,G);
numPF./numPatients

%%
% Compare the standard deviation in |Diastolic| readings of those patients 
% who report |Poor| or |Fair| health, and those patients who report |Good| 
% or |Excellent| health. The selection happens inside the applied function,
% so the grouping vector stays complete; a group with no selected patients
% passes an empty selection to |std| instead of making |splitapply| fail.
stdOfSelected = @(d,keep) std(d(keep));
stdDiastolicPF = splitapply(stdOfSelected,Diastolic,I,G);
stdDiastolicGE = splitapply(stdOfSelected,Diastolic,~I,G);

%%
% Collect results in a table. For these patients, the female nonsmokers who report 
% |Poor| or |Fair| health show the widest variation in blood pressure readings.
T = table(gender,smoker,numPatients,numPF,stdDiastolicPF,stdDiastolicGE,BMI)