www.gusucode.com > stats 源码程序 matlab案例代码 > stats/CategorizeNumericDataExample.m

    %% Categorize Numeric Data  
% This example shows how to categorize numeric data into a categorical ordinal
% array using |ordinal|. This is useful for discretizing continuous data.   

% Copyright 2015 The MathWorks, Inc.


%% Load sample data.
% |hospital| is a dataset array holding variables measured on a sample of
% patients. Summarize the variable |Age| by requesting its 0, 0.5, and 1
% quantiles, that is, its minimum, median, and maximum.
load('hospital')
ageProbs = [0, 0.5, 1];   % cumulative probabilities: min, median, max
quantile(hospital.Age, ageProbs)

%%
% The patient ages range from 25 to 50.  

%% Convert a numeric array to an ordinal array.
% Bin the patient ages into three ordered categories labeled |Under 30|,
% |30-39|, and |Over 40|, and store the result as the new variable |AgeCat|.
ageLabels = {'Under 30','30-39','Over 40'};
ageEdges = [25,30,40,50];   % four endpoints delimit the three categories
hospital.AgeCat = ordinal(hospital.Age,ageLabels,[],ageEdges);
getlevels(hospital.AgeCat)

%%
% The last input argument to |ordinal| specifies the endpoints (bin edges)
% of the categories. The first category begins at age 25, the second at
% age 30, and so on. The last category contains ages 40 and above, so it
% begins at 40 and ends at 50 (the maximum age in the data set). To specify
% three categories, you must specify four endpoints (the last endpoint is
% the upper bound of the last category).

%% Explore categories.
% Show the age of the second patient side by side with the age category
% that was assigned to that patient.
patientIdx = 2;
dataset({hospital.Age(patientIdx),'Age'},...
        {hospital.AgeCat(patientIdx),'AgeCategory'})

%%
% When you discretize a numeric array into categories, the categorical array
% loses all information about the actual numeric values. In this example,
% |AgeCat| is not numeric, and you cannot recover the raw data values from it.  

%% Categorize a numeric array into quartiles.
% |Weight| holds the weight measurements of the sample patients. Use the
% 0, 0.25, 0.5, 0.75, and 1 quantiles of |Weight| as category endpoints so
% that the weights fall into four quartile categories, |Q1| through |Q4|,
% stored as the new variable |WeightQ|.
quartileLabels = {'Q1','Q2','Q3','Q4'};
p = [0, 0.25, 0.5, 0.75, 1];
breaks = quantile(hospital.Weight,p);
hospital.WeightQ = ordinal(hospital.Weight,quartileLabels,[],breaks);
getlevels(hospital.WeightQ)

%% Explore categories.
% Show the weight of the second patient side by side with the weight
% quartile that was assigned to that patient.
patientIdx = 2;
dataset({hospital.Weight(patientIdx),'Weight'},...
        {hospital.WeightQ(patientIdx),'WeightQuartile'})

%% Summary statistics grouped by category levels.
% Group the patients by every combination of age category and weight
% quartile, then compute the mean of |BloodPressure| within each group.
groupVars = {'AgeCat','WeightQ'};
grpstats(hospital,groupVars,'mean','DataVars','BloodPressure')

%%
% The variable |BloodPressure| is a matrix with two columns. The first column
% is systolic blood pressure, and the second column is diastolic blood pressure.
% The group with the highest mean diastolic blood pressure in the sample,
% |87.273|, is patients aged 30-39 in the highest weight quartile, |30-39_Q4|.