www.gusucode.com > econ 案例源码程序 matlab代码 > econ/VARModelCaseStudyExample.m

    %% VAR Model Case Study
% This example shows how to analyze a VAR model.
%% Overview of Case Study
% This section contains an example of the workflow described in
% <docid:econ_ug.bsxcdon-1>. The example uses three time series: GDP, M1
% money supply, and the 3-month T-bill rate. The example shows:
%
% * Loading and transforming the data for stationarity
% * Partitioning the transformed data into presample, estimation, and
% forecast intervals to support a backtesting experiment
% * Making several models
% * Fitting the models to the data
% * Deciding which of the models is best
% * Making forecasts based on the best model
%

% Copyright 2015 The MathWorks, Inc.


%% Load and Transform Data
% The file |Data_USEconModel| ships with Econometrics Toolbox(TM) software.
% The file contains time series from the Federal Reserve Bank of St. Louis
% Economics Data (FRED) database in a tabular array. This example uses
% three of the time series:
%
% * GDP (|GDP|)
% * M1 money supply (|M1SL|)
% * 3-month T-bill rate (|TB3MS|)
%
% Load the data into a time series matrix |Y|.
load Data_USEconModel
gdp = DataTable.GDP;
m1 = DataTable.M1SL;
tb3 = DataTable.TB3MS;
Y = [gdp,m1,tb3];
%%
% Plot the data to look for trends. 
figure
subplot(3,1,1)
plot(dates,Y(:,1),'r');
title('GDP')
datetick('x')
grid on
subplot(3,1,2);
plot(dates,Y(:,2),'b');
title('M1')
datetick('x')
grid on
subplot(3,1,3);
plot(dates, Y(:,3), 'k')
title('3-mo T-bill')
datetick('x')
grid on
hold off
%%
% The GDP and M1 data appear to grow, while the
% T-bill returns show no long-term growth. To counter the trends in GDP and
% M1, take a difference of the logarithms of the data. Taking a difference
% shortens the time series. Therefore, truncate the
% T-bill series and date series |X| so that the
% |Y| data matrix has the same number of rows for each
% column.
Y = [diff(log(Y(:,1:2))), Y(2:end,3)]; % Transformed data
X = dates(2:end);

figure
subplot(3,1,1)
plot(X,Y(:,1),'r');
title('GDP')
datetick('x')
grid on
subplot(3,1,2);
plot(X,Y(:,2),'b');
title('M1')
datetick('x')
grid on
subplot(3,1,3);
plot(X, Y(:,3),'k'),
title('3-mo T-bill')
datetick('x')
grid on
%%
% The scale of the first two columns is about 100 times
% smaller than the third. Multiply the first two columns by 100 so that the
% time series are all roughly on the same scale. This scaling makes it easy
% to plot all the series on the same plot. More importantly, this type of
% scaling makes optimizations more numerically stable (for example,
% maximizing loglikelihoods).
Y(:,1:2) = 100*Y(:,1:2);
figure
plot(X,Y(:,1),'r');
hold on
plot(X,Y(:,2),'b'); 
datetick('x') 
grid on
plot(X,Y(:,3),'k');
legend('GDP','M1','3-mo T-bill'); 
hold off
%% Select and Fit the Models
% You can select many different models for the data. This example
% uses four models.
%
% * VAR(2) with diagonal autoregressive and covariance matrices
% * VAR(2) with full autoregressive and covariance matrices
% * VAR(4) with diagonal autoregressive and covariance matrices
% * VAR(4) with full autoregressive and covariance matrices
%%
% Make all the series the same length, and transform them to be stationary
% and on a similar scale.
dGDP = 100*diff(log(gdp(49:end)));
dM1 = 100*diff(log(m1(49:end)));
dT3 = diff(tb3(49:end));
Y = [dGDP dM1 dT3];
%%
% Create the four models.
dt = logical(eye(3));
VAR2diag = vgxset('ARsolve',repmat({dt},2,1),...
    'asolve',true(3,1),'Series',{'GDP','M1','3-mo T-bill'});
VAR2full = vgxset(VAR2diag,'ARsolve',[]);
VAR4diag = vgxset(VAR2diag,'nAR',4,'ARsolve',repmat({dt},4,1));
VAR4full = vgxset(VAR2full,'nAR',4);
%%
% The matrix |dt| is a diagonal logical matrix. |dt| specifies
% that both the autoregressive matrices for |VAR2diag| and |VAR4diag| are
% diagonal. In contrast, the specifications for |VAR2full| and |VAR4full| have
% empty matrices instead of |dt|. Therefore, |vgxvarx| fits
% the defaults, which are full matrices for autoregressive and correlation
% matrices.
%%
% To assess the quality of the models, divide the response data
% |Y| into three periods: presample, estimation, and
% forecast. Fit the models to the estimation data, using the presample
% period to provide lagged data. Compare the predictions of the fitted
% models to the forecast data. The estimation period is in sample, and the
% forecast period is out of sample (also known as backtesting).
%%
% For the two VAR(4) models, the presample period is the first four rows of |Y|.
% Use the same presample period for the VAR(2) models so that all the
% models are fit to the same data. This is necessary for model fit comparisons.
% For both models, the forecast period is the final 10% of the rows
% of |Y|. The estimation period for the models goes
% from row 5 to the 90% row. Define these data periods.
YPre = Y(1:4,:);
T = ceil(.9*size(Y,1));
YEst = Y(5:T,:);
YF = Y((T+1):end,:);
TF = size(YF,1);
%%
% Now that the models and time series exist, you can easily fit the models
% to the data.
[EstSpec1,EstStdErrors1,logL1,W1] = ...
    vgxvarx(VAR2diag,YEst,[],YPre,'CovarType','Diagonal');
[EstSpec2,EstStdErrors2,logL2,W2] = ...
    vgxvarx(VAR2full,YEst,[],YPre);
[EstSpec3,EstStdErrors3,logL3,W3] = ...
    vgxvarx(VAR4diag,YEst,[],YPre,'CovarType','Diagonal');
[EstSpec4,EstStdErrors4,logL4,W4] = ...
    vgxvarx(VAR4full,YEst,[],YPre);
%%
% * The |EstSpec| structures are the fitted models.
% * The |EstStdErrors| structures contain the standard errors of the fitted models.
% * The |logL| values are the loglikelihoods of the fitted models, which you use to
% help select the best model. 
% * The |W| vectors are the estimated innovations (residuals) processes, which are
% the same size as |YEst|.
% * The specification for |EstSpec1| and |EstSpec3| includes diagonal covariance matrices.
%% Check Model Adequacy
% You can check whether the estimated models are stable and invertible
% using the |vgxqual| function. (There are no MA terms in these models, so
% the models are necessarily invertible.) The test shows that all the
% estimated models are stable.
[isStable1,isInvertible1] = vgxqual(EstSpec1);
[isStable2,isInvertible2] = vgxqual(EstSpec2);
[isStable3,isInvertible3] = vgxqual(EstSpec3);
[isStable4,isInvertible4] = vgxqual(EstSpec4);
[isStable1,isStable2,isStable3,isStable4]
%%
% You can also look at the estimated specification structures. Each
% contains a line stating whether the model is stable.
EstSpec4
%%
% |AR: {4x1 cell} stable autoregressive process| appears in the output
% indicating that the autoregressive process is stable.
%%
% You can compare the restricted (diagonal) AR models to their unrestricted
% (full) counterparts using |lratiotest|. The test rejects or
% fails to reject the hypothesis that the restricted models are adequate,
% with a default 5% tolerance. This is an in-sample test.
%%
% Apply the likelihood ratio tests by counting the parameters in the models
% using |vgxcount|, and then use |lratiotest| to perform the tests.
[n1,n1p] = vgxcount(EstSpec1);
[n2,n2p] = vgxcount(EstSpec2);
[n3,n3p] = vgxcount(EstSpec3);
[n4,n4p] = vgxcount(EstSpec4);
reject1 = lratiotest(logL2,logL1,n2p - n1p)
reject3 = lratiotest(logL4,logL3,n4p - n3p)
reject4 = lratiotest(logL4,logL2,n4p - n2p)
%%
% The |1| results indicate that the likelihood ratio test rejected both the
% restricted models, AR(1) and AR(3), in favor of the corresponding
% unrestricted models. Therefore, based on this test, the unrestricted
% AR(2) and AR(4) models are preferable. However, the test does not reject
% the unrestricted AR(2) model in favor of the unrestricted AR(4) model.
% (This test regards the AR(2) model as an AR(4) model with restrictions
% that the autoregression matrices AR(3) and AR(4) are 0.) Therefore, it
% seems that the unrestricted AR(2) model is the best model.
%%
% To find the best model in a set, minimize the Akaike information
% criterion (AIC). Use in-sample data to compute the AIC. Calculate the
% criterion for the four models.
AIC = aicbic([logL1 logL2 logL3 logL4],[n1p n2p n3p n4p])
%%
% The best model according to this criterion is the unrestricted AR(2)
% model. Notice, too, that the unrestricted AR(4) model has lower Akaike
% information than either of the restricted models. Based on this
% criterion, the unrestricted AR(2) model is best, with the unrestricted
% AR(4) model coming next in preference.
%%
% To compare the predictions of the four models against the forecast data
% |YF|, use |vgxpred|. This function returns both a prediction of
% the mean time series, and an error covariance matrix that gives
% confidence intervals about the means. This is an out-of-sample
% calculation.
[FY1,FYCov1] = vgxpred(EstSpec1,TF,[],YEst);
[FY2,FYCov2] = vgxpred(EstSpec2,TF,[],YEst);
[FY3,FYCov3] = vgxpred(EstSpec3,TF,[],YEst);
[FY4,FYCov4] = vgxpred(EstSpec4,TF,[],YEst);
%%
% This plot shows the predictions in the shaded region to the right.
figure
vgxplot(EstSpec2,YEst,FY2,FYCov2)
%%
% It is now straightforward to calculate the sum-of-squares error between
% the predictions and the data, |YF|.
Error1 = YF - FY1;
Error2 = YF - FY2;
Error3 = YF - FY3;
Error4 = YF - FY4;

SSerror1 = Error1(:)' * Error1(:);
SSerror2 = Error2(:)' * Error2(:);
SSerror3 = Error3(:)' * Error3(:);
SSerror4 = Error4(:)' * Error4(:);
figure
bar([SSerror1 SSerror2 SSerror3 SSerror4],.5)
ylabel('Sum of squared errors')
set(gca,'XTickLabel',...
    {'AR2 diag' 'AR2 full' 'AR4 diag' 'AR4 full'})
title('Sum of Squared Forecast Errors')
%%
% The predictive performances of the four models are similar.
%%
% The full AR(2) model seems to be the best and most parsimonious fit. Its
% model parameters are as follows.
vgxdisp(EstSpec2)
%% Forecast Observations
% You can make predictions or forecasts using the fitted model
% (|EstSpec4|) either by:
%
% * Running |vgxpred| based on the last few rows of |YF|
% * Simulating several time series with |vgxsim|
%
% In both cases, transform the forecasts so they are directly comparable to
% the original time series.
%%
% Generate 10 predictions from the fitted model beginning at the latest
% times using |vgxpred|.
[YPred,YCov] = vgxpred(EstSpec2,10,[],YF);
%%
% Transform the predictions by undoing the scaling and differencing
% applied to the original data. Make sure to insert the last observation at
% the beginning of the time series before using |cumsum| to undo the
% differencing. And, since differencing occurred after taking logarithms,
% insert the logarithm before using |cumsum|.
YFirst = [gdp,m1,tb3]; 
YFirst = YFirst(49:end,:);           % Remove NaNs
dates = dates(49:end);
EndPt = YFirst(end,:);
EndPt(1:2) = log(EndPt(1:2)); 
YPred(:,1:2) = YPred(:,1:2)/100;     % Rescale percentage
YPred = [EndPt; YPred];              % Prepare for cumsum
YPred(:,1:3) = cumsum(YPred(:,1:3));
YPred(:,1:2) = exp(YPred(:,1:2));
lastime = dates(end);
timess = lastime:91:lastime+910;     % Insert forecast horizon

figure
subplot(3,1,1)
plot(timess,YPred(:,1),':r')
hold on
plot(dates,YFirst(:,1),'k')
datetick('x')
grid on 
title('GDP')
subplot(3,1,2);
plot(timess,YPred(:,2),':r')
hold on
plot(dates,YFirst(:,2),'k')
datetick('x')
grid on
title('M1')
subplot(3,1,3);
plot(timess,YPred(:,3),':r')
hold on
plot(dates,YFirst(:,3),'k')
datetick('x')
grid on
title('3-mo T-bill')
hold off
%%
% The plots show the extrapolations in dotted red, and the original data
% series in solid black.
%% 
% Look at the last few years in this plot to get a sense of how the
% predictions relate to the latest data points.
YLast = YFirst(170:end,:);
timeslast = dates(170:end);

figure
subplot(3,1,1)
plot(timess,YPred(:,1),'--r')
hold on
plot(timeslast,YLast(:,1),'k')
datetick('x')
grid on
title('GDP')
subplot(3,1,2);
plot(timess,YPred(:,2),'--r')
hold on
plot(timeslast,YLast(:,2),'k')
datetick('x')
grid on
title('M1')
subplot(3,1,3);
plot(timess,YPred(:,3),'--r')
hold on
plot(timeslast,YLast(:,3),'k')
datetick('x')
grid on
title('3-mo T-bill')
hold off
%%
% The forecast shows increasing GDP, little growth in M1, and a slight
% decline in the interest rate. However, the forecast has no error bars.
%%
% Alternatively, you can generate 10 predictions from the fitted model
% beginning at the latest times using |vgxsim|.  This method simulates 2000
% time series times, and then generates the means and standard deviations
% for each period.  The means of the deviates for each period are the
% predictions for that period.
%%
% Simulate a time series from the fitted model beginning at the latest
% times.
rng(1); % For reproducibility
YSim = vgxsim(EstSpec2,10,[],YF,[],2000);
%%
% Transform the predictions by undoing the scaling and differencing applied
% to the original data. Make sure to insert the last observation at the
% beginning of the time series before using |cumsum| to undo the
% differencing. And, since differencing occurred after taking logarithms,
% insert the logarithm before using |cumsum|.
YFirst = [gdp,m1,tb3]; 
EndPt = YFirst(end,:); 
EndPt(1:2) = log(EndPt(1:2)); 
YSim(:,1:2,:) = YSim(:,1:2,:)/100; 
YSim = [repmat(EndPt,[1,1,2000]);YSim]; 
YSim(:,1:3,:) = cumsum(YSim(:,1:3,:)); 
YSim(:,1:2,:) = exp(YSim(:,1:2,:));
%%
% Compute the mean and standard deviation of each series, and plot the
% results. The plot has the mean in black, with a +/- 1 standard deviation in
% red.
YMean = mean(YSim,3);
YSTD = std(YSim,0,3);

figure
subplot(3,1,1)
plot(timess,YMean(:,1),'k')
datetick('x')
grid on
hold on
plot(timess,YMean(:,1)+YSTD(:,1),'--r')
plot(timess,YMean(:,1)-YSTD(:,1),'--r')
title('GDP')
subplot(3,1,2);
plot(timess,YMean(:,2),'k')
hold on
datetick('x')
grid on
plot(timess,YMean(:,2)+YSTD(:,2),'--r')
plot(timess,YMean(:,2)-YSTD(:,2),'--r')
title('M1')
subplot(3,1,3);
plot(timess,YMean(:,3),'k')
hold on
datetick('x')
grid on
plot(timess,YMean(:,3)+YSTD(:,3),'--r')
plot(timess,YMean(:,3)-YSTD(:,3),'--r')
title('3-mo T-bill')
hold off
%%
% The plots show increasing growth in GDP, moderate to little growth in
% M1, and uncertainty about the direction of T-bill rates.