% www.gusucode.com > IPCV_Eval_Kit_R2019a_0ce6858工具箱matlab程序源码 > IPCV_Eval_Kit_R2019a_0ce6858/code/demo_files/I3_12_1_ddpg_with_image.m

    %% Swing-up of a pendulum using a DDPG agent with image observations
% This example shows how to train a pendulum modeled in MATLAB with a
% Deep Deterministic Policy Gradient (DDPG) agent.
% Running it requires Reinforcement Learning Toolbox and Deep Learning Toolbox.

%% Create the interface to the environment
% Create the predefined environment for the pendulum
% (no semicolon: the environment object is intentionally displayed)
env = rlPredefinedEnv('SimplePendulumWithImage-Continuous')

%% Get the observation and action specifications from the environment interface.
obsInfo = getObservationInfo(env);
actInfo = getActionInfo(env);

%% Fix the random seed for reproducibility.
rng(0)

%% Define the DDPG agent (critic network)
hiddenLayerSize1 = 400;
hiddenLayerSize2 = 300;

% Image observation path: CNN feature extractor followed by fully
% connected layers; the two other input paths merge into it at
% 'cat1' (angular rate) and 'add' (action).
imageObsPath = [
    imageInputLayer(obsInfo(1).Dimension,'Normalization','none','Name',obsInfo(1).Name)
    convolution2dLayer(10,2,'Name','conv1','Stride',5,'Padding',0)
    reluLayer('Name','relu1')
    fullyConnectedLayer(2,'Name','fc1')
    concatenationLayer(3,2,'Name','cat1')
    fullyConnectedLayer(hiddenLayerSize1,'Name','fc2')
    reluLayer('Name','relu2')
    fullyConnectedLayer(hiddenLayerSize2,'Name','fc3')
    additionLayer(2,'Name','add')
    reluLayer('Name','relu3')
    fullyConnectedLayer(1,'Name','fc4')
    ];

% Angular-rate observation path (connected into 'cat1').
angularRatePath = [
    imageInputLayer(obsInfo(2).Dimension,'Normalization','none','Name',obsInfo(2).Name)
    fullyConnectedLayer(1,'Name','fc5','BiasLearnRateFactor',0,'Bias',0)
    ];

% Action input path (connected into 'add').
actionPath = [
    imageInputLayer(actInfo(1).Dimension,'Normalization','none','Name','action')
    fullyConnectedLayer(hiddenLayerSize2,'Name','fc6','BiasLearnRateFactor',0,'Bias',zeros(hiddenLayerSize2,1))
    ];

% Assemble the three paths into one layer graph and wire them together.
criticNetwork = addLayers(addLayers(layerGraph(imageObsPath),angularRatePath),actionPath);
criticNetwork = connectLayers(criticNetwork,'fc5','cat1/in2');
criticNetwork = connectLayers(criticNetwork,'fc6','add/in2');

%% Visualize the structure of the critic network.
figure
plot(criticNetwork)

%% Options for the critic
criticOptions = rlRepresentationOptions;
criticOptions.LearnRate = 1e-03;
criticOptions.GradientThreshold = 1;
% Uncomment the line below to train the CNN on a GPU.
% criticOptions.UseDevice = 'gpu';

%% Create the critic representation
critic = rlRepresentation(criticNetwork,criticOptions,'Observation',{'pendImage','angularRate'},obsInfo,'Action',{'action'},actInfo);

%% Define the network for the actor
% Image observation path: same CNN trunk as the critic, but ending in
% tanh + scaling so the emitted action stays within the action limits.
imageObsPath = [
    imageInputLayer(obsInfo(1).Dimension,'Normalization','none','Name',obsInfo(1).Name)
    convolution2dLayer(10,2,'Name','conv1','Stride',5,'Padding',0)
    reluLayer('Name','relu1')
    fullyConnectedLayer(2,'Name','fc1')
    concatenationLayer(3,2,'Name','cat1')
    fullyConnectedLayer(hiddenLayerSize1,'Name','fc2')
    reluLayer('Name','relu2')
    fullyConnectedLayer(hiddenLayerSize2,'Name','fc3')
    reluLayer('Name','relu3')
    fullyConnectedLayer(1,'Name','fc4')
    tanhLayer('Name','tanh1')
    scalingLayer('Name','scale1','Scale',max(actInfo.UpperLimit))
    ];

% Angular-rate observation path (connected into 'cat1').
angularRatePath = [
    imageInputLayer(obsInfo(2).Dimension,'Normalization','none','Name',obsInfo(2).Name)
    fullyConnectedLayer(1,'Name','fc5','BiasLearnRateFactor',0,'Bias',0)
    ];

% Assemble the two paths into one layer graph and wire them together.
actorNetwork = addLayers(layerGraph(imageObsPath),angularRatePath);
actorNetwork = connectLayers(actorNetwork,'fc5','cat1/in2');

actorOptions = rlRepresentationOptions;
actorOptions.LearnRate = 1e-04;
actorOptions.GradientThreshold = 1;
% Uncomment the line below to train on a GPU.
% (The original example mistakenly referenced criticOptions here.)
% actorOptions.UseDevice = 'gpu';

%% Create the actor representation
actor = rlRepresentation(actorNetwork,actorOptions,'Observation',{'pendImage','angularRate'},obsInfo,'Action',{'scale1'},actInfo);

%% Visualize the structure of the actor network
figure
plot(actorNetwork)

%% Create the DDPG agent
agentOptions = rlDDPGAgentOptions;
agentOptions.SampleTime = env.Ts;
agentOptions.TargetSmoothFactor = 1e-3;
agentOptions.ExperienceBufferLength = 1e6;
agentOptions.DiscountFactor = 0.99;
agentOptions.MiniBatchSize = 128;
% Exploration noise: start wide and decay slowly over training.
agentOptions.NoiseOptions.Variance = 0.6;
agentOptions.NoiseOptions.VarianceDecayRate = 1e-6;
agent = rlDDPGAgent(actor,critic,agentOptions);

%% Configure the training options for the agent
maxepisodes = 5000;
maxsteps = 400;
trainingOptions = rlTrainingOptions;
trainingOptions.MaxEpisodes = maxepisodes;
trainingOptions.MaxStepsPerEpisode = maxsteps;
trainingOptions.Plots = 'training-progress';
% Stop training once the average reward reaches -740.
trainingOptions.StopTrainingCriteria = 'AverageReward';
trainingOptions.StopTrainingValue = -740;

%% Visualize the pendulum state
plot(env);

%% Run the training
doTraining = false;
if ~doTraining
    % Load the pretrained agent shipped with the example.
    pretrainedAgentFile = fullfile(matlabroot,'examples','rl','SimplePendulumWithImageDDPG.mat');
    load(pretrainedAgentFile,'agent');
else
    % Train the agent from scratch (this can take a long time).
    trainingStats = train(agent,env,trainingOptions);
end

%% Run the trained DDPG agent
simOptions = rlSimulationOptions;
simOptions.MaxSteps = 500;
experience = sim(env,agent,simOptions);

%% 
% _Copyright 2019 The MathWorks, Inc._