% Source: www.gusucode.com > IPCV_Eval_Kit_R2019a_0ce6858/code/demo_files/I3_12_1_ddpg_with_image.m
%% Pendulum Swing-Up with a DDPG Agent from Image Observations
% This example shows how to train a Deep Deterministic Policy Gradient (DDPG)
% agent to swing up a pendulum modeled in MATLAB, using image observations.
% Running it requires Reinforcement Learning Toolbox and Deep Learning Toolbox.

%% Create the environment interface
% Use the predefined pendulum environment with image observations and a
% continuous action space.
env = rlPredefinedEnv('SimplePendulumWithImage-Continuous')

%% Obtain the observation and action specifications from the environment interface.
obsInfo = getObservationInfo(env);
actInfo = getActionInfo(env);

%% Fix the random seed for reproducibility.
rng(0)

%% Define the DDPG agent's critic network
hiddenLayerSize1 = 400;
hiddenLayerSize2 = 300;
% Image observation path: a small CNN followed by fully connected layers.
% It is joined with the angular-rate path at 'cat1' (input 2 connected below)
% and with the action path at 'add' (input 2 connected below).
imgPath = [
    imageInputLayer(obsInfo(1).Dimension,'Normalization','none','Name',obsInfo(1).Name)
    convolution2dLayer(10,2,'Name','conv1','Stride',5,'Padding',0)
    reluLayer('Name','relu1')
    fullyConnectedLayer(2,'Name','fc1')
    concatenationLayer(3,2,'Name','cat1')
    fullyConnectedLayer(hiddenLayerSize1,'Name','fc2')
    reluLayer('Name','relu2')
    fullyConnectedLayer(hiddenLayerSize2,'Name','fc3')
    additionLayer(2,'Name','add')
    reluLayer('Name','relu3')
    fullyConnectedLayer(1,'Name','fc4')
    ];
% Angular-rate observation path (second observation channel).
dthetaPath = [
    imageInputLayer(obsInfo(2).Dimension,'Normalization','none','Name',obsInfo(2).Name)
    fullyConnectedLayer(1,'Name','fc5','BiasLearnRateFactor',0,'Bias',0)
    ];
% Action input path; bias is fixed at zero (BiasLearnRateFactor 0).
actPath =[
    imageInputLayer(actInfo(1).Dimension,'Normalization','none','Name','action')
    fullyConnectedLayer(hiddenLayerSize2,'Name','fc6','BiasLearnRateFactor',0,'Bias',zeros(hiddenLayerSize2,1))
    ];
% Assemble the three paths into a single layer graph and wire the
% cross-path connections.
criticNetwork = layerGraph(imgPath);
criticNetwork = addLayers(criticNetwork,dthetaPath);
criticNetwork = addLayers(criticNetwork,actPath);
criticNetwork = connectLayers(criticNetwork,'fc5','cat1/in2');
criticNetwork = connectLayers(criticNetwork,'fc6','add/in2');

%% Visualize the structure of the critic network.
figure
plot(criticNetwork)

%% Critic options
criticOptions = rlRepresentationOptions('LearnRate',1e-03,'GradientThreshold',1);
% Uncomment the line below to train the CNN on a GPU.
% criticOptions.UseDevice = 'gpu';

%% Create the critic representation
critic = rlRepresentation(criticNetwork,criticOptions,'Observation',{'pendImage','angularRate'},obsInfo,'Action',{'action'},actInfo);

%% Define the network for the actor
% Image observation path for the actor: same CNN front end as the critic,
% ending in tanh + scaling so the action stays within the action-space limits.
imgPath = [
    imageInputLayer(obsInfo(1).Dimension,'Normalization','none','Name',obsInfo(1).Name)
    convolution2dLayer(10,2,'Name','conv1','Stride',5,'Padding',0)
    reluLayer('Name','relu1')
    fullyConnectedLayer(2,'Name','fc1')
    concatenationLayer(3,2,'Name','cat1')
    fullyConnectedLayer(hiddenLayerSize1,'Name','fc2')
    reluLayer('Name','relu2')
    fullyConnectedLayer(hiddenLayerSize2,'Name','fc3')
    reluLayer('Name','relu3')
    fullyConnectedLayer(1,'Name','fc4')
    tanhLayer('Name','tanh1')
    scalingLayer('Name','scale1','Scale',max(actInfo.UpperLimit))
    ];
% Angular-rate observation path, merged into the image path at 'cat1'.
dthetaPath = [
    imageInputLayer(obsInfo(2).Dimension,'Normalization','none','Name',obsInfo(2).Name)
    fullyConnectedLayer(1,'Name','fc5','BiasLearnRateFactor',0,'Bias',0)
    ];
actorNetwork = layerGraph(imgPath);
actorNetwork = addLayers(actorNetwork,dthetaPath);
actorNetwork = connectLayers(actorNetwork,'fc5','cat1/in2');

actorOptions = rlRepresentationOptions('LearnRate',1e-04,'GradientThreshold',1);
% Uncomment the line below to train on a GPU.
% (The original comment read "criticOptions.UseDevice" here — a copy-paste
% slip; for the actor it is actorOptions that must be set.)
% actorOptions.UseDevice = 'gpu';

%% Create the actor representation
actor = rlRepresentation(actorNetwork,actorOptions,'Observation',{'pendImage','angularRate'},obsInfo,'Action',{'scale1'},actInfo);

%% Visualize the structure of the actor network
figure
plot(actorNetwork)

%% Create the DDPG agent
agentOptions = rlDDPGAgentOptions(...
    'SampleTime',env.Ts,...
    'TargetSmoothFactor',1e-3,...
    'ExperienceBufferLength',1e6,...
    'DiscountFactor',0.99,...
    'MiniBatchSize',128);
% Exploration noise settings: initial variance and its per-step decay rate.
agentOptions.NoiseOptions.Variance = 0.6;
agentOptions.NoiseOptions.VarianceDecayRate = 1e-6;
agent = rlDDPGAgent(actor,critic,agentOptions);

%% Agent training options
maxepisodes = 5000;
maxsteps = 400;
% Training stops when the average reward reaches -740.
trainingOptions = rlTrainingOptions(...
    'MaxEpisodes',maxepisodes,...
    'MaxStepsPerEpisode',maxsteps,...
    'Plots','training-progress',...
    'StopTrainingCriteria','AverageReward',...
    'StopTrainingValue',-740);

%% Visualize the pendulum state
plot(env);

%% Run the training
% Set doTraining to true to train from scratch (can take a long time);
% otherwise a pretrained agent is loaded below.
doTraining = false;
if doTraining
    % Train the agent.
    trainingStats = train(agent,env,trainingOptions);
else
    % Load pretrained agent for the example.
load(fullfile(matlabroot,'examples','rl','SimplePendulumWithImageDDPG.mat'),...
        'agent');
end

%% Run the trained DDPG agent
% Simulate one episode of up to 500 steps with the (loaded or trained) agent.
simOptions = rlSimulationOptions('MaxSteps',500);
experience = sim(env,agent,simOptions);

%%
% _Copyright 2019 The MathWorks, Inc._