www.gusucode.com > 马尔科夫决策过程包括一些例程源码程序 > mk_grid_world_simple.m
function [T, A2] = mk_grid2(nrows, ncols, psucc_act, obstacle, terminal, absorb, wrap_around, noop)
% MK_GRID2 Build the transition model of a grid-world MDP.
% This is a simplified (non-vectorised) version of mk_grid_world.
%
% [T, A2] = mk_grid2(nrows, ncols, psucc_act, obstacle, terminal, absorb, wrap_around, noop)
%
% Cells are numbered in column-major order, i.e. cell (i,j) is state
% i + (j-1)*nrows.  Actions are 1 = North (row-1), 2 = East (col+1),
% 3 = South (row+1), 4 = West (col-1), and optionally 5 = no-op.
%
% Inputs:
%   nrows, ncols - grid dimensions
%   psucc_act    - probability that the chosen move is executed; each of the
%                  two perpendicular moves happens with prob. (1-psucc_act)/2
%   obstacle     - obstacle(i,j) is assumed to be 0 or 1; a move into an
%                  obstacle cell leaves the agent where it is
%   terminal     - non-zero entries mark terminal cells (same layout as the grid)
%   absorb       - if true, an extra absorbing state nstates+1 is appended and
%                  every terminal cell moves to it with probability 1;
%                  if false, every terminal cell loops onto itself instead
%   wrap_around  - if true the grid is a torus; otherwise a move off the edge
%                  leaves the agent where it is
%   noop         - if true, a 5th action that never changes the state is added
%
% Outputs:
%   T  - T(s,a,s') = Pr(s' | s, a); size ns x nact x ns, where ns is nstates
%        or nstates+1 (absorb) and nact is 4 or 5 (noop)
%   A2 - 1x4 cell array, A2{a}(s,s') is the nstates x nstates matrix for move a.
%        Terminal rows are overwritten with self-loops only when absorb is
%        false; with absorb true A2 ignores the terminal cells.

N = 1; E = 2; S = 3; W = 4;
nact = 4;
nstates = nrows*ncols;

% rows{a}(i), cols{a}(j) give the subscripts of the cell reached from (i,j) by move a.
rows = cell(1, nact);
cols = cell(1, nact);
if wrap_around
  rows{N} = [nrows 1:nrows-1]; cols{N} = 1:ncols;
  rows{E} = 1:nrows;           cols{E} = [2:ncols 1];
  rows{S} = [2:nrows 1];       cols{S} = 1:ncols;
  rows{W} = 1:nrows;           cols{W} = [ncols 1:ncols-1];
else
  % Edge indices are repeated so that moving off the grid is a self-transition.
  rows{N} = [1 1:nrows-1];     cols{N} = 1:ncols;
  rows{E} = 1:nrows;           cols{E} = [2:ncols ncols];
  rows{S} = [2:nrows nrows];   cols{S} = 1:ncols;
  rows{W} = 1:nrows;           cols{W} = [1 1:ncols-1];
end

% M(i,j) is the state number of cell (i,j); next_state{a}(s) is the
% deterministic successor of state s under move a.
M = reshape(1:nstates, [nrows ncols]);
next_state = cell(1, nact);
for a = 1:nact
  next_state{a} = M(rows{a}, cols{a});
end

% For each chosen action: the intended move followed by its two perpendicular
% slips, with the matching probabilities.
outcome_dirs = [N E W; E N S; S E W; W N S];
slip = (1 - psucc_act)/2;
outcome_prob = [psucc_act slip slip];

A2 = cell(1, nact);
for a = 1:nact
  A2{a} = zeros(nstates, nstates);
  for i = 1:nrows
    for j = 1:ncols
      s = M(i, j);
      for d = 1:3
        ss = next_state{outcome_dirs(a, d)}(s);
        if obstacle(ss)
          % Blocked: the probability mass stays on the current cell.
          A2{a}(s, s) = A2{a}(s, s) + outcome_prob(d);
        else
          A2{a}(s, ss) = A2{a}(s, ss) + outcome_prob(d);
        end
      end
    end
  end
end

% State numbers of the terminal cells.  M(k) == k, so find() yields the same
% indices as M(logical(terminal(:))) but is always a column vector; indexing
% a one-row M would return a row vector and break the pairing below.
term = find(terminal(:));

if absorb
  astate = nstates + 1;
  T = zeros(nstates + 1, nact, nstates + 1);
  for a = 1:nact
    T(1:nstates, a, 1:nstates) = A2{a};
  end
  T(astate, :, astate) = 1;
  T(term, :, :) = 0;
  T(term, :, astate) = 1;
else
  % Linear (column-major) indices of the diagonal entries (term(k), term(k)).
  diag_idx = term + (term - 1)*nstates;
  T = zeros(nstates, nact, nstates);
  for a = 1:nact
    A2{a}(term, :) = 0;
    A2{a}(diag_idx) = 1;
    T(:, a, :) = A2{a};
  end
end

if noop
  Tmove = T;
  ns = size(Tmove, 1); % might be nstates or nstates+1
  T = zeros(ns, nact + 1, ns);
  T(:, 1:nact, :) = Tmove;
  T(:, nact + 1, :) = eye(ns);
end