www.gusucode.com > 马尔科夫决策过程包括一些例程源码程序 > mk_grid_world.m

    function [T, T2] = mk_grid_world(nrows, ncols, psucc_act, obstacle, terminal, absorb, wrap_around, noop)
% MK_GRID_WORLD Make the transition matrix for a grid world with stochastic actions.
% T = mk_grid_world(nrows, ncols, psucc_act, obstacle, terminal, absorb, wrap_around)
%
% An action succeeds with prob. p = psucc_act and moves in a perpendicular direction with prob. 1-p.
% If you try to move into an obstacle, you stay where you are.
%
% Input:
% obstacle(i,j)=p means  there is an obstacle at i,j with prob p.
% terminal(i,j)=1 if i,j is a terminal state
% If absorb = 1, terminal states jump to a unique absorbing state.
% If absorb = 0, terminal states are themselves absorbing.
% If noop = 1, we allow a no-op action that has no affect.
% If wrap_around = 1, we use toroidal boundary conditions.
% If wrap_around = 0, we cannot go past the boundaries.
%
% Output:
% T(s,a,s') = Pr(s' | s, a), where s in [1, 2, ..., nrows*ncols].
% States are numbered as in the following example
%     1     5     9
%     2     6    10
%     3     7    11
%     4     8    12
% and actions are numbered as North(1), South (2), East(3), West (4)


N = 1; E = 2; S = 3; W = 4;
nact = 4;
nstates = nrows*ncols;

% T1{N}(i,j)=k means s->k when we go north, where s = encode_xy(i,j)
% This assumes no noise and no obstacles.
%
% Example: nrows = 4, ncols = 3, wrap_around = 1
% M = 
%     1     5     9
%     2     6    10
%     3     7    11
%     4     8    12
% T1{N} = 
%     4     8    12
%     1     5     9
%     2     6    10
%     3     7    11
% This means that state 1 goes to 4, state 2 goes to 1, etc.


if wrap_around
  rows{N} = [nrows 1:nrows-1]; cols{N} = 1:ncols;
  rows{E} = 1:nrows; cols{E} = [2:ncols 1];
  rows{S} = [2:nrows 1]; cols{S} = 1:ncols;
  rows{W} = 1:nrows; cols{W} = [ncols 1:ncols-1];
else
  rows{N} = [1 1:nrows-1]; cols{N} = 1:ncols;
  rows{E} = 1:nrows; cols{E} = [2:ncols ncols];
  rows{S} = [2:nrows nrows]; cols{S} = 1:ncols;
  rows{W} = 1:nrows; cols{W} = [1 1:ncols-1];
end

M = reshape(1:nrows*ncols, [nrows ncols]);
T1 = cell(1, nact);
for i=1:4
  T1{i} = M(rows{i}, cols{i});
end

% T2{N}(s,s')=r means s->s' with prob r when we go north.
% This is computed by assigning prob mass p to the cell north of s,
% and q to the cells to the east and west of s, where
% p = psucc_act, q = (1-p)/2.
% If there is an obstacle in a target cell, that amount of prob mass
% is added to the self-loop prob.
%
% Example
% obstacle =
%     0     0     0
%     0     1     0
%     0     0     0
%     0     0     1
% p = 0.8, q = 0.1
% T2{N}(1,:) =
%         0    0.1000    0.1000
%         0         0         0
%         0         0         0
%    0.8000         0         0
%
% i.e., P(1->5) = P(1->9) = q, P(1->4) = p since going north from 1 wraps round to 4
%
% reshape(T2{1}(7,:), 4,3)
%         0         0         0
%         0         0         0
%    0.1000    0.8000    0.1000
%         0         0         0
% i.e., P(7->3) = P(7->11) = q, P(7->7) = p since going north from 7 hits the obstacle at 6
%
% If there is uncertainty about the obstacle map, the prob of a move gets reduced by a factor
% of r, where r = prob. destn is UNoccupied

dir = [N E W; E N S; S E W; W N S];
p = psucc_act;
q = (1-p)/2;
prob = [p q q; p q q; p q q; p q q];

T2 = cell(1,nact);
for i=1:4
  T2{i} = zeros(nstates, nstates);
  for j=1:3
    d = dir(i,j);
    p = prob(i,j);
    tmp = subv2ind([nstates nstates], [(1:nstates)' T1{d}(:)]);
    prob_unoccupied  = (1-obstacle(rows{d}, cols{d}));
    T2{i}(tmp) = T2{i}(tmp)' + p * prob_unoccupied(:);
  end
  tmp = subv2ind([nstates nstates], [(1:nstates)' M(:)]);
  T2{i}(tmp) = T2{i}(tmp)' + ones(nstates, 1) - sum(T2{i},2);
end


term = M(logical(terminal(:)));
if absorb
  T = zeros(nstates + 1, nact, nstates + 1);
  for i=1:4
    T(1:nstates, i, 1:nstates) = T2{i};
  end
  astate = nstates + 1;
  T(astate, :, astate) = 1;
  T(term, :, :) = 0;
  T(term, :, astate) = 1;
else
  tmp = subv2ind([nstates nstates], [term term]);
  T = zeros(nstates, nact, nstates);
  for i=1:4
    T2{i}(term, :) = 0;
    T2{i}(tmp) = 1; % equivalent to T2{i}(term(j),term(j)) = 1 for all j
    T(:,i,:) = T2{i};
  end
end

if noop
  nact = 5;
  Told = T;
  ns = size(Told, 1); % might be nstates or nstates+1
  T = zeros(ns, nact, ns);
  T(:,1:nact-1,:) = Told;
  T(:,nact,:) = eye(ns);
end