function [Mq, Qs] = narmbandit(narm, play_num, repeat_num, epsi_table, anneal)
%NARMBANDIT  N-armed bandit simulation with epsilon-greedy action selection.
%
%   N arm bandit problem, for NISS2000.
%   Programmed by Y.Koike and modified by K.Samejima, Aug 1, 2000.
%
%   [Mq, Qs] = narmbandit(narm, play_num, repeat_num, epsi_table, anneal)
%
%   Inputs:
%     narm        number of arms.
%     play_num    number of plays per run.
%     repeat_num  number of independent runs averaged for each epsilon.
%     epsi_table  row vector of exploration probabilities (one curve each).
%     anneal      flag(s) selecting annealed exploration: either a scalar
%                 applied to every epsilon, or a vector with one entry per
%                 entry of epsi_table. A value of 1 decays epsilon as
%                 epsi * exp(-beta * play_index).
%
%   Outputs:
%     Mq          eps_num-by-play_num matrix; row l is the reward at each
%                 play averaged over the repeat_num runs for epsi_table(l).
%     Qs          repeat_num-by-narm matrix of true arm values, drawn once
%                 as randn + 1 and shared by all epsilon settings.
%
%   Side effect: plots Mq' (one curve per epsilon) in the current figure.

beta = 0.0001;       % decay rate of the annealed epsilon
noise_std = 0.1;     % standard deviation of the reward noise

eps_num = size(epsi_table, 2);

% The annealing flag is looked up per epsilon setting. A scalar flag is
% broadcast so that a single on/off switch works with several epsilons.
if numel(anneal) == 1
    anneal = repmat(anneal, 1, eps_num);
elseif numel(anneal) < eps_num
    error('narmbandit:annealSize', ...
          'anneal must be a scalar or have one entry per entry of epsi_table.');
end

Qs = randn(repeat_num, narm) + 1;   % Q*: true value of every arm, per run
Qh = zeros(1, play_num);            % reward received at each play of a run
Mq = zeros(eps_num, play_num);

for l = 1:eps_num
    epsi = epsi_table(l);
    use_anneal = (anneal(l) == 1);  % flag of THIS setting, not the last one

    for i = 1:repeat_num
        Q  = Qs(i, :);
        Ct = zeros(narm, 1);        % number of times each arm was selected
        Qt = zeros(narm, 1);        % estimated value of each arm

        % The first play is always a uniformly random arm.
        a = ceil(rand * narm);
        reward = Q(a) + randn * noise_std;
        Qh(1) = reward;
        Ct(a) = Ct(a) + 1;
        Qt(a) = reward;

        for j = 2:play_num
            if use_anneal
                explore_prob = epsi * exp(-beta * j);
            else
                explore_prob = epsi;
            end

            % Explore with probability explore_prob, otherwise act greedily.
            if rand(1) > (1 - explore_prob)
                a = ceil(rand * narm);
            else
                [best_value, a] = max(Qt); %#ok<ASGLU> only the index is needed
            end

            reward = Q(a) + randn * noise_std;
            Qh(j) = reward;

            % Running sample mean of the rewards observed for arm a.
            Ct(a) = Ct(a) + 1;
            Qt(a) = (Qt(a) * (Ct(a) - 1) + reward) / Ct(a);
        end

        Mq(l, :) = Qh + Mq(l, :);
    end

    Mq(l, :) = Mq(l, :) ./ repeat_num;
end

plot(Mq');