finish exercise 2.5

This commit is contained in:
Bart Moyaers
2020-02-24 16:08:44 +01:00
parent e3e929689b
commit 63e85d2a60
3 changed files with 39 additions and 11 deletions

BIN
exercise_2-5.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 59 KiB

View File

@@ -2,6 +2,8 @@ import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
from tqdm import tqdm
from mpl_toolkits.axes_grid1.axes_divider import make_axes_area_auto_adjustable
class KArmedBandit:
def __init__(self,
@@ -45,7 +47,13 @@ class KArmedBandit:
if step >= self.steps or step < 0:
ValueError("Step {} out of bounds. Current steps: {}".format(step, self.steps))
return self.rewards[action][step]
if self.random_walk:
# Slightly move all the averages
moves = np.random.normal(loc=0.0, scale=0.01, size=self.arms)
self.average_rewards = np.array(self.average_rewards) + moves
return np.random.normal(loc=self.average_rewards[action], scale=1, size=1)
# return self.rewards[action][step]
def get_max_reward_action(self):
    """Return the best arm according to the current mean rewards.

    Returns:
        A 2-tuple ``(best_mean, best_index)`` where ``best_mean`` is the
        largest entry of ``self.average_rewards`` and ``best_index`` is the
        position of that entry as reported by ``np.argmax``.
    """
    # Both lookups read the same sequence; the value comes first, then the index.
    return (max(self.average_rewards), np.argmax(self.average_rewards))
@@ -82,10 +90,15 @@ class Average:
self.count += 1
class Plotter:
def __init__(self, karmedbandit: KArmedBandit, epsilongreedy: EpsilonGreedy, steps: int):
def __init__(self,
             karmedbandit: KArmedBandit,
             epsilongreedy: EpsilonGreedy,
             steps: int,
             alpha: bool = True):
    """Store the experiment components and initialise state via ``reset()``.

    Args:
        karmedbandit: The bandit environment; kept as ``self.karmedbandit``.
        epsilongreedy: The agent under test; kept as ``self.agent``.
        steps: Number of steps; kept as ``self.steps``.
        alpha: Step-size mode flag, not a step-size value. ``run()`` passes
            a constant step size of ``0.1`` to ``agent.update`` when this is
            true, and ``1/(actions_counter[action]+1)`` when it is false.
    """
    self.karmedbandit = karmedbandit
    self.agent = epsilongreedy
    self.steps = steps
    self.alpha = alpha
    # reset() runs last, after every attribute above has been assigned.
    self.reset()
def reset(self):
@@ -111,8 +124,8 @@ class Plotter:
self.optimal_action_fraction[step].update(self.optimal_action_counter / (step+1))
def run(self, runs=1):
for i in range(runs):
self.karmedbandit.generate_rewards()
for i in tqdm(range(runs), desc="Runs"):
self.karmedbandit.generate_average_rewards()
# optimal_action = self.karmedbandit.get_max_reward_action()[1]
self.agent.reset()
self.actions_counter = [0] * self.karmedbandit.arms
@@ -121,8 +134,11 @@ class Plotter:
optimal_action = self.karmedbandit.get_max_reward_action()[1]
action = self.agent.choose_action()
reward = self.karmedbandit.get_reward(action, step)
# self.agent.update(action, reward, 1/(self.actions_counter[action]+1))
self.agent.update(action, reward, 0.1)
if self.alpha:
self.agent.update(action, reward, 0.1)
else:
self.agent.update(action, reward, 1/(self.actions_counter[action]+1))
self.average_rewards[step].update(reward)
self.count_action(action)
if action == optimal_action:
@@ -138,27 +154,38 @@ class Plotter:
violin_data = self.karmedbandit.sample_rewards(steps=10000)
bp = ax1.violinplot(violin_data, showmeans=True, showmedians=False,
showextrema=False)
ax1.set_ylim([-4,4])
ax1.set_ylabel("Reward distribution", rotation='vertical')
# ax1.set_xlabel("Actions", rotation='horizontal')
ax2.plot(range(self.steps), [x.value for x in self.average_rewards])
ylabel = "Average reward. Max average reward: {}".format(self.karmedbandit.get_max_reward_action()[0])
# ylabel = "Average reward. Max average reward: {}".format(self.karmedbandit.get_max_reward_action()[0])
ylabel = "Average reward."
ax2.set(ylabel=ylabel, title='')
ax2.set_ylabel(ylabel, rotation='vertical')
# ax2.set_xlabel("Steps", rotation='horizontal')
ax3.plot(range(self.steps), [x.value for x in self.optimal_action_fraction])
ax3.set_ylim([0,1])
ax3.set_ylabel("Optimal action", rotation='vertical')
# ax3.set_xlabel("Steps", rotation='horizontal')
plt.xlabel("Steps")
fig.set_figheight(8)
fig.set_figwidth(8)
# make_axes_area_auto_adjustable(ax1)
# make_axes_area_auto_adjustable(ax2)
# make_axes_area_auto_adjustable(ax3)
fig.savefig("exercise_2-5.png")
plt.show()
if __name__ == "__main__":
arms = 10
steps = 1000
armedbandit = KArmedBandit(arms=arms, steps=steps, random_walk=False)
steps = 10000
armedbandit = KArmedBandit(arms=arms, steps=steps, random_walk=True)
greedy = EpsilonGreedy(0.1, arms)
plotter = Plotter(armedbandit, greedy, steps)
plotter.run(runs=100)
plotter.run(runs=1000)
plotter.plot()

View File

@@ -142,4 +142,5 @@ Q_{n+1} = & \; Q_n + \alpha_n \left[R_n - Q_n\right] \\
# Exercise 2.5
See [exercise_2-5.py](./exercise_2-5.py) for code.
![](./exercise_2-5.png)