finish exercise 2.5
exercise_2-5.png (new binary file, image, 59 KiB; content not shown)
exercise_2-5.py

@@ -2,6 +2,8 @@ import numpy as np
 import random
 import matplotlib
 import matplotlib.pyplot as plt
+from tqdm import tqdm
+from mpl_toolkits.axes_grid1.axes_divider import make_axes_area_auto_adjustable
 
 class KArmedBandit:
     def __init__(self,

@@ -45,7 +47,13 @@ class KArmedBandit:
         if step >= self.steps or step < 0:
             raise ValueError("Step {} out of bounds. Current steps: {}".format(step, self.steps))
 
-        return self.rewards[action][step]
+        if self.random_walk:
+            # Slightly move all the averages
+            moves = np.random.normal(loc=0.0, scale=0.01, size=self.arms)
+            self.average_rewards = np.array(self.average_rewards) + moves
+
+        return np.random.normal(loc=self.average_rewards[action], scale=1, size=1)
+        # return self.rewards[action][step]
 
     def get_max_reward_action(self):
         return (max(self.average_rewards), np.argmax(self.average_rewards))
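With this change, get_reward stops replaying a precomputed reward table: when random_walk is set, every arm's true mean first takes an independent N(0, 0.01) step, and the returned reward is drawn from N(q*(action), 1). A self-contained sketch of that drift, with illustrative names not taken from the file:

```python
import numpy as np

# Minimal sketch of the nonstationary testbed implemented above: the true
# action values q*(a) take independent N(0, 0.01) random-walk steps, and a
# pull of arm a returns a sample from N(q*(a), 1).
arms, steps = 10, 1000
q_star = np.zeros(arms)                 # illustrative start: all means equal
for t in range(steps):
    q_star += np.random.normal(loc=0.0, scale=0.01, size=arms)  # drift
    action = 0                          # any arm; reward comes from its mean
    reward = np.random.normal(loc=q_star[action], scale=1.0)
print("final true values:", q_star)
```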

@@ -82,10 +90,15 @@ class Average:
         self.count += 1
 
 class Plotter:
-    def __init__(self, karmedbandit: KArmedBandit, epsilongreedy: EpsilonGreedy, steps: int):
+    def __init__(self,
+                 karmedbandit: KArmedBandit,
+                 epsilongreedy: EpsilonGreedy,
+                 steps: int,
+                 alpha: bool = True):
         self.karmedbandit = karmedbandit
         self.agent = epsilongreedy
         self.steps = steps
+        self.alpha = alpha
         self.reset()
 
     def reset(self):

@@ -111,8 +124,8 @@ class Plotter:
             self.optimal_action_fraction[step].update(self.optimal_action_counter / (step+1))
 
     def run(self, runs=1):
-        for i in range(runs):
-            self.karmedbandit.generate_rewards()
+        for i in tqdm(range(runs), desc="Runs"):
+            self.karmedbandit.generate_average_rewards()
             # optimal_action = self.karmedbandit.get_max_reward_action()[1]
             self.agent.reset()
             self.actions_counter = [0] * self.karmedbandit.arms
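generate_average_rewards itself is not shown in this diff; under the standard 10-armed-testbed setup it would plausibly redraw each arm's true mean at the start of a run. A hedged sketch of such a body (an assumption, not the file's code; for Exercise 2.5 the means could equally all start at zero):

```python
# Assumed body, not taken from the diff: redraw the true means once per run,
# q*(a) ~ N(0, 1), as in the standard 10-armed testbed.
def generate_average_rewards(self):
    self.average_rewards = np.random.normal(loc=0.0, scale=1.0, size=self.arms)
```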

@@ -121,8 +134,11 @@ class Plotter:
                optimal_action = self.karmedbandit.get_max_reward_action()[1]
                action = self.agent.choose_action()
                reward = self.karmedbandit.get_reward(action, step)
-               # self.agent.update(action, reward, 1/(self.actions_counter[action]+1))
-               self.agent.update(action, reward, 0.1)
+
+               if self.alpha:
+                   self.agent.update(action, reward, 0.1)
+               else:
+                   self.agent.update(action, reward, 1/(self.actions_counter[action]+1))
                self.average_rewards[step].update(reward)
                self.count_action(action)
                if action == optimal_action:
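The new alpha flag selects between the two step sizes Exercise 2.5 compares: a constant 0.1 versus the sample-average 1/n. Only the update(action, reward, stepsize) signature appears in this diff; the body presumably implements the standard incremental rule, sketched here with a hypothetical q_estimates attribute:

```python
# Assumed update body: both branches reduce to the same incremental rule,
#   Q(a) <- Q(a) + stepsize * (R - Q(a))
# stepsize = 0.1 gives an exponential recency-weighted average (tracks drift);
# stepsize = 1/n gives the sample average (weights all past rewards equally).
def update(self, action, reward, stepsize):
    self.q_estimates[action] += stepsize * (reward - self.q_estimates[action])
```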

@@ -138,27 +154,38 @@ class Plotter:
         violin_data = self.karmedbandit.sample_rewards(steps=10000)
         bp = ax1.violinplot(violin_data, showmeans=True, showmedians=False,
                             showextrema=False)
         ax1.set_ylim([-4,4])
         ax1.set_ylabel("Reward distribution", rotation='vertical')
+        # ax1.set_xlabel("Actions", rotation='horizontal')
 
         ax2.plot(range(self.steps), [x.value for x in self.average_rewards])
 
-        ylabel = "Average reward. Max average reward: {}".format(self.karmedbandit.get_max_reward_action()[0])
-        ax2.set(ylabel=ylabel, title='')
+        # ylabel = "Average reward. Max average reward: {}".format(self.karmedbandit.get_max_reward_action()[0])
+        ylabel = "Average reward."
+        ax2.set_ylabel(ylabel, rotation='vertical')
+        # ax2.set_xlabel("Steps", rotation='horizontal')
 
         ax3.plot(range(self.steps), [x.value for x in self.optimal_action_fraction])
         ax3.set_ylim([0,1])
         ax3.set_ylabel("Optimal action", rotation='vertical')
+        # ax3.set_xlabel("Steps", rotation='horizontal')
 
+        plt.xlabel("Steps")
+        fig.set_figheight(8)
+        fig.set_figwidth(8)
+        # make_axes_area_auto_adjustable(ax1)
+        # make_axes_area_auto_adjustable(ax2)
+        # make_axes_area_auto_adjustable(ax3)
+        fig.savefig("exercise_2-5.png")
         plt.show()
 
 
 if __name__ == "__main__":
     arms = 10
-    steps = 1000
-    armedbandit = KArmedBandit(arms=arms, steps=steps, random_walk=False)
+    steps = 10000
+    armedbandit = KArmedBandit(arms=arms, steps=steps, random_walk=True)
     greedy = EpsilonGreedy(0.1, arms)
 
     plotter = Plotter(armedbandit, greedy, steps)
-    plotter.run(runs=100)
+    plotter.run(runs=1000)
     plotter.plot()
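To produce both curves the exercise asks for, the driver can be run once per setting of the new alpha flag; a sketch reusing the constructors and calls shown above (assumes the module's classes as written):

```python
# Hypothetical comparison driver: constant step size vs sample average on the
# same nonstationary testbed. Note both runs save to the same PNG filename.
for use_constant_alpha in (True, False):
    bandit = KArmedBandit(arms=10, steps=10000, random_walk=True)
    agent = EpsilonGreedy(0.1, 10)
    plotter = Plotter(bandit, agent, steps=10000, alpha=use_constant_alpha)
    plotter.run(runs=1000)
    plotter.plot()
```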

README.md

@@ -142,4 +142,5 @@ Q_{n+1} = & \; Q_n + \alpha_n \left[R_n - Q_n\right] \\
 
 # Exercise 2.5
 See [exercise_2-5.py](./exercise_2-5.py) for code.
+