qgallouedec/GLIE_update_Q.py

## GLIE_update_Q.py
# GLIE Monte-Carlo update of the action-value table.
# Walks the trajectory held in states / actions / rewards and updates the
# tables Q (value estimates) and N (visit counters) in place.
# NOTE(review): Q and N are assumed to be indexable as table[state][action]
# with every visited pair already present -- confirm against the caller.
for t in range(len(states)):
  # Discounted return G_t from step t onward; the computation itself is
  # delegated to compute_gain (defined elsewhere).
  Gt = compute_gain(rewards, t, gamma)

  # State-action pair visited at step t
  s_t, a_t = states[t], actions[t]

  # \delta_t = G_t - Q(S_t, A_t), measured before Q is modified
  delta_t = Gt - Q[s_t][a_t]

  # Count this visit first, so the step size used below is 1 / N(S_t, A_t)
  N[s_t][a_t] += 1

  # Incremental-mean step:
  # Q(S_t, A_t) += \frac{\delta_t}{N(S_t, A_t)}
  Q[s_t][a_t] += delta_t / N[s_t][a_t]
No results found