qgallouedec/SARSA_improve_Q.py

## SARSA_improve_Q.py
# SARSA temporal-difference update for the visited (state, action) pair.
#
# TD target:  R_{t+1} + \gamma * Q(S_{t+1}, A_{t+1})
# TD error:   \delta_t = TD target - Q(S_t, A_t)
#
# NOTE(review): when next_state is terminal, Q[next_state][next_action] is
# presumably expected to be 0 -- confirm the caller guarantees this.
td_target = reward + gamma * Q[next_state][next_action]
delta_t = td_target - Q[state][action]

# Move the current estimate toward the target by a step of size alpha:
# Q(S_t, A_t) <- Q(S_t, A_t) + alpha * \delta_t
Q[state][action] += alpha * delta_t
	# SARSA temporal-difference update for the visited (state, action) pair.
	# Bootstrapped estimate of the return: R_{t+1} + \gamma * Q(S_{t+1}, A_{t+1})
	# NOTE(review): assumes Q[next_state][next_action] is 0 for a terminal
	# next_state -- confirm against the caller.
	td_target = reward + gamma * Q[next_state][next_action]

	# TD error: \delta_t = TD target - Q(S_t, A_t)
	delta_t = td_target - Q[state][action]

	# Step the current estimate toward the target, scaled by alpha:
	# Q(S_t, A_t) <- Q(S_t, A_t) + alpha * \delta_t
	Q[state][action] += alpha * delta_t
No results found