This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| current_board_state = move_states[move] | |
| if first_unvisited_node: | |
| rollout_path.append((current_board_state, current_side)) | |
| if current_board_state not in state_samples: | |
| first_unvisited_node = False | |
| state_values[current_board_state] = value_func(current_board_state) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| if all((state in state_samples) for _, state in move_states): | |
| log_total_samples = math.log(sum(state_samples[s] for s in move_states.values())) | |
| move, state = max(move_states, | |
| key=lambda _, s:upper_confidence_bounds(state_results[s],state_samples[s], log_total_samples)) | |
| else: | |
| move = random.choice(list(move_states.keys())) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| while result == 0: | |
| move_states = {move: apply_move(current_board_state, move, current_side) | |
| for move in available_moves(current_board_state)} | |
| if not move_states: | |
| result = 0 | |
| break |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def monte_carlo_tree_search_uct(board_state, side, number_of_rollouts): | |
| state_results = collections.defaultdict(float) | |
| state_samples = collections.defaultdict(float) | |
| for _ in range(number_of_rollouts): | |
| current_side = side | |
| current_board_state = board_state | |
| first_unvisited_node = True | |
| rollout_path = [] | |
| result = 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def upper_confidence_bounds(payout, samples_for_this_machine, log_total_samples):
    """Return the upper-confidence-bound score for one machine.

    The score is the average payout plus an exploration bonus of
    ``sqrt(2 * log_total_samples / samples_for_this_machine)``.

    Args:
        payout: Sum of the results recorded for this machine.
        samples_for_this_machine: Number of samples recorded for this machine.
        log_total_samples: Logarithm of the total number of samples taken
            across all machines being compared.

    Returns:
        The score as a float. A machine with no samples scores
        ``float("inf")`` so it always wins the comparison and is tried
        before any sampled machine, rather than raising ZeroDivisionError.
    """
    if samples_for_this_machine <= 0:
        return float("inf")

    average_payout = payout / samples_for_this_machine
    exploration_bonus = math.sqrt((2 * log_total_samples) / samples_for_this_machine)
    return average_payout + exploration_bonus
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def monte_carlo_tree_search(board_state, side, number_of_samples):
    """Sample playouts from ``board_state`` and select the best-scoring move.

    Calls ``monte_carlo_sample(board_state, side)`` ``number_of_samples``
    times, accumulates the result and sample count per returned move, and
    selects the move with the highest average result.

    Args:
        board_state: Position to search from; passed through unchanged to
            ``monte_carlo_sample``.
        side: Side to move; passed through unchanged to
            ``monte_carlo_sample``.
        number_of_samples: Number of samples to draw.
    """
    # move -> [sum of results, number of samples]
    results_per_move = collections.defaultdict(lambda: [0, 0])
    for _ in range(number_of_samples):
        result, move = monte_carlo_sample(board_state, side)
        results_per_move[move][0] += result
        results_per_move[move][1] += 1

    # Score each candidate by its own average: divide by the candidate's
    # sample count (x), not by the count of whichever move was sampled last,
    # otherwise every candidate shares one divisor and the comparison
    # degenerates to raw sums.
    move = max(results_per_move,
               key=lambda x: results_per_move[x][0] / results_per_move[x][1])
    # NOTE(review): the visible snippet ends here without a return statement;
    # whatever follows the selection is not shown and has not been added.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # select a random move | |
| move = random.choice(moves) | |
| result, next_move = monte_carlo_sample(apply_move(board_state, move, side), -side) | |
| return result, move |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ Trains an agent with (stochastic) Policy Gradients on Pong. Uses OpenAI Gym. """ | |
| import numpy as np | |
| import cPickle as pickle | |
| import gym | |
| # hyperparameters | |
| H = 200 # number of hidden layer neurons | |
| batch_size = 10 # every how many episodes to do a param update? | |
| learning_rate = 1e-4 | |
| gamma = 0.99 # discount factor for reward |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import tensorflow as tf | |
| import numpy as np | |
| NUM_STATES = 10 | |
| NUM_ACTIONS = 2 | |
| GAMMA = 0.5 | |
| def hot_one_state(index): | |
| array = np.zeros(NUM_STATES) |