# Copyright 2017 Amra Omanović, Nejka Bolčič, Magda Nowak-Trzos
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

"""Q-learning line follower for an EV3 robot.

The robot reads its colour sensor, maps the reading to one of three states
(black / white / grey zone), picks the action with the highest Q-value for
that state, and updates the Q-table from a fixed reward table.
"""

import csv

import numpy as np

from EV3Robot import *

# Discount factor applied to the best Q-value of the next state.
gamma = 0.8
# Learning rate; with 1.0 every update fully replaces the previous Q-value.
alpha = 1.

# The robot is created and wired up at import time.
robot = Robot()
robot.connect_motor('left')
robot.connect_motor('right')
robot.connect_sensor('color')

# Reward table indexed as r[state, action].
# States come from bgw(): 0 = black, 1 = white, 2 = grey zone.
# Actions are the codes understood by do_action(): 0, 1, 2.
r = np.array([[1, -10, -1],
              [-100, 10, -1],
              [-100, -10, 100]]).astype("float32")
# Original author's note: "square the difference" (intent is not evident
# from the code -- TODO confirm).

# Q-table indexed as q[state, action], randomly initialised.
q = np.random.rand(3, 3)


def update_q(state, next_state, action):
    """Apply one Q-learning update to the global table ``q``.

    Args:
        state: Index of the state the action was taken in.
        next_state: Index of the state observed after the action.
        action: Index of the action that was taken.

    Returns:
        The reward ``r[state, action]`` used for the update.
    """
    r_sa = r[state, action]  # reward for this state/action pair
    q_sa = q[state, action]  # current Q-value for this state/action pair
    new_q = q_sa + alpha * (r_sa + gamma * max(q[next_state, :]) - q_sa)
    q[state, action] = new_q

    # Rescale the positive entries of this row so that they sum to 1;
    # zero and negative entries are left untouched.
    positive = q[state] > 0
    q[state][positive] = q[state][positive] / np.sum(q[state][positive])
    return r_sa


def bgw(isee, follow_color=50, grey_zone=25):
    """Classify a sensor reading as black (0), white (1) or grey zone (2).

    Args:
        isee: The measured value to classify.
        follow_color: Centre of the grey zone.
        grey_zone: Half-width of the grey zone around ``follow_color``.

    Returns:
        0 if ``isee`` is below the grey zone, 1 if it is above it,
        2 if it lies inside the zone (bounds included).
    """
    if isee < follow_color - grey_zone:
        # BLACK
        return 0
    elif isee > follow_color + grey_zone:
        # WHITE
        return 1
    else:
        # Grey zone: the state in which the robot should move forward.
        return 2


def get_state():
    """Read the colour sensor and return the current state index (0, 1 or 2)."""
    isee = robot.color_sensor_measure('reflected_light_intensity')
    return bgw(isee)


def do_action(action, speed):
    """Send the motor command that corresponds to ``action``.

    Args:
        action: 0, 1 or 2; any other value is silently ignored.
        speed: Value passed to ``robot.move`` for the driven side(s).
    """
    # NOTE(review): presumably robot.move takes (left, right) speeds, making
    # 0 and 1 pivot turns and 2 straight ahead -- confirm against EV3Robot.
    if action == 0:
        robot.move(0, speed)
    elif action == 1:
        robot.move(speed, 0)
    elif action == 2:
        robot.move(speed, speed)


def _best_action(state):
    """Return the action index with the highest Q-value for ``state``.

    ``q`` is indexed as ``q[state, action]`` (see ``update_q``), so the
    greedy action is the argmax over the state's row.
    """
    return np.argmax(q[state])


def run(speed):
    """Drive the robot forever, learning and logging as it goes.

    Each loop iteration performs two sense/act/update steps and appends one
    row ``(state_1, state_2, action2)`` to ``training_data.csv``.

    Args:
        speed: Motor speed forwarded to ``do_action``.
    """
    # csv.writer needs a text-mode file opened with newline=''. The ``with``
    # block closes (and flushes) the file when the loop is interrupted.
    with open('training_data.csv', 'w', newline='') as data_file:
        while True:
            # First step: observe, act greedily, observe again, learn.
            state_1 = get_state()
            action = _best_action(state_1)
            do_action(action, speed)
            state_2 = get_state()
            update_q(state_1, state_2, action)

            # Second step, starting from the state just observed.
            action2 = _best_action(state_2)
            do_action(action2, speed)
            state_3 = get_state()
            update_q(state_2, state_3, action2)

            collect_data(data_file, state_1, state_2, action2)


def collect_data(data_file, prev_state, current_state, action):
    """Append one ``prev_state,current_state,action`` row to ``data_file``.

    Args:
        data_file: Open, writable text file object.
        prev_state: State index to record in the first column.
        current_state: State index to record in the second column.
        action: Action index to record in the third column.
    """
    writer = csv.writer(data_file, delimiter=',')
    writer.writerow([prev_state, current_state, action])


run(15)