This repository was archived by the owner on May 31, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenvironment.py
More file actions
71 lines (58 loc) · 2.05 KB
/
environment.py
File metadata and controls
71 lines (58 loc) · 2.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import gym
from gym import spaces
import random
import numpy as np
#environment should have the entire dataset as an input parameter, but train and test methods
class Environment(gym.Env):
    """RL environment over binary channel-quality observations.

    The state is a flat list of channel observations.  Each step appends a
    fresh 16-value observation and drops the oldest 16 values, keeping the
    state length constant (state_space_dims is 256, i.e. 16 observations of
    16 channels).  Actions are channel indices 0..15.
    """

    def __init__(self):
        super(Environment, self).__init__()
        # Current state; must be set via reset() before the first step.
        self.state = None
        self.state_space_dims = 256
        # Actions are channel indices 0..15.
        self.n_actions = 16

    def _advance_state(self, obs):
        """Slide the state window: append ``obs``, drop the oldest 16 values.

        Uses non-mutating concatenation so neither the observation list nor
        the list originally handed to reset() is modified in place (the old
        ``self.state += obs`` extended the caller's list).  Returns the new
        state; prints a diagnostic if the window length changed.
        """
        start_size = len(self.state)
        # `self.state + list(obs)` builds a new list instead of extending
        # the (possibly caller-owned) current one in place.
        self.state = (self.state + list(obs))[16:]
        if start_size != len(self.state):
            print("Error in update state")
        return self.state

    def step(self, action, obs):
        """Advance the state with ``obs`` and return ``(next_state, reward)``.

        action: chosen channel index 0..15
        obs:    16 binary channel-quality values (1 = good, 0 = bad)
        """
        next_state = self._advance_state(obs)
        reward = self.calculate_reward(action, obs)
        return next_state, reward

    def step5(self, action, obs, obs2, obs3, obs4, obs5):
        """Like step(), but the reward aggregates five observation vectors.

        Only ``obs`` is appended to the state; obs2..obs5 contribute to the
        reward computation only.
        """
        next_state = self._advance_state(obs)
        reward = self.calculate_reward5(action, obs, obs2, obs3, obs4, obs5)
        return next_state, reward

    # action takes values 0..15, matching the 16 entries of each obs vector.
    def calculate_reward5(self, action, obs, obs2, obs3, obs4, obs5):
        """Return +1.0 if the chosen channel is good (value 1) in at least
        3 of the 5 observations, otherwise -1.0.

        Assumes every entry is 0 or 1; any other sum prints a diagnostic
        and leaves the reward at 0.0 (original best-effort behavior kept).
        """
        reward = 0.0
        x = obs[action] + obs2[action] + obs3[action] + obs4[action] + obs5[action]
        if x >= 3:
            reward = 1.0
        elif 0 <= x <= 2:
            reward = -1.0
        else:
            print("Error: channel quality should be 1 or 0")
        return reward

    def calculate_reward(self, action, obs):
        """Return +1.0 when ``obs[action] == 1`` (good channel), -1.0 when
        it is 0 (bad channel).

        Any other value prints a diagnostic and leaves the reward at 0.0
        (original best-effort behavior kept rather than raising).
        """
        reward = 0.0
        if obs[action] == 1:
            reward = 1.0
        elif obs[action] == 0:
            reward = -1.0
        else:
            print("Error: channel quality should be 1 or 0")
        return reward

    def reset(self, state_variables):
        """Initialize the state from ``state_variables`` and return it.

        A copy is stored so that subsequent steps can never mutate the
        caller's list.
        """
        self.state = list(state_variables)
        return self.state