Naive-Framework/facade.py at main · NTUT-RL-Lab/Naive-Framework · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from typing import Any
import numpy as np
import gymnasium as gym
from typing import List
from gymnasium import Env, logger, Wrapper
from matplotlib.pylab import f
from director import Director
from guise import Guise


class Facade(Wrapper):
    """Present several environments to the agent as a single environment.

    The facade holds a list of environments and forwards calls to the one
    currently selected (``self.env``). ``director.blend`` picks the mode:

    * blend off: only the selected environment is stepped, and after every
      step the director is asked which environment to use next.
    * blend on: every environment is stepped with the same action and the
      individual results are merged into one transition.
    """

    def __init__(self, envs: List[Guise], director: Director) -> None:
        """Create the facade and reset every wrapped environment.

        Args:
            envs: Environments to multiplex; must not be empty. Besides the
                usual environment methods, each one is called through
                ``map_action`` and ``reward``.
            director: Provides the ``blend`` flag and, when not blending,
                chooses the next environment through ``update``.

        Raises:
            ValueError: If ``envs`` is empty.
        """
        # Validate first so nothing is touched when there is nothing to wrap.
        if len(envs) == 0:
            raise ValueError("No envs provided 😫")
        for env in envs:
            env.reset()

        # Wrapper.__init__ stores the first environment as ``self.env``.
        super().__init__(envs[0])
        self.index = 0
        self.envs = envs
        self.director = director
        self.blend = director.blend
        self._reward_space = gym.spaces.Box(
            low=0.0, high=1.0, shape=(1, ), dtype=np.float32)

    @property
    def reward_space(self):
        """Box space with bounds [0.0, 1.0] and shape ``(1,)``."""
        return self._reward_space

    def switch_env(self, index: int) -> None:
        """Make the environment at ``index`` the active one.

        Args:
            index: Position of the environment inside ``self.envs``.

        Raises:
            ValueError: If ``index`` is outside ``[0, len(self.envs))``.
        """
        if not 0 <= index < len(self.envs):
            raise ValueError("Invalid index provided")
        if index == self.index:
            return
        self.index = index
        self.env = self.envs[index]

    def step(self, action):
        """Advance the facade by one step.

        With blending on, every environment is stepped and the results are
        merged (see ``_blended_step``). Otherwise only the active environment
        is stepped, its reward is passed through the environment's
        ``reward`` method, and the director selects the environment to use
        for the next call.

        Args:
            action: Agent action; translated per environment with
                ``map_action`` before being applied.

        Returns:
            The ``(observation, reward, terminated, truncated, info)`` tuple.
        """
        if self.blend:
            return self._blended_step(action)

        observation, reward, terminated, truncated, info = super().step(
            self.env.map_action(action))
        # Let the active environment transform its raw reward.
        reward = self.env.reward(reward)

        # The director's answer is unpacked as a one-element sequence.
        (index,) = self.director.update(
            observation, reward, terminated, truncated, info)
        self.switch_env(index)
        return observation, reward, terminated, truncated, info

    def reset(self, *, seed: int | None = None, options: dict[str, Any] | None = None):
        """Reset the active environment, or all of them when blending.

        Args:
            seed: Forwarded unchanged to each environment that is reset.
            options: Forwarded unchanged to each environment that is reset.

        Returns:
            ``(observation, info)``. When blending, the observation is the
            element-wise mean over all environments and the info values are
            averaged per key.
        """
        if not self.blend:
            return super().reset(seed=seed, options=options)

        observations, infos = [], []
        for index in range(len(self.envs)):
            self.switch_env(index)
            observation, info = super().reset(seed=seed, options=options)
            observations.append(observation)
            infos.append(info)
        return np.mean(observations, axis=0), self._merge_infos(infos)

    def close(self):
        """Close every wrapped environment.

        All environments are reset in the constructor, so all of them are
        closed here regardless of the blend mode, not only the active one.
        """
        for env in self.envs:
            env.close()

    def _blended_step(self, action):
        """Step every environment with ``action`` and merge the results.

        Observations are averaged element-wise, the largest reward wins, the
        episode counts as terminated/truncated only when every environment
        reports it, and info values are averaged per key.
        """
        observations, rewards, terminateds, truncateds, infos = [], [], [], [], []
        for index in range(len(self.envs)):
            self.switch_env(index)
            observation, reward, terminated, truncated, info = super().step(
                self.env.map_action(action))
            observations.append(observation)
            rewards.append(self.env.reward(reward))
            terminateds.append(terminated)
            truncateds.append(truncated)
            infos.append(info)

        return (
            np.mean(observations, axis=0),
            np.max(rewards),
            bool(np.all(terminateds)),
            bool(np.all(truncateds)),
            self._merge_infos(infos),
        )

    @staticmethod
    def _merge_infos(infos):
        """Average info dicts key by key, using the keys of the first dict.

        NOTE(review): every dict must contain those keys with values that
        ``np.mean`` accepts; otherwise this raises, as the inline version did.
        """
        return {key: np.mean([info[key] for info in infos]) for key in infos[0]}