-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreplay_buffer.py
More file actions
24 lines (20 loc) · 1006 Bytes
/
replay_buffer.py
File metadata and controls
24 lines (20 loc) · 1006 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import random
import numpy as np
import torch
class ReplayBuffer:
    """Fixed-capacity ring buffer of transitions with uniform random sampling.

    Each entry is a ``(state, action, reward, next_state, done)`` tuple. Once
    ``capacity`` entries are stored, every further push overwrites the oldest
    stored entry.
    """

    def __init__(self, capacity, seed):
        """Create an empty buffer.

        Args:
            capacity: Maximum number of transitions kept; must be at least 1.
            seed: Value passed to ``random.seed``.

        Raises:
            ValueError: If ``capacity`` is smaller than 1.
        """
        if capacity < 1:
            # Without this check a non-positive capacity only surfaces as an
            # IndexError on the first push; report it where the mistake is made.
            raise ValueError(f"capacity must be at least 1, got {capacity!r}")
        # NOTE: this seeds the process-wide `random` generator, which is the
        # generator `sample` draws from; other users of `random` are affected.
        random.seed(seed)
        self.capacity = capacity
        self.buffer = []
        self.position = 0  # index of the slot the next push writes to

    def push(self, state, action, reward, next_state, done):
        """Store one transition, overwriting the oldest one when full."""
        if len(self.buffer) < self.capacity:
            # Still growing: open a slot so the indexed write below is valid.
            self.buffer.append(None)
        self.buffer[self.position] = (state, action, reward, next_state, done)
        # Wrap around so the write index cycles through the fixed-size storage.
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Draw ``batch_size`` distinct stored transitions uniformly at random.

        Each field is stacked across the batch with ``np.stack`` and converted
        with ``torch.from_numpy``, so tensor dtypes follow whatever NumPy
        infers from the stored values.

        Args:
            batch_size: Number of transitions to draw.

        Returns:
            A tuple ``(state, action, reward, next_state, done)`` of tensors.
            ``reward`` and ``done`` have shape ``(batch_size, 1)``; ``action``
            has shape ``(batch_size, k)`` where ``k`` is the number of elements
            in one stored action (``1`` for scalar actions). ``state`` and
            ``next_state`` keep their stacked shape.

        Raises:
            ValueError: If ``batch_size`` exceeds the number of stored
                transitions (raised by ``random.sample``).
        """
        batch = random.sample(self.buffer, batch_size)
        # zip(*batch) regroups the per-transition tuples into per-field tuples.
        states, actions, rewards, next_states, dones = map(np.stack, zip(*batch))
        return (
            torch.from_numpy(states),
            # -1 yields (batch_size, 1) for scalar actions and additionally
            # accepts vector-valued actions, which a fixed width of 1 rejected.
            torch.from_numpy(actions.reshape(batch_size, -1)),
            torch.from_numpy(rewards.reshape(batch_size, 1)),
            torch.from_numpy(next_states),
            torch.from_numpy(dones.reshape(batch_size, 1)),
        )

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)