all.memory

class all.memory.ExperienceReplayBuffer(size, device=torch.device('cpu'))

Bases: all.memory.replay_buffer.ReplayBuffer

sample(batch_size)

Sample from the stored transitions

store(state, action, next_state)

Store the transition in the buffer

update_priorities(td_errors)

Update priorities based on the TD error
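
A minimal usage sketch based on the signatures above. The placeholder tensors are illustrative, and the assumption that the reward travels with the next_state object is not part of this reference.

    import torch
    from all.memory import ExperienceReplayBuffer

    # build a buffer that holds up to 10,000 transitions on the CPU
    buffer = ExperienceReplayBuffer(10000, device=torch.device('cpu'))

    # placeholder transition; in practice these come from the environment,
    # and the reward is assumed to be carried by the next_state object
    state = torch.zeros(4)
    action = torch.tensor(0)
    next_state = torch.ones(4)

    buffer.store(state, action, next_state)

    # once enough transitions are stored, draw a random minibatch
    batch = buffer.sample(32)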

class all.memory.GeneralizedAdvantageBuffer(v, features, n_steps, n_envs, discount_factor=1, lam=1)

Bases: object

advantages(states)

Compute generalized advantage estimates for the given states

store(states, actions, rewards)

Store a batch of transitions, one per environment
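
A sketch of the store/advantages cycle for this buffer. The v and features approximations, the collect_step helper, and final_states are hypothetical stand-ins for a real rollout loop.

    from all.memory import GeneralizedAdvantageBuffer

    # v: state-value approximation, features: shared feature network
    # (both assumed to be constructed elsewhere); 4 parallel environments
    buffer = GeneralizedAdvantageBuffer(
        v, features, n_steps=8, n_envs=4, discount_factor=0.99, lam=0.95
    )

    # store one batch of transitions (one entry per environment) per step
    for _ in range(8):
        states, actions, rewards = collect_step()  # hypothetical helper
        buffer.store(states, actions, rewards)

    # bootstrap from the states observed after the final stored step
    advantages = buffer.advantages(final_states)
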
class all.memory.NStepAdvantageBuffer(v, features, n_steps, n_envs, discount_factor=1)

Bases: object

advantages(states)

Compute n-step advantage estimates for the given states

store(states, actions, rewards)

Store a batch of transitions, one per environment
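
Construction mirrors GeneralizedAdvantageBuffer minus the lam parameter, and the store/advantages cycle sketched above applies unchanged; v and features are again assumed to be value and feature approximations built elsewhere.

    from all.memory import NStepAdvantageBuffer

    # plain n-step advantage estimates rather than GAE
    buffer = NStepAdvantageBuffer(v, features, n_steps=8, n_envs=4,
                                  discount_factor=0.99)
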
class all.memory.NStepReplayBuffer(steps, discount_factor, buffer)

Bases: all.memory.replay_buffer.ReplayBuffer

Wraps any ReplayBuffer, converting stored transitions into n-step transitions

sample(*args, **kwargs)

Sample from the stored transitions

store(state, action, next_state)

Store the transition in the buffer

update_priorities(*args, **kwargs)

Update priorities based on the TD error
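
Because NStepReplayBuffer wraps an existing ReplayBuffer, a composition sketch; the buffer size, step count, and discount are arbitrary choices, and the transition variables are as in the ExperienceReplayBuffer sketch above.

    import torch
    from all.memory import ExperienceReplayBuffer, NStepReplayBuffer

    # underlying buffer that will hold the accumulated n-step transitions
    base = ExperienceReplayBuffer(10000, device=torch.device('cpu'))

    # combine 4 consecutive steps with discount 0.99 before writing to base
    buffer = NStepReplayBuffer(4, 0.99, base)

    # then store and sample exactly as with the wrapped buffer
    buffer.store(state, action, next_state)
    batch = buffer.sample(32)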

class all.memory.PrioritizedReplayBuffer(buffer_size, alpha=0.6, beta=0.4, epsilon=1e-05, device=torch.device('cpu'))

Bases: all.memory.replay_buffer.ExperienceReplayBuffer, all.optim.scheduler.Schedulable

sample(batch_size)

Sample from the stored transitions

store(state, action, next_state)

Store the transition in the buffer

update_priorities(priorities)

Update priorities based on the TD error
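
A sketch of the prioritized sample/update cycle; compute_td_errors is a hypothetical helper, and feeding back absolute TD errors as the new priorities is an assumption about typical usage, not a statement of this class's internals.

    import torch
    from all.memory import PrioritizedReplayBuffer

    buffer = PrioritizedReplayBuffer(10000, alpha=0.6, beta=0.4,
                                     device=torch.device('cpu'))

    buffer.store(state, action, next_state)  # as in the sketch above

    # sampling favors high-priority transitions; after computing TD errors
    # for the sampled batch, feed their magnitudes back as new priorities
    batch = buffer.sample(32)
    td_errors = compute_td_errors(batch)     # hypothetical helper
    buffer.update_priorities(td_errors.abs())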

class all.memory.ReplayBuffer

Bases: abc.ABC

abstract sample(batch_size)

Sample from the stored transitions

abstract store(state, action, reward, next_state)

Store the transition in the buffer

abstract update_priorities(indexes, td_errors)

Update priorities based on the TD error
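
A toy subclass illustrating the abstract interface; this is a reference sketch only, not a buffer shipped with the library.

    import random
    from all.memory import ReplayBuffer

    class ListReplayBuffer(ReplayBuffer):
        # minimal unbounded buffer backed by a Python list

        def __init__(self):
            self._transitions = []

        def store(self, state, action, reward, next_state):
            # keep every transition; a real buffer would cap its size
            self._transitions.append((state, action, reward, next_state))

        def sample(self, batch_size):
            # uniform sampling; prioritized buffers would weight by priority
            return random.sample(self._transitions, batch_size)

        def update_priorities(self, indexes, td_errors):
            # uniform buffer: priorities are ignored
            pass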