all.memory

class all.memory.ExperienceReplayBuffer(size, device='cpu', store_device=None)

Bases: ReplayBuffer

sample(batch_size)

Sample from the stored transitions

store(state, action, next_state)

Store the transition in the buffer

update_priorities(td_errors)

Update priorities based on the TD error
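
A minimal usage sketch based on the signatures above. The import path of State (here assumed to be all.core), its dict-style constructor, and the five-element return of sample() are assumptions about the library's conventions rather than guarantees made on this page:

    import torch
    from all.core import State
    from all.memory import ExperienceReplayBuffer

    # Buffer that holds at most 10000 transitions on the CPU.
    replay_buffer = ExperienceReplayBuffer(10000, device='cpu')

    # Assumed convention: observations and rewards travel inside State objects.
    state = State({'observation': torch.zeros(4)})
    action = torch.tensor([0])
    next_state = State({'observation': torch.ones(4), 'reward': 1.0, 'done': False})

    # store(state, action, next_state), as documented above.
    replay_buffer.store(state, action, next_state)

    # sample(batch_size); the unpacking assumes the usual
    # (states, actions, rewards, next_states, weights) return.
    states, actions, rewards, next_states, weights = replay_buffer.sample(1)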

class all.memory.GeneralizedAdvantageBuffer(v, features, n_steps, n_envs, discount_factor=1, lam=1, compute_batch_size=256)

Bases: Schedulable

advantages(next_states)

store(states, actions, rewards)
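
A sketch of the intended call pattern: store one batch of transitions per environment step for n_steps steps, then request advantages using the states that follow the rollout. Here v, features, and envs_step are placeholders (assumed to be the agent's value head, its shared feature network, and a hypothetical function that advances all n_envs environments):

    from all.memory import GeneralizedAdvantageBuffer

    def compute_gae(v, features, envs_step, n_steps=8, n_envs=16):
        # Buffer for Generalized Advantage Estimation over parallel environments.
        buffer = GeneralizedAdvantageBuffer(
            v, features, n_steps, n_envs,
            discount_factor=0.99, lam=0.95
        )
        # store() is called once per step with batches over the n_envs environments.
        for _ in range(n_steps):
            states, actions, rewards, next_states = envs_step()
            buffer.store(states, actions, rewards)
        # advantages() bootstraps from the states reached after the final step.
        return buffer.advantages(next_states)
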
class all.memory.NStepAdvantageBuffer(v, features, n_steps, n_envs, discount_factor=1)

Bases: object

advantages(states)

store(states, actions, rewards)
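
NStepAdvantageBuffer follows the same call pattern, presumably computing plain n-step advantage estimates rather than GAE. As above, v, features, and envs_step are illustrative placeholders:

    from all.memory import NStepAdvantageBuffer

    def compute_n_step_advantages(v, features, envs_step, n_steps=4, n_envs=16):
        buffer = NStepAdvantageBuffer(v, features, n_steps, n_envs, discount_factor=0.99)
        for _ in range(n_steps):
            states, actions, rewards, next_states = envs_step()
            buffer.store(states, actions, rewards)
        return buffer.advantages(next_states)
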
class all.memory.NStepReplayBuffer(steps, discount_factor, buffer)

Bases: ReplayBuffer

Converts any ReplayBuffer into an NStepReplayBuffer

sample(*args, **kwargs)

Sample from the stored transitions

store(state, action, next_state)

Store the transition in the buffer

update_priorities(*args, **kwargs)

Update priorities based on the TD error
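
Because NStepReplayBuffer wraps an existing buffer, constructing one only requires passing the inner buffer. Judging by the constructor arguments, it combines runs of `steps` transitions into discounted n-step transitions before handing them to the wrapped buffer; the step count and discount below are arbitrary:

    from all.memory import ExperienceReplayBuffer, NStepReplayBuffer

    # Wrap a plain experience replay buffer with 4-step, 0.99-discounted returns.
    base_buffer = ExperienceReplayBuffer(100000, device='cpu')
    n_step_buffer = NStepReplayBuffer(4, 0.99, base_buffer)

    # The wrapper exposes the same store/sample/update_priorities interface
    # as the buffer it wraps.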

class all.memory.PrioritizedReplayBuffer(buffer_size, alpha=0.6, beta=0.4, epsilon=1e-05, device=torch.device('cpu'), store_device=None)

Bases: ExperienceReplayBuffer, Schedulable

sample(batch_size)

Sample from the stored transitions

store(state, action, next_state)

Store the transition in the buffer

update_priorities(priorities)

Update priorities based on the TD error
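
A sketch of the prioritized-replay loop: sample a batch, let the learning algorithm compute TD errors for it, then feed the resulting priorities back so that later sampling focuses on high-error transitions. compute_td_errors is a hypothetical callable, and the five-element return of sample() is an assumption:

    import torch
    from all.memory import PrioritizedReplayBuffer

    # alpha skews sampling toward high-priority transitions; beta controls the
    # strength of the importance-sampling correction reflected in the weights.
    buffer = PrioritizedReplayBuffer(100000, alpha=0.6, beta=0.4,
                                     device=torch.device('cpu'))

    def learn_step(buffer, compute_td_errors, batch_size=32):
        states, actions, rewards, next_states, weights = buffer.sample(batch_size)
        # Priorities are derived from the TD errors of the sampled batch.
        priorities = compute_td_errors(states, actions, rewards, next_states)
        buffer.update_priorities(priorities)
        return weights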

class all.memory.ReplayBuffer

Bases: ABC

abstract sample(batch_size)

Sample from the stored transitions

abstract store(state, action, reward, next_state)

Store the transition in the buffer

abstract update_priorities(indexes, td_errors)

Update priorities based on the TD error
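
Concrete buffers must implement all three abstract methods above. A hypothetical minimal subclass, purely for illustration and not part of the library:

    import random
    from all.memory import ReplayBuffer

    class ListReplayBuffer(ReplayBuffer):
        def __init__(self):
            self._transitions = []

        def store(self, state, action, reward, next_state):
            # Keep every transition; a real buffer would enforce a maximum size.
            self._transitions.append((state, action, reward, next_state))

        def sample(self, batch_size):
            # Uniform sampling without replacement.
            return random.sample(self._transitions, batch_size)

        def update_priorities(self, indexes, td_errors):
            # Uniform buffer: priorities are ignored.
            pass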