Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions speechbrain/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ def _train_loader_specifics(self, dataset, loader_kwargs):
elif loader_kwargs.get("batch_sampler") is None:
# no sampler and batch-sampler
self.train_sampler = DistributedSampler(
dataset, rank=self.rank, shuffle=False, drop_last=drop_last
dataset, rank=self.rank, shuffle=True, drop_last=drop_last
)

# with DistributedSamplerWrapper, one must disable shuffling for dataloader
Expand All @@ -765,7 +765,7 @@ def _train_loader_specifics(self, dataset, loader_kwargs):
self.train_sampler = DistributedSamplerWrapper(
loader_kwargs.get("batch_sampler", None),
rank=self.rank,
shuffle=False,
shuffle=True,
)
loader_kwargs["batch_sampler"] = self.train_sampler
elif self.distributed_launch and isinstance(dataset, IterableDataset):
Expand Down
3 changes: 2 additions & 1 deletion speechbrain/dataio/sampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,8 @@ def __len__(self):
# Heavily inspired by Catalyst, which is under Apache 2.0 licence.
# https://github.com/catalyst-team/catalyst/blob/51428d7756e62b9b8ee5379f38e9fd576eeb36e5/catalyst/data/sampler.py#L522
class DistributedSamplerWrapper(DistributedSampler):
"""This wrapper allows using any sampler with Distributed Data Parallel (DDP) correctly.
"""This wrapper allows using any sampler (for example batch) with Distributed Data Parallel (DDP)
correctly.

Blindly passing the sampler to each DDP process would give every process
access to all the data in the dataset instead of only a subset
Expand Down