Merge pull request #319 from RahulSChand/warning

Give a better error message in the TinyStories data loader
This commit is contained in:
Andrej 2023-08-19 10:00:27 -07:00 committed by GitHub
commit d2a546c577
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -196,6 +196,7 @@ class PretokDataset(torch.utils.data.IterableDataset):
shard_filenames = sorted(glob.glob(os.path.join(bin_dir, "*.bin")))
# train/test split. let's use only shard 0 for test split, rest train
shard_filenames = shard_filenames[1:] if self.split == "train" else shard_filenames[:1]
assert len(shard_filenames)>0, f"No bin files found in {bin_dir}"
while True:
rng.shuffle(shard_filenames)
for shard in shard_filenames: