Convert DatasetDict to a Dataset object by loading the 'train' split, so that train_test_split can be called on it.

This commit is contained in:
Wayner Barrios 2023-04-01 23:52:25 -04:00 committed by GitHub
parent e1357c3720
commit 1a451445a2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -68,7 +68,7 @@ def load_data(config, tokenizer):
dataset = load_dataset("json", data_files=files, split="train") dataset = load_dataset("json", data_files=files, split="train")
else: else:
dataset = load_dataset(dataset_path) dataset = load_dataset(dataset_path,split='train')
dataset = dataset.train_test_split(test_size=.05, seed=config["seed"]) dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])