mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-08 07:10:32 +00:00
Convert DatasetDict to a Dataset object.
This commit is contained in:
parent
6a9b3fc3f7
commit
157bb8f602
2
data.py
2
data.py
@@ -68,7 +68,7 @@ def load_data(config, tokenizer):
|
|||||||
dataset = load_dataset("json", data_files=files, split="train")
|
dataset = load_dataset("json", data_files=files, split="train")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
dataset = load_dataset(dataset_path)
|
dataset = load_dataset(dataset_path,split='train')
|
||||||
|
|
||||||
dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])
|
dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user