mirror of
https://github.com/nomic-ai/gpt4all
synced 2024-11-08 07:10:32 +00:00
Slight cleanup of superfluous comment and space after comma
This commit is contained in:
parent
f59ab5153f
commit
b8292dd7d0
3
data.py
3
data.py
@ -57,7 +57,6 @@ def load_data(config, tokenizer):
|
||||
dataset_path = config["dataset_path"]
|
||||
|
||||
if os.path.exists(dataset_path):
|
||||
# check if path is a directory
|
||||
if os.path.isdir(dataset_path):
|
||||
files = glob.glob(os.path.join(dataset_path, "*_clean.jsonl"))
|
||||
else:
|
||||
@ -68,7 +67,7 @@ def load_data(config, tokenizer):
|
||||
dataset = load_dataset("json", data_files=files, split="train")
|
||||
|
||||
else:
|
||||
dataset = load_dataset(dataset_path,split='train')
|
||||
dataset = load_dataset(dataset_path, split='train')
|
||||
|
||||
dataset = dataset.train_test_split(test_size=.05, seed=config["seed"])
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user