diff --git a/configs/train/finetune_gptj.yaml b/configs/train/finetune_gptj.yaml
index f37283b3..aa6bf4a8 100644
--- a/configs/train/finetune_gptj.yaml
+++ b/configs/train/finetune_gptj.yaml
@@ -2,14 +2,14 @@
 model_name: "EleutherAI/gpt-j-6B"
 tokenizer_name: "EleutherAI/gpt-j-6B"
 gradient_checkpointing: true
-save_name: "nomic-ai/gpt4all-gptj-multinode-deepspeed"
+save_name: "nomic-ai/gpt4all-mosaic"
 
 # dataset
 streaming: false
 num_proc: 64
-dataset_path: "data_multiplus"
+dataset_path: "nomic-ai/turbo-500k-multi"
 max_length: 1024
-batch_size: 32
+batch_size: 16
 
 # train dynamics
 lr: 2.0e-5
@@ -23,7 +23,7 @@ output_dir: "ckpts/gpt4all-gptj-multinode"
 checkpoint: null
 lora: false
 warmup_steps: 500
-num_epochs: 4
+num_epochs: 2
 
 # logging
 wandb: true
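
For context, here is a minimal sketch of how a training script might consume the updated values. The loader below is hypothetical (not part of this diff) and assumes PyYAML for the config plus Hugging Face `datasets`/`transformers` for data and tokenizer loading; the key change is that `dataset_path` now points at a Hub dataset rather than a local directory:

```python
# Hypothetical config consumer; illustrates the fields touched by this diff.
import yaml
from datasets import load_dataset
from transformers import AutoTokenizer

with open("configs/train/finetune_gptj.yaml") as f:
    config = yaml.safe_load(f)

# dataset_path is now a Hub dataset ("nomic-ai/turbo-500k-multi"), so
# load_dataset resolves it remotely instead of reading a local directory.
dataset = load_dataset(config["dataset_path"], num_proc=config["num_proc"])
tokenizer = AutoTokenizer.from_pretrained(config["tokenizer_name"])

# batch_size drops from 32 to 16 and num_epochs from 4 to 2 in this diff;
# downstream training code would read them from the same dict.
print(config["save_name"], config["batch_size"], config["num_epochs"])
```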