dataset:
  data_path: '$ALFWORLD_DATA/json_2.1.1/train'
  eval_id_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_seen'    # null/None to disable
  eval_ood_data_path: '$ALFWORLD_DATA/json_2.1.1/valid_unseen' # null/None to disable
  num_train_games: -1 # max training games (<=0 indicates full dataset)
  num_eval_games: -1  # max evaluation games (<=0 indicates full dataset)

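# Note: eval_id_data_path points at ALFRED's valid_seen split (rooms seen during
# training) and eval_ood_data_path at valid_unseen (held-out rooms), i.e.
# in-distribution vs. out-of-distribution evaluation.
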
logic:
  domain: '$ALFWORLD_DATA/logic/alfred.pddl'  # PDDL domain file that defines the world dynamics
  grammar: '$ALFWORLD_DATA/logic/alfred.twl2' # grammar file that defines the text feedback

env:
  type: 'AlfredTWEnv' # 'AlfredTWEnv' or 'AlfredThorEnv' or 'AlfredHybrid'
  regen_game_files: False # check that each game is solvable by the expert and save the result to a game.tw-pddl file
  domain_randomization: False # shuffle TextWorld print order and object id numbers
  task_types: [1, 2, 3, 4, 5, 6] # task-type ids: 1 - Pick & Place, 2 - Examine in Light, 3 - Clean & Place, 4 - Heat & Place, 5 - Cool & Place, 6 - Pick Two & Place
  expert_timeout_steps: 150 # max steps before timeout for the expert to solve the task
  expert_type: "handcoded" # 'handcoded' or 'downward'. Note: the downward planner is very slow for real-time use
  goal_desc_human_anns_prob: 0.0 # prob of using human-annotated goal language instead of templated goals (1.0 indicates all human annotations from ALFRED)

  hybrid:
    start_eps: 100000 # starting episode of hybrid training; tw-only training up to this point
    thor_prob: 0.5 # prob of AlfredThorEnv during hybrid training
    eval_mode: "tw" # 'tw' or 'thor' - env used for evaluation during hybrid training
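  # With the defaults above (presumed semantics of the keys): episodes before
  # start_eps run purely in TextWorld; afterwards each episode runs in
  # AlfredThorEnv with probability thor_prob (0.5), otherwise in AlfredTWEnv.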
  thor:
    screen_width: 300 # width of THOR window
    screen_height: 300 # height of THOR window
    smooth_nav: False # smooth rotations, looks, and translations during navigation (very slow)
    save_frames_to_disk: False # save frame PNGs to disk (useful for making videos)
    save_frames_path: './videos/' # path to save frame PNGs

controller:
  type: 'oracle' # 'oracle' or 'oracle_astar' or 'mrcnn' or 'mrcnn_astar' (aka BUTLER)
  debug: False
  load_receps: True # load receptacle locations from precomputed dict (if available)

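# Roughly (inferred from the option names): 'oracle' variants use ground-truth
# perception while 'mrcnn' variants use Mask R-CNN detections, and '_astar'
# variants add A*-planned navigation; 'mrcnn_astar' is the BUTLER configuration.
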
mask_rcnn:
  pretrained_model_path: '$ALFWORLD_DATA/detectors/mrcnn.pth'

general:
  random_seed: 42
  use_cuda: True # disable this when running on a machine without CUDA
  visdom: False # plot training/eval curves; requires a running visdom server
  task: 'alfred'
  training_method: 'dagger' # 'dqn' or 'dagger'
  save_path: './training/' # path to save pytorch models
  observation_pool_capacity: 3 # k-size queue; 0 indicates no observation pooling
  hide_init_receptacles: False # remove initial observation containing navigable receptacles

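  # training_method above selects which method-specific block applies (presumed
  # from the option names): 'dqn' uses the top-level rl: settings, 'dagger' the
  # top-level dagger: settings.
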
  training:
    batch_size: 10
    max_episode: 50000
    smoothing_eps: 0.1
    optimizer:
      learning_rate: 0.001
      clip_grad_norm: 5

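  # Presumably batch_size is the number of games played in parallel, so training
  # covers max_episode episodes in batches of 10 concurrent games.
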
  evaluate:
    run_eval: True
    batch_size: 10
    env:
      type: "AlfredTWEnv"

  checkpoint:
    report_frequency: 1000 # report every N episodes
    experiment_tag: 'test' # name of experiment
    load_pretrained: False # during testing, enable this so that the agent loads your pretrained model
    load_from_tag: 'not loading anything' # name of the pre-trained model to load from save_path

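  # A hypothetical override for evaluating a previously trained model (tag name
  # illustrative; presumably the experiment_tag used when the model was saved):
  #   load_pretrained: True
  #   load_from_tag: 'my_experiment'
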
  model:
    encoder_layers: 1
    decoder_layers: 1
    encoder_conv_num: 5
    block_hidden_dim: 64
    n_heads: 1
    dropout: 0.1
    block_dropout: 0.1
    recurrent: True

rl:
  action_space: "admissible" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'beam_search_choice' or 'exhaustive' (not working)
  max_target_length: 20 # max token length for seq2seq generation
  beam_width: 10 # 1 means greedy
  generate_top_k: 3

  training:
    max_nb_steps_per_episode: 50 # terminate after this many steps
    learn_start_from_this_episode: 0 # delay updates until this episode
    target_net_update_frequency: 500 # sync target net with online net every this many epochs

  replay:
    accumulate_reward_from_final: True
    count_reward_lambda: 0.0 # 0 to disable
    novel_object_reward_lambda: 0.0 # 0 to disable
    discount_gamma_game_reward: 0.9
    discount_gamma_count_reward: 0.5
    discount_gamma_novel_object_reward: 0.5
    replay_memory_capacity: 500000 # adjust this depending on your RAM size
    replay_memory_priority_fraction: 0.5
    update_per_k_game_steps: 5
    replay_batch_size: 64
    multi_step: 3
    replay_sample_history_length: 4
    replay_sample_update_from: 2
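  # Assuming standard multi-step DQN semantics, multi_step: 3 with
  # discount_gamma_game_reward: 0.9 gives 3-step returns of the form
  #   R = r_t + 0.9 * r_{t+1} + 0.81 * r_{t+2} + 0.729 * max_a Q_target(s_{t+3}, a)
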
  epsilon_greedy:
    noisy_net: False # if this is true, then epsilon greedy is disabled
    epsilon_anneal_episodes: 1000 # -1 if not annealing
    epsilon_anneal_from: 0.3
    epsilon_anneal_to: 0.1
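  # Assuming linear annealing (suggested by the key names), epsilon at episode t is
  #   eps(t) = max(0.1, 0.3 - (0.3 - 0.1) * t / 1000)
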
dagger:
  action_space: "generation" # 'admissible' (candidates from text engine) or 'generation' (seq2seq-style generation) or 'exhaustive' (not working)
  max_target_length: 20 # max token length for seq2seq generation
  beam_width: 10 # 1 means greedy
  generate_top_k: 5
  unstick_by_beam_search: False # use beam search for failed actions; set this to True during evaluation

  training:
    max_nb_steps_per_episode: 50 # terminate after this many steps

  fraction_assist:
    fraction_assist_anneal_episodes: 50000
    fraction_assist_anneal_from: 1.0
    fraction_assist_anneal_to: 0.01
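  # Presumed DAgger-style schedule: the fraction of steps on which the expert's
  # action is taken anneals from 1.0 down to 0.01 over the first 50000 episodes,
  # gradually shifting control from the expert to the learned policy.
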
  fraction_random:
    fraction_random_anneal_episodes: 0
    fraction_random_anneal_from: 0.0
    fraction_random_anneal_to: 0.0

  replay:
    replay_memory_capacity: 500000
    update_per_k_game_steps: 5
    replay_batch_size: 64
    replay_sample_history_length: 4
    replay_sample_update_from: 2

vision_dagger:
  model_type: "resnet" # 'resnet' (whole image features) or 'maskrcnn_whole' (whole image MaskRCNN feats) or 'maskrcnn' (top k MaskRCNN detection feats) or 'no_vision' (zero vision input)
  resnet_fc_dim: 64
  maskrcnn_top_k_boxes: 10 # top k box features
  use_exploration_frame_feats: False # append feats from initial exploration (memory intensive!)
  sequence_aggregation_method: "average" # 'sum' or 'average' or 'rnn'
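
# A minimal loading sketch (hypothetical; not ALFWorld's own API). Paths such as
# '$ALFWORLD_DATA/...' contain environment variables that plain YAML will not
# expand, hence os.path.expandvars:
#   import os, yaml
#   with open('base_config.yaml') as f:
#       config = yaml.safe_load(f)
#   data_path = os.path.expandvars(config['dataset']['data_path'])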