Loading configs/mini_reverse_100_20_20_5_5.yaml +3 −2 Original line number Diff line number Diff line Loading @@ -86,6 +86,7 @@ dqn: beam_min: 1 beam_max: 50 state_type: 'hidden' reward_type: 'hc' nu_pretrain: 0 reward_type: 'bleu_fin' nu_pretrain: 50 non_stop: False other_descrip: '_with_best_parameters' configs/mini_reverse_1_1_1_5_5.yaml +6 −5 Original line number Diff line number Diff line name: "mini_reverse_2000_200_200_5_5" name: "mini_reverse_1_1_1_5_5" data: src: "src" Loading Loading @@ -37,7 +37,7 @@ training: validation_freq: 10 logging_freq: 10 eval_metric: "bleu" model_dir: "mini_reverse_1_1_1_5_5_model" model_dir: 'mini_reverse_1_1_1_5_5_lr_bleu_fin_model' overwrite: True shuffle: True use_cuda: False Loading Loading @@ -85,7 +85,7 @@ model: dqn: epochs: 5000 sample_size: 64 lr: 0.0001 lr: 0.00001 egreed_max: 0.9 egreed_min: 0.001 gamma: 0.99 Loading @@ -94,8 +94,9 @@ dqn: beam_min: 1 beam_max: 50 state_type: 'attention' reward_type: 'hc_batch' reward_type: 'bleu_fin' nu_pretrain: 0 non_stop: False test_variable: 'lr' test_range: [0.001, 0.0001, 0.00001] test_range: [] other_descrip: 'lr_var' configs/mini_reverse_2000_200_200_5_5.yaml +1 −1 Original line number Diff line number Diff line Loading @@ -82,7 +82,6 @@ model: # test_range: [0.8, 0.9, 0.99] # other_descrip: 'gamma_var' #reward_type: 'bleu_fin' or 'hc_batch' #state_type: 'attention' or 'hidden' dqn: Loading @@ -99,6 +98,7 @@ dqn: state_type: 'attention' reward_type: 'hc_batch' nu_pretrain: 0 non_stop: False test_variable: 'lr' test_range: [0.001, 0.0001, 0.00001] other_descrip: 'lr_var' configs/mini_reverse_gdrive.yamldeleted 100644 → 0 +0 −93 Original line number Diff line number Diff line name: "mini_reverse_batch_experiment" data: src: "src" trg: "trg" # generate data with scripts/generate_reverse_task.py train: "test/data/mini_reverse_0/train" dev: "test/data/mini_reverse_0/dev" test: "test/data/mini_reverse_0/test" level: "word" lowercase: False max_sent_length: 25 src_voc_min_freq: 0 src_voc_limit: 100 trg_voc_min_freq: 0 trg_voc_limit: 100 #src_vocab: "mini_reverse_0/src_vocab.txt" #trg_vocab: "mini_reverse_0/trg_vocab.txt" testing: beam_size: 1 alpha: 1.0 training: random_seed: 42 optimizer: "adam" learning_rate: 0.001 learning_rate_min: 0.0002 weight_decay: 0.0 clip_grad_norm: 1.0 batch_size: 1 batch_type: "sentence" scheduling: "plateau" patience: 5 decrease_factor: 0.5 early_stopping_metric: "eval_metric" epochs: 10 validation_freq: 10 logging_freq: 10 eval_metric: "bleu" model_dir: "/content/gdrive/My Drive/models/batch_V0" overwrite: True shuffle: True use_cuda: False max_output_length: 10 print_valid_sents: [0, 3, 6] keep_last_ckpts: 2 model: initializer: "xavier" embed_initializer: "normal" embed_init_weight: 0.1 bias_initializer: "zeros" init_rnn_orthogonal: False lstm_forget_gate: 0. encoder: rnn_type: "lstm" embeddings: embedding_dim: 16 scale: False hidden_size: 24 bidirectional: True dropout: 0.1 num_layers: 1 decoder: rnn_type: "lstm" embeddings: embedding_dim: 16 scale: False hidden_size: 24 dropout: 0.1 hidden_dropout: 0.1 num_layers: 1 input_feeding: True init_hidden: "bridge" attention: "luong" dqn: epochs: 2000 sample_size: 256 lr: 0.00005 egreed_max: 0.9 egreed_min: 0.001 gamma_max: 0.9 gamma_min: 0.3 nu_iter: 300 mem_cap: 5000 beam_min: 1 beam_max: 50 state_type: 'attention' reward_type: 'hc_batch' nu_pretrain: 40 other_descrip: '_mini_reverse_with_batch_w_attention' configs/mini_reverse_mega_tiny.yamldeleted 100644 → 0 +0 −95 Original line number Diff line number Diff line name: "mini_reverse_100_10_10_5_5" data: src: "src" trg: "trg" # generate data with scripts/generate_reverse_task.py train: "test/data/mini_reverse_100_10_10_5_5/train" dev: "test/data/mini_reverse_100_10_10_5_5/dev" test: "test/data/mini_reverse_100_10_10_5_5/test" level: "word" lowercase: False max_sent_length: 25 src_voc_min_freq: 0 src_voc_limit: 100 trg_voc_min_freq: 0 trg_voc_limit: 100 #src_vocab: "mini_reverse_100_10_10_5_5_model/src_vocab.txt" #trg_vocab: "mini_reverse_100_10_10_5_5_model/trg_vocab.txt" testing: beam_size: 1 alpha: 1.0 training: random_seed: 42 optimizer: "adam" learning_rate: 0.01 learning_rate_min: 0.00002 weight_decay: 0.0 clip_grad_norm: 1.0 batch_size: 10 batch_type: "sentence" scheduling: "plateau" patience: 5 decrease_factor: 0.5 early_stopping_metric: "eval_metric" epochs: 50 validation_freq: 10 logging_freq: 10 eval_metric: "bleu" model_dir: "mini_reverse_100_10_10_5_5_model" # model_dir: "/content/gdrive/My Drive/models/batch_V0" overwrite: True shuffle: True use_cuda: False max_output_length: 10 print_valid_sents: [0, 3, 6] keep_last_ckpts: 2 model: initializer: "xavier" embed_initializer: "normal" embed_init_weight: 0.1 bias_initializer: "zeros" init_rnn_orthogonal: False lstm_forget_gate: 0. encoder: rnn_type: "lstm" embeddings: embedding_dim: 16 scale: False hidden_size: 24 bidirectional: True dropout: 0.1 num_layers: 1 decoder: rnn_type: "lstm" embeddings: embedding_dim: 16 scale: False hidden_size: 24 dropout: 0.1 hidden_dropout: 0.1 num_layers: 1 input_feeding: True init_hidden: "bridge" attention: "luong" dqn: epochs: 1000 sample_size: 256 lr: 0.00005 egreed_max: 0.9 egreed_min: 0.001 gamma_max: 0.9 gamma_min: 0.3 nu_iter: 100 mem_cap: 5000 beam_min: 1 beam_max: 50 state_type: 'attention' test_variable: 'lr' test_range: [0.5, 0.7, 0.4] reward_type: 'hc_batch' nu_pretrain: 0 other_descrip: 'lr__00005_rep_results' Loading
configs/mini_reverse_100_20_20_5_5.yaml +3 −2 Original line number Diff line number Diff line Loading @@ -86,6 +86,7 @@ dqn: beam_min: 1 beam_max: 50 state_type: 'hidden' reward_type: 'hc' nu_pretrain: 0 reward_type: 'bleu_fin' nu_pretrain: 50 non_stop: False other_descrip: '_with_best_parameters'
configs/mini_reverse_1_1_1_5_5.yaml +6 −5 Original line number Diff line number Diff line name: "mini_reverse_2000_200_200_5_5" name: "mini_reverse_1_1_1_5_5" data: src: "src" Loading Loading @@ -37,7 +37,7 @@ training: validation_freq: 10 logging_freq: 10 eval_metric: "bleu" model_dir: "mini_reverse_1_1_1_5_5_model" model_dir: 'mini_reverse_1_1_1_5_5_lr_bleu_fin_model' overwrite: True shuffle: True use_cuda: False Loading Loading @@ -85,7 +85,7 @@ model: dqn: epochs: 5000 sample_size: 64 lr: 0.0001 lr: 0.00001 egreed_max: 0.9 egreed_min: 0.001 gamma: 0.99 Loading @@ -94,8 +94,9 @@ dqn: beam_min: 1 beam_max: 50 state_type: 'attention' reward_type: 'hc_batch' reward_type: 'bleu_fin' nu_pretrain: 0 non_stop: False test_variable: 'lr' test_range: [0.001, 0.0001, 0.00001] test_range: [] other_descrip: 'lr_var'
configs/mini_reverse_2000_200_200_5_5.yaml +1 −1 Original line number Diff line number Diff line Loading @@ -82,7 +82,6 @@ model: # test_range: [0.8, 0.9, 0.99] # other_descrip: 'gamma_var' #reward_type: 'bleu_fin' or 'hc_batch' #state_type: 'attention' or 'hidden' dqn: Loading @@ -99,6 +98,7 @@ dqn: state_type: 'attention' reward_type: 'hc_batch' nu_pretrain: 0 non_stop: False test_variable: 'lr' test_range: [0.001, 0.0001, 0.00001] other_descrip: 'lr_var'
configs/mini_reverse_gdrive.yamldeleted 100644 → 0 +0 −93 Original line number Diff line number Diff line name: "mini_reverse_batch_experiment" data: src: "src" trg: "trg" # generate data with scripts/generate_reverse_task.py train: "test/data/mini_reverse_0/train" dev: "test/data/mini_reverse_0/dev" test: "test/data/mini_reverse_0/test" level: "word" lowercase: False max_sent_length: 25 src_voc_min_freq: 0 src_voc_limit: 100 trg_voc_min_freq: 0 trg_voc_limit: 100 #src_vocab: "mini_reverse_0/src_vocab.txt" #trg_vocab: "mini_reverse_0/trg_vocab.txt" testing: beam_size: 1 alpha: 1.0 training: random_seed: 42 optimizer: "adam" learning_rate: 0.001 learning_rate_min: 0.0002 weight_decay: 0.0 clip_grad_norm: 1.0 batch_size: 1 batch_type: "sentence" scheduling: "plateau" patience: 5 decrease_factor: 0.5 early_stopping_metric: "eval_metric" epochs: 10 validation_freq: 10 logging_freq: 10 eval_metric: "bleu" model_dir: "/content/gdrive/My Drive/models/batch_V0" overwrite: True shuffle: True use_cuda: False max_output_length: 10 print_valid_sents: [0, 3, 6] keep_last_ckpts: 2 model: initializer: "xavier" embed_initializer: "normal" embed_init_weight: 0.1 bias_initializer: "zeros" init_rnn_orthogonal: False lstm_forget_gate: 0. encoder: rnn_type: "lstm" embeddings: embedding_dim: 16 scale: False hidden_size: 24 bidirectional: True dropout: 0.1 num_layers: 1 decoder: rnn_type: "lstm" embeddings: embedding_dim: 16 scale: False hidden_size: 24 dropout: 0.1 hidden_dropout: 0.1 num_layers: 1 input_feeding: True init_hidden: "bridge" attention: "luong" dqn: epochs: 2000 sample_size: 256 lr: 0.00005 egreed_max: 0.9 egreed_min: 0.001 gamma_max: 0.9 gamma_min: 0.3 nu_iter: 300 mem_cap: 5000 beam_min: 1 beam_max: 50 state_type: 'attention' reward_type: 'hc_batch' nu_pretrain: 40 other_descrip: '_mini_reverse_with_batch_w_attention'
configs/mini_reverse_mega_tiny.yamldeleted 100644 → 0 +0 −95 Original line number Diff line number Diff line name: "mini_reverse_100_10_10_5_5" data: src: "src" trg: "trg" # generate data with scripts/generate_reverse_task.py train: "test/data/mini_reverse_100_10_10_5_5/train" dev: "test/data/mini_reverse_100_10_10_5_5/dev" test: "test/data/mini_reverse_100_10_10_5_5/test" level: "word" lowercase: False max_sent_length: 25 src_voc_min_freq: 0 src_voc_limit: 100 trg_voc_min_freq: 0 trg_voc_limit: 100 #src_vocab: "mini_reverse_100_10_10_5_5_model/src_vocab.txt" #trg_vocab: "mini_reverse_100_10_10_5_5_model/trg_vocab.txt" testing: beam_size: 1 alpha: 1.0 training: random_seed: 42 optimizer: "adam" learning_rate: 0.01 learning_rate_min: 0.00002 weight_decay: 0.0 clip_grad_norm: 1.0 batch_size: 10 batch_type: "sentence" scheduling: "plateau" patience: 5 decrease_factor: 0.5 early_stopping_metric: "eval_metric" epochs: 50 validation_freq: 10 logging_freq: 10 eval_metric: "bleu" model_dir: "mini_reverse_100_10_10_5_5_model" # model_dir: "/content/gdrive/My Drive/models/batch_V0" overwrite: True shuffle: True use_cuda: False max_output_length: 10 print_valid_sents: [0, 3, 6] keep_last_ckpts: 2 model: initializer: "xavier" embed_initializer: "normal" embed_init_weight: 0.1 bias_initializer: "zeros" init_rnn_orthogonal: False lstm_forget_gate: 0. encoder: rnn_type: "lstm" embeddings: embedding_dim: 16 scale: False hidden_size: 24 bidirectional: True dropout: 0.1 num_layers: 1 decoder: rnn_type: "lstm" embeddings: embedding_dim: 16 scale: False hidden_size: 24 dropout: 0.1 hidden_dropout: 0.1 num_layers: 1 input_feeding: True init_hidden: "bridge" attention: "luong" dqn: epochs: 1000 sample_size: 256 lr: 0.00005 egreed_max: 0.9 egreed_min: 0.001 gamma_max: 0.9 gamma_min: 0.3 nu_iter: 100 mem_cap: 5000 beam_min: 1 beam_max: 50 state_type: 'attention' test_variable: 'lr' test_range: [0.5, 0.7, 0.4] reward_type: 'hc_batch' nu_pretrain: 0 other_descrip: 'lr__00005_rep_results'