Commit d79659ab authored by Antonio Ruiz

tests with one sequence.

parent d36bcf31

configs/one_copy.yaml

0 → 100644
+92 −0
name: "copy_experiment"

data:
    src: "src"
    trg: "trg"
    # generate data with scripts/generate_copy_task.py
    train: "test/data/one_copy/train"
    dev: "test/data/one_copy/dev"
    test: "test/data/one_copy/test"
    level: "word"
    lowercase: False
    max_sent_length: 25
    src_voc_min_freq: 0
    src_voc_limit: 100
    trg_voc_min_freq: 0
    trg_voc_limit: 100
    #src_vocab: "one_copy_model/src_vocab.txt"
    #trg_vocab: "one_copy_model/trg_vocab.txt"

testing:
    beam_size: 1
    alpha: -1.0

training:
    random_seed: 42
    optimizer: "adam"
    learning_rate: 0.001
    learning_rate_min: 0.0002
    weight_decay: 0.0
    clip_grad_norm: 1.0
    batch_size: 1
    batch_type: "sentence"
    scheduling: "plateau"
    patience: 5
    decrease_factor: 0.5
    early_stopping_metric: "eval_metric"
    epochs: 100
    validation_freq: 10
    logging_freq: 10
    eval_metric: "bleu"
    model_dir: "one_copy_model"
    overwrite: True
    shuffle: True
    use_cuda: False
    max_output_length: 10
    print_valid_sents: [0, 3, 6]
    keep_last_ckpts: 2

model:
    initializer: "xavier"
    embed_initializer: "normal"
    embed_init_weight: 0.1
    bias_initializer: "zeros"
    init_rnn_orthogonal: False
    lstm_forget_gate: 0.
    encoder:
        rnn_type: "lstm"
        embeddings:
            embedding_dim: 16
            scale: False
        hidden_size: 24
        bidirectional: True
        dropout: 0.1
        num_layers: 1
    decoder:
        rnn_type: "lstm"
        embeddings:
            embedding_dim: 16
            scale: False
        hidden_size: 24
        dropout: 0.1
        hidden_dropout: 0.1
        num_layers: 1
        input_feeding: True
        init_hidden: "bridge"
        attention: "luong"
dqn:
    epochs: 5000
    sample_size: 32
    lr: 0.00005
    egreed_max: 0.9
    egreed_min: 0.001
    gamma_max: 0.9
    gamma_min: 0.3
    nu_iter: 10
    mem_cap: 110
    beam_min: 1
    beam_max: 50
    state_type: 'hidden'
    reward_type: 'bleu_fin'
    nu_pretrain: 0
    other_descrip: 'debug_one_copy_sample_bleu_with_smooth'
 No newline at end of file
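The comment in the config points at scripts/generate_copy_task.py for producing the data. For the single-sequence variant used here, a generator could look like the following sketch; it is not part of this commit, and the file layout (train.src, train.trg, etc. under test/data/one_copy/) is only inferred from the paths in the config above.

# Hypothetical helper (not part of this commit): write one fixed sequence as
# train/dev/test splits for the copy task, mirroring the layout the config expects.
import os
import random

def write_one_copy(out_dir="test/data/one_copy", length=10, seed=42):
    random.seed(seed)
    seq = " ".join(str(random.randint(0, 9)) for _ in range(length))
    os.makedirs(out_dir, exist_ok=True)
    for split in ("train", "dev", "test"):
        for side in ("src", "trg"):  # copy task: the target equals the source
            with open(os.path.join(out_dir, f"{split}.{side}"), "w") as f:
                f.write(seq + "\n")

if __name__ == "__main__":
    write_one_copy()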
configs/one_reverse.yaml

+92 −0
name: "reverse_experiment"

data:
    src: "src"
    trg: "trg"
    # generate data with scripts/generate_reverse_task.py
    train: "test/data/one_reverse/train"
    dev: "test/data/one_reverse/dev"
    test: "test/data/one_reverse/test"
    level: "word"
    lowercase: False
    max_sent_length: 25
    src_voc_min_freq: 0
    src_voc_limit: 100
    trg_voc_min_freq: 0
    trg_voc_limit: 100
    #src_vocab: "one_reverse_model/src_vocab.txt"
    #trg_vocab: "one_reverse_model/trg_vocab.txt"

testing:
    beam_size: 1
    alpha: -1.0

training:
    random_seed: 42
    optimizer: "adam"
    learning_rate: 0.001
    learning_rate_min: 0.0002
    weight_decay: 0.0
    clip_grad_norm: 1.0
    batch_size: 1
    batch_type: "sentence"
    scheduling: "plateau"
    patience: 5
    decrease_factor: 0.5
    early_stopping_metric: "eval_metric"
    epochs: 100
    validation_freq: 10
    logging_freq: 10
    eval_metric: "bleu"
    model_dir: "one_reverse_model"
    overwrite: True
    shuffle: True
    use_cuda: False
    max_output_length: 10
    print_valid_sents: [0, 3, 6]
    keep_last_ckpts: 2

model:
    initializer: "xavier"
    embed_initializer: "normal"
    embed_init_weight: 0.1
    bias_initializer: "zeros"
    init_rnn_orthogonal: False
    lstm_forget_gate: 0.
    encoder:
        rnn_type: "lstm"
        embeddings:
            embedding_dim: 16
            scale: False
        hidden_size: 24
        bidirectional: True
        dropout: 0.1
        num_layers: 1
    decoder:
        rnn_type: "lstm"
        embeddings:
            embedding_dim: 16
            scale: False
        hidden_size: 24
        dropout: 0.1
        hidden_dropout: 0.1
        num_layers: 1
        input_feeding: True
        init_hidden: "bridge"
        attention: "luong"
dqn:
    epochs: 5000
    sample_size: 32
    lr: 0.00005
    egreed_max: 0.9
    egreed_min: 0.001
    gamma_max: 0.9
    gamma_min: 0.3
    nu_iter: 10
    mem_cap: 110
    beam_min: 1
    beam_max: 50
    state_type: 'hidden'
    reward_type: 'bleu_fin'
    nu_pretrain: 0
    other_descrip: 'debug_one_sample'
 No newline at end of file
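The reverse task differs from the copy task only in that the target side is the source sequence with its token order reversed. Assuming the same hypothetical layout as in the sketch above:

# Hypothetical variant of the sketch above for the reverse task: the target line
# is the source sequence with its token order reversed.
import os
import random

def write_one_reverse(out_dir="test/data/one_reverse", length=10, seed=42):
    random.seed(seed)
    tokens = [str(random.randint(0, 9)) for _ in range(length)]
    pair = {"src": " ".join(tokens), "trg": " ".join(reversed(tokens))}
    os.makedirs(out_dir, exist_ok=True)
    for split in ("train", "dev", "test"):
        for side, line in pair.items():
            with open(os.path.join(out_dir, f"{split}.{side}"), "w") as f:
                f.write(line + "\n")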
+49 −31
@@ -234,7 +234,8 @@ class QManager(object):
        :param rew: rewards for every experience. Of length of the hypothesis
        """
        for epoch_no in range(self.epochs):
            print("EPOCH %d", epoch_no + 1)

            #print("EPOCH %d", epoch_no + 1)
            
            #beam_dqn = self.beam_min + int(self.beam_max * epoch_no/self.epochs)
            #egreed = self.egreed_max*(1 - epoch_no/(1.1*self.epochs))
@@ -272,7 +273,7 @@ class QManager(object):
            self.tb_writer.add_scalar("parameters/gamma",
                                              self.gamma, epoch_no)
            
            print(' beam_dqn, egreed, gamma: ', beam_dqn, egreed, self.gamma)
            #print(' beam_dqn, egreed, gamma: ', beam_dqn, egreed, self.gamma)
            for _, data_set in self.data_to_train_dqn.items():
                
                valid_iter = make_data_iter(
@@ -321,7 +322,7 @@ class QManager(object):
                    # * Defining it as zeros:____------------------------------------


                    prev_att_vector = encoder_output.new_zeros([batch_size, 1, self.state_size])
                    prev_att_vector = encoder_output.new_zeros([batch_size, 1, int (self.state_size/2)])

                    #----------------------------------------------------------------
                    #We can try 2 options (using a state 0 from attention):
@@ -395,7 +396,7 @@ class QManager(object):
                        is_eos = torch.eq(next_word, self.eos_index)
                        finished += is_eos

                        if t != 0:
                        if t > 0:
                            tup = (self.memory_counter, state, a, state_, a_, 1)
                            exp_list.append(tup)
                            self.memory_counter += 1
@@ -426,8 +427,6 @@ class QManager(object):
                    else:
                        r = self.Reward(batch.trg_input, hyp, show=False)  # 1 , time -1 



                    if i_sample < 3:
                        self.store_transition(exp_list, r, show = True)
                    else:
@@ -461,8 +460,8 @@ class QManager(object):
            #testing the performance of the network
            if self.learn_step_counter == 0:
                print('As a reference, this first test on dev data is made with the Q networks initialized randomly: ')
            else:
                print("\n Let's copy the Q-value net into the Q-target net and test the performance on the dev data: ")
            #else:
            #    print("\n Let's copy the Q-value net into the Q-target net and test the performance on the dev data: ")
            
            current_bleu = self.dev_network()
            self.bleu_list += [current_bleu]
@@ -473,10 +472,14 @@ class QManager(object):

        
        long_Batch = self.sample_size*3
        #print ("long_Batch: ",long_Batch)

        # Sampling the highest reward values
        b_memory_big = self.memory[np.argsort(-self.memory[:-self.max_output_length, self.state_size+1])][:long_Batch]
        
        sample_index = np.random.choice(long_Batch, self.sample_size)
        #print (" b_memory_big: ", b_memory_big.shape)
        #print ("sample_index: ", sample_index.shape)
        b_memory = b_memory_big[sample_index, :]

        b_s = torch.FloatTensor(b_memory[:, :self.state_size])
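This hunk samples the training batch in two stages: the long_Batch = sample_size * 3 transitions with the highest stored reward are selected first, and the batch is then drawn uniformly from that subset. A standalone sketch of the same idea follows, assuming a row layout of [state | action | reward | next_state | next_action | is_eos]; the real code also excludes the last max_output_length rows before sorting, which is omitted here.

# Standalone sketch of the reward-biased replay sampling shown above, with an
# assumed row layout of [state | action | reward | next_state | next_action | is_eos].
import numpy as np

state_size, sample_size = 4, 8
memory = np.random.rand(110, 2 * state_size + 4)   # toy replay memory

long_batch = sample_size * 3
# keep the long_batch rows with the highest stored reward (column state_size + 1)
top = memory[np.argsort(-memory[:, state_size + 1])][:long_batch]
# then draw the actual training batch uniformly from that high-reward subset
batch = top[np.random.choice(long_batch, sample_size), :]
states, rewards = batch[:, :state_size], batch[:, state_size + 1]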
@@ -496,18 +499,21 @@ class QManager(object):
        # taking the most likely action.
        # use the hyperparameter nu_pretrain to take the true action
        # or the one take from the one computed from the q_target
        if self.learn_step_counter % 50 == 1:
        #if self.learn_step_counter % 50 == 1:

        print ("learn step counter: ", self.learn_step_counter)
        print ("dev_network_count: ", self.dev_network_count )

        if self.learn_step_counter < self.nu_pretrain:
            if self.learn_step_counter == 1:
                print ("Using pretraining...")
            b_a_ = torch.LongTensor(b_memory[:, self.state_size+2 + self.state_size]).view(self.sample_size, 1)
        else:
            if self.learn_step_counter == self.nu_pretrain:
                print ("Starting using Q target net....")
        #if self.learn_step_counter < self.nu_pretrain:
        #    if self.learn_step_counter == 1:
        #        print ("Using pretraining...")
        #    b_a_ = torch.LongTensor(b_memory[:, self.state_size+2 + self.state_size]).view(self.sample_size, 1)
        #else:
        #   if self.learn_step_counter == self.nu_pretrain:
        #   print ("Starting using Q target net....")

        b_a_ = torch.LongTensor(q_next.max(1)[1].view(self.sample_size, 1).long())

        #b_a_ = q_next.max(1)[0].view(self.sample_size, 1).long()   # shape (batch, 1)
        q_eval_next = self.eval_net(b_s_).gather(1, b_a_)   # shape (batch, 1)
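With the pretraining branch commented out, the next action is always the argmax over q_next and its value is read from the evaluation network. q_next is not defined in the visible lines; the sketch below assumes it comes from a separate target network, and all sizes and gamma are made up for illustration.

# Toy sketch of the target construction visible in this hunk: the next action is
# the argmax under q_next (assumed to come from a target net), and its value is
# read from the evaluation net.
import torch
import torch.nn as nn

state_size, n_actions, batch, gamma = 4, 6, 8, 0.9
eval_net = nn.Linear(state_size, n_actions)     # stand-in for the Q evaluation net
target_net = nn.Linear(state_size, n_actions)   # stand-in for the Q target net

b_s_ = torch.randn(batch, state_size)           # next states
b_r = torch.randn(batch, 1)                     # rewards
q_next = target_net(b_s_).detach()
b_a_ = q_next.max(1)[1].view(batch, 1)          # argmax next action
q_eval_next = eval_net(b_s_).gather(1, b_a_)    # its value under the eval net
q_target = b_r + gamma * q_eval_next.detach()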
        
@@ -522,7 +528,10 @@ class QManager(object):
        #print(soft_func(a))
        q_eval_all = self.eval_net(b_s)
        entro = entropy(soft_func(q_eval_all.detach()).T, base=self.actions_size)
        print("entropy: ", entro)

        #if self.learn_step_counter % 50 == 1:
        #print("entropy: ", entro)

        aver_entro = entro.sum()/self.sample_size
        self.tb_writer.add_scalar("learn/q_eval_entropy",
                        aver_entro, self.learn_step_counter)
@@ -566,8 +575,10 @@ class QManager(object):
            
            r = rew[i]
            transition = np.hstack((state, [a, r], state_, a_,  is_eos))
            if show == True:
                print(index, a,r,a_,is_eos, ' ... s[:3]: ', state[:3], ' ... s_[:5]: ', state_[:3], )

            #if show == True:
            #    print(index, a,r,a_,is_eos, ' ... s[:3]: ', state[:3], ' ... s_[:5]: ', state_[:3], )

            self.memory[index, :] = transition


@@ -948,6 +959,10 @@ class QManager(object):
                                                     , valid_references[0], smooth_method='floor', smooth_value=smooth
                                                     , use_effective_order=True).score



        #current_valid_score = sacrebleu.raw_corpus_bleu(sys_stream=valid_hypotheses, ref_streams=[valid_references]).score

        final_rew[-1] = current_valid_score
        if show:
            print("\n Sample-------------Target vs Eval_net prediction:--Raw---and---Decoded-----")
@@ -1028,8 +1043,9 @@ class QManager(object):
                        state = torch.cat(hidden, dim=2).squeeze(1).detach().cpu()[0]
                    else:
                        state = torch.FloatTensor(prev_att_vector.squeeze(1).detach().cpu().numpy()[0])
                    if i_sample < 3:
                        print('So far: ', output, ' the state[:3] is: ', state[:3])

                    #if i_sample < 3:
                    #    print('So far: ', output, ' the state[:3] is: ', state[:3])

                    logits = self.eval_net(state)
                    batch_size_aux =  len(logits)
@@ -1113,8 +1129,10 @@ class QManager(object):
            valid_hypotheses = [join_char.join(t) for t in decoded_valid]

            #print('On dataset: ', data_set.trg[:10])
            print('valid_references: ',valid_references[:10])
            print('valid_hypotheses: ',valid_hypotheses[:10])
            print('On ', data_set_name)
            print('valid_references \t vs \t predicted_hypotheses')
            for i in np.arange(min(10,len(valid_references))):
                print(valid_references[i], '\t vs \t', valid_hypotheses[i])

            # post-process
            if self.level == "bpe":

log_1155.log

0 → 100644
+17319 −0

File added.

File size exceeds preview limit.

log_1203.log

0 → 100644
+16485 −0

File added.

File size exceeds preview limit.
