From 3d0bc0ac664de100cd0c846050571f3c3ac3343b Mon Sep 17 00:00:00 2001
From: ceyda <15624271+cceyda@users.noreply.github.com>
Date: Fri, 2 Apr 2021 18:49:51 +0000
Subject: [PATCH] fairseq tuned

---
 config.json             | 19 +++++++++----------
 pytorch_model.bin       |  4 ++--
 special_tokens_map.json |  2 +-
 tokenizer_config.json   |  2 +-
 vocab.json              | 40 +++++++++++++++++++++++++++++++++++++++-
 5 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/config.json b/config.json
index cf53f72..7275e8e 100644
--- a/config.json
+++ b/config.json
@@ -1,12 +1,11 @@
 {
-  "_name_or_path": "./pretrained/checkpoint_0.60",
   "activation_dropout": 0.1,
   "apply_spec_augment": true,
   "architectures": [
     "Wav2Vec2ForCTC"
   ],
   "attention_dropout": 0.1,
-  "bos_token_id": 1,
+  "bos_token_id": 0,
   "conv_bias": false,
   "conv_dim": [
     512,
@@ -35,17 +34,17 @@
     2,
     2
   ],
-  "ctc_loss_reduction": "mean",
-  "ctc_zero_infinity": true,
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
   "do_stable_layer_norm": false,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
   "feat_extract_norm": "group",
   "feat_proj_dropout": 0.1,
   "final_dropout": 0.1,
-  "gradient_checkpointing": true,
+  "gradient_checkpointing": false,
   "hidden_act": "gelu",
-  "hidden_dropout": 0.05,
+  "hidden_dropout": 0.1,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
@@ -54,14 +53,14 @@
   "mask_feature_length": 10,
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
-  "mask_time_prob": 0.5,
+  "mask_time_prob": 0.05,
   "model_type": "wav2vec2",
   "num_attention_heads": 12,
   "num_conv_pos_embedding_groups": 16,
   "num_conv_pos_embeddings": 128,
   "num_feat_extract_layers": 7,
   "num_hidden_layers": 12,
-  "pad_token_id": 36,
-  "transformers_version": "4.5.0.dev0",
-  "vocab_size": 37
+  "pad_token_id": 1,
+  "transformers_version": "4.4.2",
+  "vocab_size": 39
 }
diff --git a/pytorch_model.bin b/pytorch_model.bin
index 6b20cde..72adfa1 100644
--- a/pytorch_model.bin
+++ b/pytorch_model.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ecb5a521af5105ca67abd19a841c08e97b071a8b69e186b06b25633c6fc36804
-size 377690860
+oid sha256:7ac7eaad990b15315d1772928ea15b9c77d2e259311b5189f9772b04da157294
+size 377691502
diff --git a/special_tokens_map.json b/special_tokens_map.json
index 9abf719..25bc396 100644
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -1 +1 @@
-{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
\ No newline at end of file
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
\ No newline at end of file
diff --git a/tokenizer_config.json b/tokenizer_config.json
index a2a8340..42e4a14 100644
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -1 +1 @@
-{"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|"}
\ No newline at end of file
+{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|","special_tokens_map_file": "/home/ceyda/workspace/libs/fairseq/hf_finetuned_output/special_tokens_map.json", "tokenizer_file": null}
\ No newline at end of file
diff --git a/vocab.json b/vocab.json
index 7f974b7..2edf7b2 100644
--- a/vocab.json
+++ b/vocab.json
@@ -1 +1,39 @@
-{"e": 0, "x": 1, "î": 2, "l": 3, "â": 4, "j": 5, "ç": 6, "ş": 7, "g": 8, "ı": 9, "v": 10, "d": 11, "t": 12, "n": 13, "a": 14, "c": 15, "h": 16, "p": 17, "r": 18, "w": 19, "z": 20, "k": 21, "u": 22, "b": 23, "ü": 24, "y": 26, "o": 27, "q": 28, "m": 29, "f": 30, "s": 31, "ö": 32, "ğ": 33, "i": 34, "|": 25, "[UNK]": 35, "[PAD]": 36}
\ No newline at end of file
+{"|": 4,
+ "p": 5,
+ "i": 6,
+ "r": 7,
+ "n": 8,
+ "s": 9,
+ "ö": 10,
+ "z": 11,
+ "l": 12,
+ "e": 13,
+ "h": 14,
+ "â": 15,
+ "y": 16,
+ "a": 17,
+ "k": 18,
+ "ı": 19,
+ "o": 20,
+ "m": 21,
+ "ü": 22,
+ "g": 23,
+ "c": 24,
+ "b": 25,
+ "ş": 26,
+ "d": 27,
+ "u": 28,
+ "t": 29,
+ "ç": 30,
+ "ğ": 31,
+ "v": 32,
+ "f": 33,
+ "j": 34,
+ "x": 35,
+ "w": 36,
+ "q": 37,
+ "î": 38,
+ "<s>": 0,
+ "<pad>": 1,
+ "</s>": 2,
+ "<unk>": 3}