From 3dee02cafdc0c0bdf387b0bbab566426c51601df Mon Sep 17 00:00:00 2001 From: ceyda <15624271+cceyda@users.noreply.github.com> Date: Tue, 6 Apr 2021 23:50:00 +0000 Subject: [PATCH] fix attention flag --- README.md | 10 +++++++--- preprocessor_config.json | 2 +- pytorch_model.bin | 2 +- tokenizer_config.json | 2 +- vocab.json | 40 +--------------------------------------- 5 files changed, 11 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index c67f30f..601f0de 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ model-index: metrics: - name: Test WER type: wer - value: 24.91 + value: 22.60 --- # Wav2Vec2-Base-760-Turkish @@ -102,11 +102,13 @@ test_dataset = test_dataset.map(speech_file_to_array_fn) # Preprocessing the datasets. # We need to read the aduio files as arrays + +#Attention mask is not used because the base-model was not trained with it. reference: https://github.com/huggingface/transformers/blob/403d530eec105c0e229fc2b754afdf77a4439def/src/transformers/models/wav2vec2/tokenization_wav2vec2.py#L305 def evaluate(batch): inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True) with torch.no_grad(): - logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits + logits = model(inputs.input_values.to("cuda")).logits pred_ids = torch.argmax(logits, dim=-1) batch["pred_strings"] = processor.batch_decode(pred_ids,skip_special_tokens=True) @@ -117,7 +119,9 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8) print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"]))) ``` -**Test Result**: 24.91 % (in progress) +**Test Results**: +- WER: 22.602390 +- CER: 6.054137 ## Training diff --git a/preprocessor_config.json b/preprocessor_config.json index 0886a48..2120dc9 100644 --- a/preprocessor_config.json +++ b/preprocessor_config.json @@ -2,7 +2,7 @@ "do_normalize": true, "feature_size": 1, "padding_side": "right", - "padding_value": 0.0, + "padding_value": 0, "return_attention_mask": true, "sampling_rate": 16000 } diff --git a/pytorch_model.bin b/pytorch_model.bin index 72adfa1..0c98a17 100644 --- a/pytorch_model.bin +++ b/pytorch_model.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7ac7eaad990b15315d1772928ea15b9c77d2e259311b5189f9772b04da157294 +oid sha256:adeefd83b89a25212c0d6c74b43b28e367e54cc7fbce63599927f7bc6d2b8ae9 size 377691502 diff --git a/tokenizer_config.json b/tokenizer_config.json index 42e4a14..43772fe 100644 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -1 +1 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "do_lower_case": false, "word_delimiter_token": "|","special_tokens_map_file": "/home/ceyda/workspace/libs/fairseq/hf_finetuned_output/special_tokens_map.json", "tokenizer_file": null} \ No newline at end of file +{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "do_lower_case": false, "word_delimiter_token": "|"} \ No newline at end of file diff --git a/vocab.json b/vocab.json index 2edf7b2..071cf4d 100644 --- a/vocab.json +++ b/vocab.json @@ -1,39 +1 @@ -{"|": 4, - "p": 5, - "i": 6, - "r": 7, - "n": 8, - "s": 9, - "ö": 10, - "z": 11, - "l": 12, - "e": 13, - "h": 14, - "â": 15, - "y": 16, - "a": 17, - "k": 18, - "ı": 19, - "o": 20, - "m": 21, - "ü": 22, - "g": 23, - "c": 24, - "b": 25, - "ş": 26, - "d": 27, - "u": 28, - "t": 29, - "ç": 30, - "ğ": 31, - "v": 32, - "f": 33, - "j": 34, - "x": 35, - "w": 36, - "q": 37, - "î": 38, - "": 0, - "": 1, - "": 2, - "": 3} +{"": 0, "": 1, "": 2, "": 3, "|": 4, "p": 5, "i": 6, "r": 7, "n": 8, "s": 9, "ö": 10, "z": 11, "l": 12, "e": 13, "h": 14, "â": 15, "y": 16, "a": 17, "k": 18, "ı": 19, "o": 20, "m": 21, "ü": 22, "g": 23, "c": 24, "b": 25, "ş": 26, "d": 27, "u": 28, "t": 29, "ç": 30, "ğ": 31, "v": 32, "f": 33, "j": 34, "x": 35, "w": 36, "q": 37, "î": 38} \ No newline at end of file