From 15bc8ce0fb2bd1382576ad71991cd64f58de5475 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Tue, 9 Feb 2021 09:11:54 +0000 Subject: [PATCH] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 26d4bcb..54fbb3e 100644 --- a/README.md +++ b/README.md @@ -30,14 +30,14 @@ The original model can be found under https://github.com/pytorch/fairseq/tree/ma To transcribe audio files the model can be used as a standalone acoustic model as follows: ```python - from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForMaskedLM + from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC from datasets import load_dataset import soundfile as sf import torch # load model and tokenizer tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-large-960h-lv60-self") - model = Wav2Vec2ForMaskedLM.from_pretrained("facebook/wav2vec2-large-960h-lv60-self") + model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self") # define function to read in sound file def map_to_array(batch): @@ -66,7 +66,7 @@ To transcribe audio files the model can be used as a standalone acoustic model a ```python from datasets import load_dataset -from transformers import Wav2Vec2ForMaskedLM, Wav2Vec2Tokenizer +from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer import soundfile as sf import torch from jiwer import wer @@ -74,7 +74,7 @@ from jiwer import wer librispeech_eval = load_dataset("librispeech_asr", "clean", split="test") -model = Wav2Vec2ForMaskedLM.from_pretrained("facebook/wav2vec2-large-960h-lv60-self").to("cuda") +model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self").to("cuda") tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-large-960h-lv60-self") def map_to_array(batch):