From d79204c24a2b6cb6d09b19d31f1b45fc40377817 Mon Sep 17 00:00:00 2001
From: Patrick von Platen
Date: Tue, 9 Feb 2021 09:11:28 +0000
Subject: [PATCH] Update README.md

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index ac21045..fdf4d5b 100644
--- a/README.md
+++ b/README.md
@@ -32,14 +32,14 @@ The original model can be found under https://github.com/pytorch/fairseq/tree/ma
 To transcribe audio files the model can be used as a standalone acoustic model as follows:
 
 ```python
- from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForMaskedLM
+ from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
  from datasets import load_dataset
  import soundfile as sf
  import torch
 
  # load model and tokenizer
  tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
- model = Wav2Vec2ForMaskedLM.from_pretrained("facebook/wav2vec2-base-960h")
+ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
 
  # define function to read in sound file
  def map_to_array(batch):
@@ -68,7 +68,7 @@ To transcribe audio files the model can be used as a standalone acoustic model a
 
 ```python
 from datasets import load_dataset
-from transformers import Wav2Vec2ForMaskedLM, Wav2Vec2Tokenizer
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
 import soundfile as sf
 import torch
 from jiwer import wer
@@ -76,7 +76,7 @@ from jiwer import wer
 
 librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
 
-model = Wav2Vec2ForMaskedLM.from_pretrained("facebook/wav2vec2-base-960h").to("cuda")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h").to("cuda")
 tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
 
 def map_to_array(batch):
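
Note (supplementary, not part of the patch): both hunks truncate the README snippets at `def map_to_array(batch):`. For reference, a minimal end-to-end sketch of the corrected transcription example follows, assuming the transformers 4.3-era API shown in the diff; `sample.wav` is a hypothetical local 16kHz mono audio file:

```python
# Supplementary sketch (not part of the diff): completes the transcription
# snippet the first hunk truncates. "sample.wav" is a hypothetical file path.
import soundfile as sf
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer

tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

# read the raw waveform; the model was pretrained on 16kHz speech
speech, sample_rate = sf.read("sample.wav")

# convert the waveform into model inputs
input_values = tokenizer(speech, return_tensors="pt").input_values

# Wav2Vec2ForCTC emits per-frame logits over the character vocabulary,
# which is why the patch swaps in the CTC head for the masked-LM head
logits = model(input_values).logits

# greedy CTC decoding: argmax per frame, then let the tokenizer collapse
# repeated tokens and blanks into text
predicted_ids = torch.argmax(logits, dim=-1)
transcription = tokenizer.batch_decode(predicted_ids)[0]
print(transcription)
```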
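
The evaluation snippet is truncated the same way; a sketch of the remaining steps under the same API assumptions, using jiwer's `wer` to compare reference transcripts against predictions:

```python
# Supplementary sketch (not part of the diff): completes the LibriSpeech
# WER-evaluation snippet from the second and third hunks. Requires a GPU.
import soundfile as sf
import torch
from datasets import load_dataset
from jiwer import wer
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer

librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")

model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h").to("cuda")
tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")

# read each example's audio file into an array column
def map_to_array(batch):
    speech, _ = sf.read(batch["file"])
    batch["speech"] = speech
    return batch

librispeech_eval = librispeech_eval.map(map_to_array)

# transcribe each batch with greedy CTC decoding
def map_to_pred(batch):
    input_values = tokenizer(batch["speech"], return_tensors="pt", padding="longest").input_values
    with torch.no_grad():
        logits = model(input_values.to("cuda")).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    batch["transcription"] = tokenizer.batch_decode(predicted_ids)
    return batch

result = librispeech_eval.map(map_to_pred, batched=True, batch_size=1, remove_columns=["speech"])

# word error rate between reference texts and model predictions
print("WER:", wer(result["text"], result["transcription"]))
```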