Update readme, `whisper-large` -> `whisper-large-v2` (#4)
- Update readme, `whisper-large` -> `whisper-large-v2` (6827ce791b91ce7af083878b8a153bcd768c1c74)
This commit is contained in:
parent
dae1ec8f4c
commit
468057c79f
18
README.md
18
README.md
|
@@ -174,8 +174,8 @@ The "<|en|>" token is used to specify that the speech is in english and should be transcribed
|
||||||
>>> import torch
|
>>> import torch
|
||||||
|
|
||||||
>>> # load model and processor
|
>>> # load model and processor
|
||||||
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
|
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
|
||||||
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
|
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
|
||||||
|
|
||||||
>>> # load dummy dataset and read soundfiles
|
>>> # load dummy dataset and read soundfiles
|
||||||
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
|
@@ -199,8 +199,8 @@ transcription.
|
||||||
>>> import torch
|
>>> import torch
|
||||||
|
|
||||||
>>> # load model and processor
|
>>> # load model and processor
|
||||||
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
|
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
|
||||||
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
|
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
|
||||||
|
|
||||||
>>> # load dummy dataset and read soundfiles
|
>>> # load dummy dataset and read soundfiles
|
||||||
>>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
|
>>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
|
||||||
|
@@ -227,8 +227,8 @@ The "<|translate|>" is used as the first decoder input token to specify the translation task
|
||||||
>>> import torch
|
>>> import torch
|
||||||
|
|
||||||
>>> # load model and processor
|
>>> # load model and processor
|
||||||
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
|
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
|
||||||
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
|
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
|
||||||
|
|
||||||
>>> # load dummy dataset and read soundfiles
|
>>> # load dummy dataset and read soundfiles
|
||||||
>>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
|
>>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
|
||||||
|
@@ -245,7 +245,7 @@ The "<|translate|>" is used as the first decoder input token to specify the translation task
|
||||||
|
|
||||||
## Evaluation
|
## Evaluation
|
||||||
|
|
||||||
This code snippet shows how to evaluate **openai/whisper-large** on LibriSpeech's "clean" and "other" test data.
|
This code snippet shows how to evaluate **openai/whisper-large-v2** on LibriSpeech's "clean" and "other" test data.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
>>> from datasets import load_dataset
|
>>> from datasets import load_dataset
|
||||||
|
@@ -257,8 +257,8 @@ This code snippet shows how to evaluate **openai/whisper-large** on LibriSpeech's "clean" and "other" test data.
|
||||||
|
|
||||||
>>> librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
|
>>> librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
|
||||||
|
|
||||||
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large").to("cuda")
|
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2").to("cuda")
|
||||||
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
|
>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
|
||||||
|
|
||||||
>>> def map_to_pred(batch):
|
>>> def map_to_pred(batch):
|
||||||
>>> input_features = processor(batch["audio"]["array"], return_tensors="pt").input_features
|
>>> input_features = processor(batch["audio"]["array"], return_tensors="pt").input_features
|
||||||
|
|
Loading…
Reference in New Issue