Upload README.md

2022-07-16 02:43:10 +00:00 · 2022-07-16 02:43:10 +00:00 · 5312b0749f
parent 670752485b
commit 5312b0749f
1 changed files with 44 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,44 @@
 ---
 language: en
 datasets:
 - LIUM/tedlium
 tags:
 - speech
 - audio
 - automatic-speech-recognition
 ---
 This model is fine-tuned from [facebook/wav2vec2-large-960h-lv60-self](https://huggingface.co/facebook/wav2vec2-large-960h-lv60-self).
 # Installation
 1. Install PyTorch: https://pytorch.org/
 2. Install transformers: https://huggingface.co/docs/transformers/installation
 For example, to install both with conda:
 ```
 # Create a dedicated environment and activate it first; otherwise the
 # packages below are installed into whichever environment is currently active.
 >> conda create -n wav2vec2 python=3.8
 >> conda activate wav2vec2
 >> conda install pytorch cudatoolkit=11.3 -c pytorch
 >> conda install -c conda-forge transformers
 ```
 # Usage
 ```python
 # Dependencies for loading the checkpoint and running inference
 import numpy as np
 import torch
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 # The model and its processor are loaded from the same checkpoint id
 checkpoint = r'yongjian/wav2vec2-large-a'
 model = Wav2Vec2ForCTC.from_pretrained(checkpoint)
 processor = Wav2Vec2Processor.from_pretrained(checkpoint)
 # Placeholder input: 16000 random samples limited to [-1, 1]; replace it with your own waveform
 np_wav = np.clip(np.random.normal(size=16000), -1, 1)
 # Preprocess at the sampling rate the feature extractor is configured with
 sample_rate = processor.feature_extractor.sampling_rate
 batch = processor(np_wav, sampling_rate=sample_rate, return_tensors="pt", padding=True)
 # Forward pass with gradient tracking disabled (use .cuda() for GPU acceleration)
 with torch.no_grad():
     logits = model(batch.input_values, attention_mask=batch.attention_mask).logits
 # Take the argmax over the last logits dimension and decode the resulting ids to text
 pred_ids = logits.argmax(dim=-1).cpu()
 pred_text = processor.batch_decode(pred_ids)
 print('Transcription:', pred_text)
 ```