Update README.md (#1)
- Update README.md (3af156a76220a22440ec6b0a3e80a91c64a97a0a) Co-authored-by: Younes Belkada <ybelkada@users.noreply.huggingface.co>
### How to use
For code examples, see the [documentation](https://huggingface.co/docs/transformers/main/en/model_doc/blip-2#transformers.Blip2ForConditionalGeneration.forward.example), or use the snippets below depending on your use case:
#### Running the model on CPU
<details>
<summary> Click to expand </summary>

```python
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xxl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xxl")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

question = "how many dogs are in the picture?"
inputs = processor(raw_image, question, return_tensors="pt")

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))
```
</details>
#### Running the model on GPU
##### In full precision
<details>
<summary> Click to expand </summary>

```python
# pip install accelerate
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xxl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xxl", device_map="auto")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

question = "how many dogs are in the picture?"
inputs = processor(raw_image, question, return_tensors="pt").to("cuda")

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))
```
</details>
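Since `device_map="auto"` lets `accelerate` decide the placement, it can be useful to check where the weights actually ended up. A small sketch, assuming it continues from the snippet above (`hf_device_map` is set by `from_pretrained` whenever a `device_map` is passed):

```python
# show which device accelerate assigned to each module group
print(model.hf_device_map)
```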
##### In half precision (`float16`)
<details>
<summary> Click to expand </summary>

```python
# pip install accelerate
import torch
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xxl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xxl", torch_dtype=torch.float16, device_map="auto")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

question = "how many dogs are in the picture?"
# cast the pixel values to float16 to match the model weights
inputs = processor(raw_image, question, return_tensors="pt").to("cuda", torch.float16)

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))
```
</details>
##### In 8-bit precision (`int8`)
<details>
<summary> Click to expand </summary>

```python
# pip install accelerate bitsandbytes
import torch
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xxl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xxl", load_in_8bit=True, device_map="auto")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

question = "how many dogs are in the picture?"
inputs = processor(raw_image, question, return_tensors="pt").to("cuda", torch.float16)

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))
```
</details>
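
The same API also covers plain image captioning: call the processor with the image alone, and the model generates a caption instead of answering a question. A minimal sketch mirroring the half-precision setup above (only the processor call changes):

```python
# pip install accelerate
import torch
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

processor = Blip2Processor.from_pretrained("Salesforce/blip2-flan-t5-xxl")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-flan-t5-xxl", torch_dtype=torch.float16, device_map="auto")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

# no text prompt: the model produces a caption for the image
inputs = processor(raw_image, return_tensors="pt").to("cuda", torch.float16)

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))
```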