From 060ed34a4a3501c5e8a328c69862871c2e242016 Mon Sep 17 00:00:00 2001
From: Niels Rogge
Date: Fri, 5 Aug 2022 13:29:29 +0000
Subject: [PATCH] Improve README

---
 README.md | 34 +++++++++++++++++++++++++---------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 2f0800f..f70e522 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,8 @@ The model is trained using a "bipartite matching loss": one compares the predict
 
 DETR can be naturally extended to perform panoptic segmentation, by adding a mask head on top of the decoder outputs.
 
+![model image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/detr_architecture.png)
+
 ## Intended uses & limitations
 
 You can use the raw model for panoptic segmentation. See the [model hub](https://huggingface.co/models?search=facebook/detr) to look for all available DETR models.
@@ -39,22 +41,36 @@ You can use the raw model for panoptic segmentation. See the [model hub](https:/
 Here is how to use this model:
 
 ```python
-from transformers import DetrFeatureExtractor, DetrForSegmentation
-from PIL import Image
+import io
 import requests
+from PIL import Image
+import torch
+import numpy
 
-url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
+from transformers import DetrFeatureExtractor, DetrForSegmentation
+from transformers.models.detr.feature_extraction_detr import rgb_to_id
+
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 image = Image.open(requests.get(url, stream=True).raw)
 
-feature_extractor = DetrFeatureExtractor.from_pretrained('facebook/detr-resnet-50-panoptic')
-model = DetrForSegmentation.from_pretrained('facebook/detr-resnet-50-panoptic')
+feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50-panoptic")
+model = DetrForSegmentation.from_pretrained("facebook/detr-resnet-50-panoptic")
 
+# prepare image for the model
 inputs = feature_extractor(images=image, return_tensors="pt")
+
+# forward pass
 outputs = model(**inputs)
-# model predicts COCO classes, bounding boxes, and masks
-logits = outputs.logits
-bboxes = outputs.pred_boxes
-masks = outputs.pred_masks
+
+# use the `post_process_panoptic` method of `DetrFeatureExtractor` to convert to COCO format
+processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0)
+result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0]
+
+# the segmentation is stored in a special-format png
+panoptic_seg = Image.open(io.BytesIO(result["png_string"]))
+panoptic_seg = numpy.array(panoptic_seg, dtype=numpy.uint8)
+# retrieve the ids corresponding to each mask
+panoptic_seg_id = rgb_to_id(panoptic_seg)
 ```
 
 Currently, both the feature extractor and model support PyTorch.
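
Note (outside the patch, for reviewers): the new snippet ends at `panoptic_seg_id`, a 2D array holding one integer segment id per pixel. Below is a minimal sketch of how one might inspect the predicted segments from there. It is a hedged continuation, not part of this patch: it assumes the variables from the example above are in scope, and that each entry of `result["segments_info"]` carries `id`, `category_id`, and `isthing` fields as `post_process_panoptic` returned them in the `transformers` version this README targets.

```python
# continuation of the README snippet above (assumes result, panoptic_seg_id, model in scope);
# result["segments_info"] is assumed to hold one dict per segment with id/category_id/isthing
for segment in result["segments_info"]:
    # boolean mask selecting the pixels that belong to this segment
    mask = panoptic_seg_id == segment["id"]
    # map the predicted class index to its human-readable COCO name
    label = model.config.id2label[segment["category_id"]]
    print(f"{label} (isthing={segment['isthing']}): {mask.sum()} pixels")
```

Each id in the PNG-encoded map corresponds to one `segments_info` entry, which is what makes per-segment masks recoverable after the COCO-format round trip.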