Compare commits

..

No commits in common. "6010ab27127f28679da94c183f62db4322210737" and "fbfdfbc59cd2b8d93022654fc9d09cee9a37b073" have entirely different histories.

5 changed files with 311 additions and 389 deletions

View File

@ -1,77 +0,0 @@
---
license: other
tags:
- vision
- image-segmentation
datasets:
- scene_parse_150
widget:
- src: https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000001.jpg
example_title: House
- src: https://huggingface.co/datasets/hf-internal-testing/fixtures_ade20k/resolve/main/ADE_val_00000002.jpg
example_title: Castle
---
# SegFormer (b0-sized) model fine-tuned on ADE20k
SegFormer model fine-tuned on ADE20k at resolution 512x512. It was introduced in the paper [SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) by Xie et al. and first released in [this repository](https://github.com/NVlabs/SegFormer).
Disclaimer: The team releasing SegFormer did not write a model card for this model, so this model card has been written by the Hugging Face team.
## Model description
SegFormer consists of a hierarchical Transformer encoder and a lightweight all-MLP decode head to achieve great results on semantic segmentation benchmarks such as ADE20K and Cityscapes. The hierarchical Transformer is first pre-trained on ImageNet-1k, after which a decode head is added and the two are fine-tuned together on a downstream dataset.
## Intended uses & limitations
You can use the raw model for semantic segmentation. See the [model hub](https://huggingface.co/models?other=segformer) to look for fine-tuned versions on a task that interests you.
### How to use
Here is how to use this model to perform semantic segmentation on an image of the COCO 2017 dataset, producing per-pixel logits over the 150 ADE20k classes:
```python
from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
from PIL import Image
import requests
feature_extractor = SegformerFeatureExtractor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)
inputs = feature_extractor(images=image, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits # shape (batch_size, num_labels, height/4, width/4)
```
For more code examples, refer to the [documentation](https://huggingface.co/docs/transformers/model_doc/segformer).
### License
The license for this model can be found [here](https://github.com/NVlabs/SegFormer/blob/master/LICENSE).
### BibTeX entry and citation info
```bibtex
@article{DBLP:journals/corr/abs-2105-15203,
author = {Enze Xie and
Wenhai Wang and
Zhiding Yu and
Anima Anandkumar and
Jose M. Alvarez and
Ping Luo},
title = {SegFormer: Simple and Efficient Design for Semantic Segmentation with
Transformers},
journal = {CoRR},
volume = {abs/2105.15203},
year = {2021},
url = {https://arxiv.org/abs/2105.15203},
eprinttype = {arXiv},
eprint = {2105.15203},
timestamp = {Wed, 02 Jun 2021 11:46:42 +0200},
biburl = {https://dblp.org/rec/journals/corr/abs-2105-15203.bib},
bibsource = {dblp computer science bibliography, https://dblp.org}
}
```

View File

@ -1,6 +1,6 @@
{ {
"architectures": [ "architectures": [
"SegformerForSemanticSegmentation" "SegFormerForImageSegmentation"
], ],
"attention_probs_dropout_prob": 0.0, "attention_probs_dropout_prob": 0.0,
"classifier_dropout_prob": 0.1, "classifier_dropout_prob": 0.1,
@ -27,310 +27,310 @@
256 256
], ],
"id2label": { "id2label": {
"0": "wall", "0": "LABEL_0",
"1": "building", "1": "LABEL_1",
"2": "sky", "2": "LABEL_2",
"3": "floor", "3": "LABEL_3",
"4": "tree", "4": "LABEL_4",
"5": "ceiling", "5": "LABEL_5",
"6": "road", "6": "LABEL_6",
"7": "bed ", "7": "LABEL_7",
"8": "windowpane", "8": "LABEL_8",
"9": "grass", "9": "LABEL_9",
"10": "cabinet", "10": "LABEL_10",
"11": "sidewalk", "11": "LABEL_11",
"12": "person", "12": "LABEL_12",
"13": "earth", "13": "LABEL_13",
"14": "door", "14": "LABEL_14",
"15": "table", "15": "LABEL_15",
"16": "mountain", "16": "LABEL_16",
"17": "plant", "17": "LABEL_17",
"18": "curtain", "18": "LABEL_18",
"19": "chair", "19": "LABEL_19",
"20": "car", "20": "LABEL_20",
"21": "water", "21": "LABEL_21",
"22": "painting", "22": "LABEL_22",
"23": "sofa", "23": "LABEL_23",
"24": "shelf", "24": "LABEL_24",
"25": "house", "25": "LABEL_25",
"26": "sea", "26": "LABEL_26",
"27": "mirror", "27": "LABEL_27",
"28": "rug", "28": "LABEL_28",
"29": "field", "29": "LABEL_29",
"30": "armchair", "30": "LABEL_30",
"31": "seat", "31": "LABEL_31",
"32": "fence", "32": "LABEL_32",
"33": "desk", "33": "LABEL_33",
"34": "rock", "34": "LABEL_34",
"35": "wardrobe", "35": "LABEL_35",
"36": "lamp", "36": "LABEL_36",
"37": "bathtub", "37": "LABEL_37",
"38": "railing", "38": "LABEL_38",
"39": "cushion", "39": "LABEL_39",
"40": "base", "40": "LABEL_40",
"41": "box", "41": "LABEL_41",
"42": "column", "42": "LABEL_42",
"43": "signboard", "43": "LABEL_43",
"44": "chest of drawers", "44": "LABEL_44",
"45": "counter", "45": "LABEL_45",
"46": "sand", "46": "LABEL_46",
"47": "sink", "47": "LABEL_47",
"48": "skyscraper", "48": "LABEL_48",
"49": "fireplace", "49": "LABEL_49",
"50": "refrigerator", "50": "LABEL_50",
"51": "grandstand", "51": "LABEL_51",
"52": "path", "52": "LABEL_52",
"53": "stairs", "53": "LABEL_53",
"54": "runway", "54": "LABEL_54",
"55": "case", "55": "LABEL_55",
"56": "pool table", "56": "LABEL_56",
"57": "pillow", "57": "LABEL_57",
"58": "screen door", "58": "LABEL_58",
"59": "stairway", "59": "LABEL_59",
"60": "river", "60": "LABEL_60",
"61": "bridge", "61": "LABEL_61",
"62": "bookcase", "62": "LABEL_62",
"63": "blind", "63": "LABEL_63",
"64": "coffee table", "64": "LABEL_64",
"65": "toilet", "65": "LABEL_65",
"66": "flower", "66": "LABEL_66",
"67": "book", "67": "LABEL_67",
"68": "hill", "68": "LABEL_68",
"69": "bench", "69": "LABEL_69",
"70": "countertop", "70": "LABEL_70",
"71": "stove", "71": "LABEL_71",
"72": "palm", "72": "LABEL_72",
"73": "kitchen island", "73": "LABEL_73",
"74": "computer", "74": "LABEL_74",
"75": "swivel chair", "75": "LABEL_75",
"76": "boat", "76": "LABEL_76",
"77": "bar", "77": "LABEL_77",
"78": "arcade machine", "78": "LABEL_78",
"79": "hovel", "79": "LABEL_79",
"80": "bus", "80": "LABEL_80",
"81": "towel", "81": "LABEL_81",
"82": "light", "82": "LABEL_82",
"83": "truck", "83": "LABEL_83",
"84": "tower", "84": "LABEL_84",
"85": "chandelier", "85": "LABEL_85",
"86": "awning", "86": "LABEL_86",
"87": "streetlight", "87": "LABEL_87",
"88": "booth", "88": "LABEL_88",
"89": "television receiver", "89": "LABEL_89",
"90": "airplane", "90": "LABEL_90",
"91": "dirt track", "91": "LABEL_91",
"92": "apparel", "92": "LABEL_92",
"93": "pole", "93": "LABEL_93",
"94": "land", "94": "LABEL_94",
"95": "bannister", "95": "LABEL_95",
"96": "escalator", "96": "LABEL_96",
"97": "ottoman", "97": "LABEL_97",
"98": "bottle", "98": "LABEL_98",
"99": "buffet", "99": "LABEL_99",
"100": "poster", "100": "LABEL_100",
"101": "stage", "101": "LABEL_101",
"102": "van", "102": "LABEL_102",
"103": "ship", "103": "LABEL_103",
"104": "fountain", "104": "LABEL_104",
"105": "conveyer belt", "105": "LABEL_105",
"106": "canopy", "106": "LABEL_106",
"107": "washer", "107": "LABEL_107",
"108": "plaything", "108": "LABEL_108",
"109": "swimming pool", "109": "LABEL_109",
"110": "stool", "110": "LABEL_110",
"111": "barrel", "111": "LABEL_111",
"112": "basket", "112": "LABEL_112",
"113": "waterfall", "113": "LABEL_113",
"114": "tent", "114": "LABEL_114",
"115": "bag", "115": "LABEL_115",
"116": "minibike", "116": "LABEL_116",
"117": "cradle", "117": "LABEL_117",
"118": "oven", "118": "LABEL_118",
"119": "ball", "119": "LABEL_119",
"120": "food", "120": "LABEL_120",
"121": "step", "121": "LABEL_121",
"122": "tank", "122": "LABEL_122",
"123": "trade name", "123": "LABEL_123",
"124": "microwave", "124": "LABEL_124",
"125": "pot", "125": "LABEL_125",
"126": "animal", "126": "LABEL_126",
"127": "bicycle", "127": "LABEL_127",
"128": "lake", "128": "LABEL_128",
"129": "dishwasher", "129": "LABEL_129",
"130": "screen", "130": "LABEL_130",
"131": "blanket", "131": "LABEL_131",
"132": "sculpture", "132": "LABEL_132",
"133": "hood", "133": "LABEL_133",
"134": "sconce", "134": "LABEL_134",
"135": "vase", "135": "LABEL_135",
"136": "traffic light", "136": "LABEL_136",
"137": "tray", "137": "LABEL_137",
"138": "ashcan", "138": "LABEL_138",
"139": "fan", "139": "LABEL_139",
"140": "pier", "140": "LABEL_140",
"141": "crt screen", "141": "LABEL_141",
"142": "plate", "142": "LABEL_142",
"143": "monitor", "143": "LABEL_143",
"144": "bulletin board", "144": "LABEL_144",
"145": "shower", "145": "LABEL_145",
"146": "radiator", "146": "LABEL_146",
"147": "glass", "147": "LABEL_147",
"148": "clock", "148": "LABEL_148",
"149": "flag" "149": "LABEL_149"
}, },
"image_size": 224, "image_size": 224,
"initializer_range": 0.02, "initializer_range": 0.02,
"label2id": { "label2id": {
"airplane": 90, "LABEL_0": 0,
"animal": 126, "LABEL_1": 1,
"apparel": 92, "LABEL_10": 10,
"arcade machine": 78, "LABEL_100": 100,
"armchair": 30, "LABEL_101": 101,
"ashcan": 138, "LABEL_102": 102,
"awning": 86, "LABEL_103": 103,
"bag": 115, "LABEL_104": 104,
"ball": 119, "LABEL_105": 105,
"bannister": 95, "LABEL_106": 106,
"bar": 77, "LABEL_107": 107,
"barrel": 111, "LABEL_108": 108,
"base": 40, "LABEL_109": 109,
"basket": 112, "LABEL_11": 11,
"bathtub": 37, "LABEL_110": 110,
"bed ": 7, "LABEL_111": 111,
"bench": 69, "LABEL_112": 112,
"bicycle": 127, "LABEL_113": 113,
"blanket": 131, "LABEL_114": 114,
"blind": 63, "LABEL_115": 115,
"boat": 76, "LABEL_116": 116,
"book": 67, "LABEL_117": 117,
"bookcase": 62, "LABEL_118": 118,
"booth": 88, "LABEL_119": 119,
"bottle": 98, "LABEL_12": 12,
"box": 41, "LABEL_120": 120,
"bridge": 61, "LABEL_121": 121,
"buffet": 99, "LABEL_122": 122,
"building": 1, "LABEL_123": 123,
"bulletin board": 144, "LABEL_124": 124,
"bus": 80, "LABEL_125": 125,
"cabinet": 10, "LABEL_126": 126,
"canopy": 106, "LABEL_127": 127,
"car": 20, "LABEL_128": 128,
"case": 55, "LABEL_129": 129,
"ceiling": 5, "LABEL_13": 13,
"chair": 19, "LABEL_130": 130,
"chandelier": 85, "LABEL_131": 131,
"chest of drawers": 44, "LABEL_132": 132,
"clock": 148, "LABEL_133": 133,
"coffee table": 64, "LABEL_134": 134,
"column": 42, "LABEL_135": 135,
"computer": 74, "LABEL_136": 136,
"conveyer belt": 105, "LABEL_137": 137,
"counter": 45, "LABEL_138": 138,
"countertop": 70, "LABEL_139": 139,
"cradle": 117, "LABEL_14": 14,
"crt screen": 141, "LABEL_140": 140,
"curtain": 18, "LABEL_141": 141,
"cushion": 39, "LABEL_142": 142,
"desk": 33, "LABEL_143": 143,
"dirt track": 91, "LABEL_144": 144,
"dishwasher": 129, "LABEL_145": 145,
"door": 14, "LABEL_146": 146,
"earth": 13, "LABEL_147": 147,
"escalator": 96, "LABEL_148": 148,
"fan": 139, "LABEL_149": 149,
"fence": 32, "LABEL_15": 15,
"field": 29, "LABEL_16": 16,
"fireplace": 49, "LABEL_17": 17,
"flag": 149, "LABEL_18": 18,
"floor": 3, "LABEL_19": 19,
"flower": 66, "LABEL_2": 2,
"food": 120, "LABEL_20": 20,
"fountain": 104, "LABEL_21": 21,
"glass": 147, "LABEL_22": 22,
"grandstand": 51, "LABEL_23": 23,
"grass": 9, "LABEL_24": 24,
"hill": 68, "LABEL_25": 25,
"hood": 133, "LABEL_26": 26,
"house": 25, "LABEL_27": 27,
"hovel": 79, "LABEL_28": 28,
"kitchen island": 73, "LABEL_29": 29,
"lake": 128, "LABEL_3": 3,
"lamp": 36, "LABEL_30": 30,
"land": 94, "LABEL_31": 31,
"light": 82, "LABEL_32": 32,
"microwave": 124, "LABEL_33": 33,
"minibike": 116, "LABEL_34": 34,
"mirror": 27, "LABEL_35": 35,
"monitor": 143, "LABEL_36": 36,
"mountain": 16, "LABEL_37": 37,
"ottoman": 97, "LABEL_38": 38,
"oven": 118, "LABEL_39": 39,
"painting": 22, "LABEL_4": 4,
"palm": 72, "LABEL_40": 40,
"path": 52, "LABEL_41": 41,
"person": 12, "LABEL_42": 42,
"pier": 140, "LABEL_43": 43,
"pillow": 57, "LABEL_44": 44,
"plant": 17, "LABEL_45": 45,
"plate": 142, "LABEL_46": 46,
"plaything": 108, "LABEL_47": 47,
"pole": 93, "LABEL_48": 48,
"pool table": 56, "LABEL_49": 49,
"poster": 100, "LABEL_5": 5,
"pot": 125, "LABEL_50": 50,
"radiator": 146, "LABEL_51": 51,
"railing": 38, "LABEL_52": 52,
"refrigerator": 50, "LABEL_53": 53,
"river": 60, "LABEL_54": 54,
"road": 6, "LABEL_55": 55,
"rock": 34, "LABEL_56": 56,
"rug": 28, "LABEL_57": 57,
"runway": 54, "LABEL_58": 58,
"sand": 46, "LABEL_59": 59,
"sconce": 134, "LABEL_6": 6,
"screen": 130, "LABEL_60": 60,
"screen door": 58, "LABEL_61": 61,
"sculpture": 132, "LABEL_62": 62,
"sea": 26, "LABEL_63": 63,
"seat": 31, "LABEL_64": 64,
"shelf": 24, "LABEL_65": 65,
"ship": 103, "LABEL_66": 66,
"shower": 145, "LABEL_67": 67,
"sidewalk": 11, "LABEL_68": 68,
"signboard": 43, "LABEL_69": 69,
"sink": 47, "LABEL_7": 7,
"sky": 2, "LABEL_70": 70,
"skyscraper": 48, "LABEL_71": 71,
"sofa": 23, "LABEL_72": 72,
"stage": 101, "LABEL_73": 73,
"stairs": 53, "LABEL_74": 74,
"stairway": 59, "LABEL_75": 75,
"step": 121, "LABEL_76": 76,
"stool": 110, "LABEL_77": 77,
"stove": 71, "LABEL_78": 78,
"streetlight": 87, "LABEL_79": 79,
"swimming pool": 109, "LABEL_8": 8,
"swivel chair": 75, "LABEL_80": 80,
"table": 15, "LABEL_81": 81,
"tank": 122, "LABEL_82": 82,
"television receiver": 89, "LABEL_83": 83,
"tent": 114, "LABEL_84": 84,
"toilet": 65, "LABEL_85": 85,
"towel": 81, "LABEL_86": 86,
"tower": 84, "LABEL_87": 87,
"trade name": 123, "LABEL_88": 88,
"traffic light": 136, "LABEL_89": 89,
"tray": 137, "LABEL_9": 9,
"tree": 4, "LABEL_90": 90,
"truck": 83, "LABEL_91": 91,
"van": 102, "LABEL_92": 92,
"vase": 135, "LABEL_93": 93,
"wall": 0, "LABEL_94": 94,
"wardrobe": 35, "LABEL_95": 95,
"washer": 107, "LABEL_96": 96,
"water": 21, "LABEL_97": 97,
"waterfall": 113, "LABEL_98": 98,
"windowpane": 8 "LABEL_99": 99
}, },
"layer_norm_eps": 1e-06, "layer_norm_eps": 1e-06,
"mlp_ratios": [ "mlp_ratios": [
@ -354,7 +354,6 @@
3, 3,
3 3
], ],
"reshape_last_stage": true,
"sr_ratios": [ "sr_ratios": [
8, 8,
4, 4,
@ -367,6 +366,5 @@
2, 2,
2 2
], ],
"torch_dtype": "float32", "transformers_version": "4.9.0.dev0"
"transformers_version": "4.12.0.dev0"
} }

View File

@ -1,18 +1,22 @@
{ {
"do_normalize": true, "do_normalize": true,
"do_resize": true, "do_resize": true,
"feature_extractor_type": "SegformerFeatureExtractor", "feature_extractor_type": "SegFormerFeatureExtractor",
"image_mean": [ "image_mean": [
0.485, 0.485,
0.456, 0.456,
0.406 0.406
], ],
"image_scale": [
2048,
512
],
"image_std": [ "image_std": [
0.229, 0.229,
0.224, 0.224,
0.225 0.225
], ],
"reduce_labels": true, "keep_ratio": true,
"resample": 2, "resample": 2,
"size": 512 "size_divisor": 32
} }

BIN
pytorch_model.bin (Stored with Git LFS)

Binary file not shown.

BIN
tf_model.h5 (Stored with Git LFS)

Binary file not shown.