| { | |
| "_name_or_path": "dandelin/vilt-b32-mlm", | |
| "architectures": [ | |
| "ViltForQuestionAnswering" | |
| ], | |
| "attention_probs_dropout_prob": 0.0, | |
| "hidden_act": "gelu", | |
| "hidden_dropout_prob": 0.0, | |
| "hidden_size": 768, | |
| "id2label": { | |
| "0": "woods", | |
| "1": "down", | |
| "2": "table", | |
| "3": "backpack", | |
| "4": "wine", | |
| "5": "sky", | |
| "6": "suv", | |
| "7": "crossing", | |
| "8": "girl", | |
| "9": "shrimp", | |
| "10": "plastic", | |
| "11": "forest", | |
| "12": "double", | |
| "13": "blonde", | |
| "14": "0", | |
| "15": "white and blue", | |
| "16": "woman", | |
| "17": "leather", | |
| "18": "yes", | |
| "19": "shadows", | |
| "20": "3", | |
| "21": "6", | |
| "22": "many", | |
| "23": "cat", | |
| "24": "right", | |
| "25": "7", | |
| "26": "hat", | |
| "27": "picnic table", | |
| "28": "king", | |
| "29": "green", | |
| "30": "chair", | |
| "31": "donut", | |
| "32": "red and blue", | |
| "33": "wedding", | |
| "34": "white", | |
| "35": "wine tasting", | |
| "36": "1", | |
| "37": "gray", | |
| "38": "they aren't", | |
| "39": "8", | |
| "40": "shadow", | |
| "41": "black", | |
| "42": "fashion", | |
| "43": "dog", | |
| "44": "out", | |
| "45": "white and black", | |
| "46": "train", | |
| "47": "ice cream", | |
| "48": "bus", | |
| "49": "birthday", | |
| "50": "queen", | |
| "51": "10", | |
| "52": "cup", | |
| "53": "little girl", | |
| "54": "air", | |
| "55": "no", | |
| "56": "beige", | |
| "57": "bedroom", | |
| "58": "jeep", | |
| "59": "not sure", | |
| "60": "full", | |
| "61": "boy", | |
| "62": "at table", | |
| "63": "watching", | |
| "64": "5", | |
| "65": "wall", | |
| "66": "walking", | |
| "67": "brown", | |
| "68": "human", | |
| "69": "car", | |
| "70": "tired", | |
| "71": "chopsticks", | |
| "72": "park", | |
| "73": "4", | |
| "74": "red and yellow", | |
| "75": "blue and white", | |
| "76": "outside", | |
| "77": "pink", | |
| "78": "doughnut", | |
| "79": "red", | |
| "80": "hawaii", | |
| "81": "2", | |
| "82": "resting", | |
| "83": "crown", | |
| "84": "sun", | |
| "85": "yellow", | |
| "86": "style", | |
| "87": "skateboard" | |
| }, | |
| "image_size": 384, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 3072, | |
| "label2id": { | |
| "0": 14, | |
| "1": 36, | |
| "10": 51, | |
| "2": 81, | |
| "3": 20, | |
| "4": 73, | |
| "5": 64, | |
| "6": 21, | |
| "7": 25, | |
| "8": 39, | |
| "air": 54, | |
| "at table": 62, | |
| "backpack": 3, | |
| "bedroom": 57, | |
| "beige": 56, | |
| "birthday": 49, | |
| "black": 41, | |
| "blonde": 13, | |
| "blue and white": 75, | |
| "boy": 61, | |
| "brown": 67, | |
| "bus": 48, | |
| "car": 69, | |
| "cat": 23, | |
| "chair": 30, | |
| "chopsticks": 71, | |
| "crossing": 7, | |
| "crown": 83, | |
| "cup": 52, | |
| "dog": 43, | |
| "donut": 31, | |
| "double": 12, | |
| "doughnut": 78, | |
| "down": 1, | |
| "fashion": 42, | |
| "forest": 11, | |
| "full": 60, | |
| "girl": 8, | |
| "gray": 37, | |
| "green": 29, | |
| "hat": 26, | |
| "hawaii": 80, | |
| "human": 68, | |
| "ice cream": 47, | |
| "jeep": 58, | |
| "king": 28, | |
| "leather": 17, | |
| "little girl": 53, | |
| "many": 22, | |
| "no": 55, | |
| "not sure": 59, | |
| "out": 44, | |
| "outside": 76, | |
| "park": 72, | |
| "picnic table": 27, | |
| "pink": 77, | |
| "plastic": 10, | |
| "queen": 50, | |
| "red": 79, | |
| "red and blue": 32, | |
| "red and yellow": 74, | |
| "resting": 82, | |
| "right": 24, | |
| "shadow": 40, | |
| "shadows": 19, | |
| "shrimp": 9, | |
| "skateboard": 87, | |
| "sky": 5, | |
| "style": 86, | |
| "sun": 84, | |
| "suv": 6, | |
| "table": 2, | |
| "they aren't": 38, | |
| "tired": 70, | |
| "train": 46, | |
| "walking": 66, | |
| "wall": 65, | |
| "watching": 63, | |
| "wedding": 33, | |
| "white": 34, | |
| "white and black": 45, | |
| "white and blue": 15, | |
| "wine": 4, | |
| "wine tasting": 35, | |
| "woman": 16, | |
| "woods": 0, | |
| "yellow": 85, | |
| "yes": 18 | |
| }, | |
| "layer_norm_eps": 1e-12, | |
| "max_image_length": -1, | |
| "max_position_embeddings": 40, | |
| "modality_type_vocab_size": 2, | |
| "model_type": "vilt", | |
| "num_attention_heads": 12, | |
| "num_channels": 3, | |
| "num_hidden_layers": 12, | |
| "num_images": -1, | |
| "patch_size": 32, | |
| "qkv_bias": true, | |
| "tie_word_embeddings": false, | |
| "torch_dtype": "float32", | |
| "transformers_version": "4.31.0", | |
| "type_vocab_size": 2, | |
| "vocab_size": 30522 | |
| } | |