From 9512a406c6e061ab0dad6b15fd41dc6f72744827 Mon Sep 17 00:00:00 2001 From: Patrick von Platen Date: Fri, 15 Oct 2021 09:09:48 +0000 Subject: [PATCH] up --- config.json | 69 ++++++++++++++++++++++++++++++++++++++++ preprocessor_config.json | 9 ++++++ pytorch_model.bin | 3 ++ special_tokens_map.json | 1 + tokenizer_config.json | 1 + vocab.json | 1 + 6 files changed, 84 insertions(+) create mode 100644 config.json create mode 100644 preprocessor_config.json create mode 100644 pytorch_model.bin create mode 100644 special_tokens_map.json create mode 100644 tokenizer_config.json create mode 100644 vocab.json diff --git a/config.json b/config.json new file mode 100644 index 0000000..9efe0e9 --- /dev/null +++ b/config.json @@ -0,0 +1,69 @@ +{ + "activation_dropout": 0.1, + "apply_spec_augment": true, + "architectures": [ + "UniSpeechForPreTraining" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "codevector_dim": 256, + "contrastive_logits_temperature": 0.1, + "conv_bias": false, + "conv_dim": [ + 32, + 32, + 32 + ], + "conv_kernel": [ + 8, + 8, + 8 + ], + "conv_stride": [ + 4, + 4, + 4 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "diversity_loss_weight": 0.1, + "do_stable_layer_norm": true, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_dropout": 0.0, + "feat_extract_norm": "layer", + "feat_proj_dropout": 0.1, + "feat_quantizer_dropout": 0.0, + "final_dropout": 0.1, + "gradient_checkpointing": false, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_dropout_prob": 0.1, + "hidden_size": 16, + "initializer_range": 0.02, + "intermediate_size": 20, + "layer_norm_eps": 1e-05, + "layerdrop": 0.1, + "mask_feature_length": 10, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_prob": 0.05, + "model_type": "unispeech", + "num_attention_heads": 2, + "num_codevector_groups": 2, + "num_codevectors_per_group": 320, + "num_conv_pos_embedding_groups": 2, + 
"num_conv_pos_embeddings": 16, + "num_ctc_classes": 80, + "num_feat_extract_layers": 3, + "num_hidden_layers": 4, + "num_negatives": 10, + "pad_token_id": 0, + "proj_codevector_dim": 256, + "replace_prob": 0.5, + "torch_dtype": "float32", + "transformers_version": "4.12.0.dev0", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000..a0b7227 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0.0, + "return_attention_mask": false, + "sampling_rate": 16000 +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000..9c9004b --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2643de148bfbb4889453163ff6aecd31f477e4a3afc71efcedf0d9f0c79021a +size 841515 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..25bc396 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1 @@ +{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..df3a3f7 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1 @@ +{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "do_lower_case": false, "word_delimiter_token": "|", "return_attention_mask": false, "do_normalize": true, "model_max_length": 9223372036854775807, "special_tokens_map_file": "/home/lysandre/.cache/huggingface/transformers/208086b2429fa2ba5b196810c1bcd7d61e2c8d4afd65d05d0670096d735fd5bb.9d6cd81ef646692fb1c169a880161ea1cb95f49694f220aced9b704b457e51dd", "tokenizer_file": null, "name_or_path": "facebook/wav2vec2-base-960h", "tokenizer_class": "Wav2Vec2CTCTokenizer"} \ No newline at end of file 
diff --git a/vocab.json b/vocab.json new file mode 100644 index 0000000..88181b9 --- /dev/null +++ b/vocab.json @@ -0,0 +1 @@ +{"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "E": 5, "T": 6, "A": 7, "O": 8, "N": 9, "I": 10, "H": 11, "S": 12, "R": 13, "D": 14, "L": 15, "U": 16, "M": 17, "W": 18, "C": 19, "F": 20, "G": 21, "Y": 22, "P": 23, "B": 24, "V": 25, "K": 26, "'": 27, "X": 28, "J": 29, "Q": 30, "Z": 31} \ No newline at end of file