From 723ec3f9f85621d22c2a9fe570d3b04a2d773243 Mon Sep 17 00:00:00 2001 From: Ankur Goyal Date: Wed, 24 Aug 2022 17:18:56 +0000 Subject: [PATCH] Upload TFLayoutLMForQuestionAnswering --- README.md | 69 +++++++++++++++++++++++------------------------------ config.json | 8 ++++--- tf_model.h5 | 3 +++ 3 files changed, 38 insertions(+), 42 deletions(-) create mode 100644 tf_model.h5 diff --git a/README.md b/README.md index f09c45c..d9aec21 100644 --- a/README.md +++ b/README.md @@ -1,57 +1,48 @@ --- -language: en -thumbnail: https://uploads-ssl.webflow.com/5e3898dff507782a6580d710/614a23fcd8d4f7434c765ab9_logo.png license: mit +tags: +- generated_from_keras_callback +model-index: +- name: layoutlm-document-qa + results: [] --- -# LayoutLM for Visual Question Answering + -This is a fine-tuned version of the multi-modal [LayoutLM](https://aka.ms/layoutlm) model for the task of question answering on documents. It has been fine-tuned on +# layoutlm-document-qa -## Model details +This model is a fine-tuned version of [impira/layoutlm-document-qa](https://huggingface.co/impira/layoutlm-document-qa) on an unknown dataset. +It achieves the following results on the evaluation set: -The LayoutLM model was developed at Microsoft ([paper](https://arxiv.org/abs/1912.13318)) as a general purpose tool for understanding documents. This model is a fine-tuned checkpoint of [LayoutLM-Base-Cased](https://huggingface.co/microsoft/layoutlm-base-uncased), using both the [SQuAD2.0](https://huggingface.co/datasets/squad_v2) and [DocVQA](https://www.docvqa.org/) datasets. -## Getting started with the model +## Model description -To run these examples, you must have [PIL](https://pillow.readthedocs.io/en/stable/installation.html), [pytesseract](https://pypi.org/project/pytesseract/), and [PyTorch](https://pytorch.org/get-started/locally/) installed in addition to [transformers](https://huggingface.co/docs/transformers/index). +More information needed -```python -from transformers import AutoTokenizer, pipeline +## Intended uses & limitations -tokenizer = AutoTokenizer.from_pretrained( - "impira/layoutlm-document-qa", - add_prefix_space=True, - trust_remote_code=True, -) +More information needed -nlp = pipeline( - model="impira/layoutlm-document-qa", - tokenizer=tokenizer, - trust_remote_code=True, -) +## Training and evaluation data -nlp( - "https://templates.invoicehome.com/invoice-template-us-neat-750px.png", - "What is the invoice number?" -) -# {'score': 0.9943977, 'answer': 'us-001', 'start': 15, 'end': 15} +More information needed -nlp( - "https://miro.medium.com/max/787/1*iECQRIiOGTmEFLdWkVIH2g.jpeg", - "What is the purchase amount?" -) -# {'score': 0.9912159, 'answer': '$1,000,000,000', 'start': 97, 'end': 97} +## Training procedure -nlp( - "https://www.accountingcoach.com/wp-content/uploads/2013/10/income-statement-example@2x.png", - "What are the 2020 net sales?" -) -# {'score': 0.59147286, 'answer': '$ 3,750', 'start': 19, 'end': 20} -``` +### Training hyperparameters -**NOTE**: This model relies on a [model definition](https://github.com/huggingface/transformers/pull/18407) and [pipeline](https://github.com/huggingface/transformers/pull/18414) that are currently in review to be included in the transformers project. In the meantime, you'll have to use the `trust_remote_code=True` flag to run this model. +The following hyperparameters were used during training: +- optimizer: None +- training_precision: float32 -## About us +### Training results -This model was created by the team at [Impira](https://www.impira.com/). + + +### Framework versions + +- Transformers 4.22.0.dev0 +- TensorFlow 2.9.2 +- Datasets 2.4.0 +- Tokenizers 0.12.1 diff --git a/config.json b/config.json index fdf1c52..239d106 100644 --- a/config.json +++ b/config.json @@ -1,15 +1,17 @@ { - "attention_probs_dropout_prob": 0.1, + "_name_or_path": "impira/layoutlm-document-qa", "architectures": [ "LayoutLMForQuestionAnswering" ], + "attention_probs_dropout_prob": 0.1, + "bos_token_id": 0, + "classifier_dropout": null, "custom_pipelines": { "document-question-answering": { "impl": "pipeline_document_question_answering.DocumentQuestionAnsweringPipeline", "pt": "AutoModelForQuestionAnswering" } }, - "bos_token_id": 0, "eos_token_id": 2, "gradient_checkpointing": false, "hidden_act": "gelu", @@ -26,7 +28,7 @@ "pad_token_id": 1, "position_embedding_type": "absolute", "tokenizer_class": "RobertaTokenizer", - "transformers_version": "4.6.1", + "transformers_version": "4.22.0.dev0", "type_vocab_size": 1, "use_cache": true, "vocab_size": 50265 diff --git a/tf_model.h5 b/tf_model.h5 new file mode 100644 index 0000000..b253626 --- /dev/null +++ b/tf_model.h5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2796bc2f67ac8e1abe55decfa104c7182376c4bf1f8b97ab87fd8bb4768f2f07 +size 511465184