From 667af5ae46c9a09ec4563d8fb0263c8c54a20493 Mon Sep 17 00:00:00 2001
From: Ankur Goyal
Date: Sun, 7 Aug 2022 18:37:40 -0700
Subject: [PATCH] Bug fixes + Copy formatting from transformers repo

---
 README.md                               |  6 ++++++
 pipeline_document_question_answering.py |  6 ++++--
 pyproject.toml                          |  3 +++
 qa_helpers.py                           |  5 ++++-
 setup.cfg                               | 18 ++++++++++++++++++
 5 files changed, 35 insertions(+), 3 deletions(-)
 create mode 100644 pyproject.toml
 create mode 100644 setup.cfg

diff --git a/README.md b/README.md
index 4666008..7687f38 100644
--- a/README.md
+++ b/README.md
@@ -13,3 +13,9 @@ This is a fine-tuned version of the multi-modal [LayoutLM](https://aka.ms/layout
 The LayoutLM model was developed at Microsoft ([paper](https://arxiv.org/abs/1912.13318)) as a general purpose tool for understanding documents. This model is a fine-tuned checkpoint of [LayoutLM-Base-Cased](https://huggingface.co/microsoft/layoutlm-base-uncased), using both the [SQuAD2.0](https://huggingface.co/datasets/squad_v2) and [DocVQA](https://www.docvqa.org/) datasets.
 
 ## Getting started with the model
+
+
+
+## About us
+
+This model was created by the team at [Impira](https://www.impira.com/).
diff --git a/pipeline_document_question_answering.py b/pipeline_document_question_answering.py
index 02f6684..6048eeb 100644
--- a/pipeline_document_question_answering.py
+++ b/pipeline_document_question_answering.py
@@ -1,4 +1,4 @@
-# NOTE: This code is currently under review for inclusion in the main 
+# NOTE: This code is currently under review for inclusion in the main
 # huggingface/transformers repository:
 # https://github.com/huggingface/transformers/pull/18414
 from typing import List, Optional, Tuple, Union
@@ -189,7 +189,9 @@ class DocumentQuestionAnsweringPipeline(Pipeline):
             - **answer** (`str`) -- The answer to the question.
         """
         if isinstance(question, str):
-            inputs = {"question": question, "image": image, "word_boxes": word_boxes}
+            inputs = {"question": question, "image": image}
+            if word_boxes is not None:
+                inputs["word_boxes"] = word_boxes
         else:
             inputs = image
         return super().__call__(inputs, **kwargs)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..291558c
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,3 @@
+[tool.black]
+line-length = 119
+target-version = ['py35']
diff --git a/qa_helpers.py b/qa_helpers.py
index 5f47f37..2b5c675 100644
--- a/qa_helpers.py
+++ b/qa_helpers.py
@@ -1,4 +1,4 @@
-# NOTE: This code is currently under review for inclusion in the main 
+# NOTE: This code is currently under review for inclusion in the main
 # huggingface/transformers repository:
 # https://github.com/huggingface/transformers/pull/18414
 
@@ -15,6 +15,7 @@
 if is_vision_available():
     from PIL import Image
     from transformers.image_utils import load_image
+
     VISION_LOADED = True
 else:
     Image = None
@@ -24,10 +25,12 @@ else:
 TESSERACT_LOADED = False
 if is_pytesseract_available():
     import pytesseract
+
     TESSERACT_LOADED = True
 else:
     pytesseract = None
 
+
 def decode_spans(
     start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int, undesired_tokens: np.ndarray
 ) -> Tuple:
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..cc02383
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,18 @@
+[isort]
+default_section = FIRSTPARTY
+ensure_newline_before_comments = True
+force_grid_wrap = 0
+include_trailing_comma = True
+known_first_party = transformers
+
+line_length = 119
+lines_after_imports = 2
+multi_line_output = 3
+use_parentheses = True
+
+[flake8]
+ignore = E203, E501, E741, W503, W605
+max-line-length = 119
+
+[tool:pytest]
+doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS