Bug fixes + Copy formatting from transformers repo

This commit is contained in:
Ankur Goyal 2022-08-07 18:37:40 -07:00
parent 73a21c855a
commit 667af5ae46
5 changed files with 35 additions and 3 deletions

View File

@ -13,3 +13,9 @@ This is a fine-tuned version of the multi-modal [LayoutLM](https://aka.ms/layout
The LayoutLM model was developed at Microsoft ([paper](https://arxiv.org/abs/1912.13318)) as a general-purpose tool for understanding documents. This model is a fine-tuned checkpoint of [LayoutLM-Base-Uncased](https://huggingface.co/microsoft/layoutlm-base-uncased), using both the [SQuAD2.0](https://huggingface.co/datasets/squad_v2) and [DocVQA](https://www.docvqa.org/) datasets. The LayoutLM model was developed at Microsoft ([paper](https://arxiv.org/abs/1912.13318)) as a general-purpose tool for understanding documents. This model is a fine-tuned checkpoint of [LayoutLM-Base-Uncased](https://huggingface.co/microsoft/layoutlm-base-uncased), using both the [SQuAD2.0](https://huggingface.co/datasets/squad_v2) and [DocVQA](https://www.docvqa.org/) datasets.
## Getting started with the model ## Getting started with the model
## About us
This model was created by the team at [Impira](https://www.impira.com/).

View File

@ -1,4 +1,4 @@
# NOTE: This code is currently under review for inclusion in the main # NOTE: This code is currently under review for inclusion in the main
# huggingface/transformers repository: # huggingface/transformers repository:
# https://github.com/huggingface/transformers/pull/18414 # https://github.com/huggingface/transformers/pull/18414
from typing import List, Optional, Tuple, Union from typing import List, Optional, Tuple, Union
@ -189,7 +189,9 @@ class DocumentQuestionAnsweringPipeline(Pipeline):
- **answer** (`str`) -- The answer to the question. - **answer** (`str`) -- The answer to the question.
""" """
if isinstance(question, str): if isinstance(question, str):
inputs = {"question": question, "image": image, "word_boxes": word_boxes} inputs = {"question": question, "image": image}
if word_boxes is not None:
inputs["word_boxes"] = word_boxes
else: else:
inputs = image inputs = image
return super().__call__(inputs, **kwargs) return super().__call__(inputs, **kwargs)

3
pyproject.toml Normal file
View File

@ -0,0 +1,3 @@
[tool.black]
line-length = 119
target-version = ['py35']

View File

@ -1,4 +1,4 @@
# NOTE: This code is currently under review for inclusion in the main # NOTE: This code is currently under review for inclusion in the main
# huggingface/transformers repository: # huggingface/transformers repository:
# https://github.com/huggingface/transformers/pull/18414 # https://github.com/huggingface/transformers/pull/18414
@ -15,6 +15,7 @@ if is_vision_available():
from PIL import Image from PIL import Image
from transformers.image_utils import load_image from transformers.image_utils import load_image
VISION_LOADED = True VISION_LOADED = True
else: else:
Image = None Image = None
@ -24,10 +25,12 @@ else:
TESSERACT_LOADED = False TESSERACT_LOADED = False
if is_pytesseract_available(): if is_pytesseract_available():
import pytesseract import pytesseract
TESSERACT_LOADED = True TESSERACT_LOADED = True
else: else:
pytesseract = None pytesseract = None
def decode_spans( def decode_spans(
start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int, undesired_tokens: np.ndarray start: np.ndarray, end: np.ndarray, topk: int, max_answer_len: int, undesired_tokens: np.ndarray
) -> Tuple: ) -> Tuple:

18
setup.cfg Normal file
View File

@ -0,0 +1,18 @@
[isort]
default_section = FIRSTPARTY
ensure_newline_before_comments = True
force_grid_wrap = 0
include_trailing_comma = True
known_first_party = transformers
line_length = 119
lines_after_imports = 2
multi_line_output = 3
use_parentheses = True
[flake8]
ignore = E203, E501, E741, W503, W605
max-line-length = 119
[tool:pytest]
doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS