From 1f7bede5e029575bb21ddaa01d747390813f8c82 Mon Sep 17 00:00:00 2001
From: Ankur Goyal
Date: Wed, 24 Aug 2022 09:27:31 -0700
Subject: [PATCH] Remove model in anticipation of PR #18407

---
 config.json               |   4 --
 configuration_layoutlm.py |   3 -
 modeling_layoutlm.py      | 147 --------------------------------------
 3 files changed, 154 deletions(-)
 delete mode 100644 configuration_layoutlm.py
 delete mode 100644 modeling_layoutlm.py

diff --git a/config.json b/config.json
index eab9d11..fdf1c52 100644
--- a/config.json
+++ b/config.json
@@ -3,10 +3,6 @@
   "architectures": [
     "LayoutLMForQuestionAnswering"
   ],
-  "auto_map": {
-    "AutoConfig": "configuration_layoutlm.LayoutLMConfig",
-    "AutoModelForQuestionAnswering": "modeling_layoutlm.LayoutLMForQuestionAnswering"
-  },
   "custom_pipelines": {
     "document-question-answering": {
       "impl": "pipeline_document_question_answering.DocumentQuestionAnsweringPipeline",
diff --git a/configuration_layoutlm.py b/configuration_layoutlm.py
deleted file mode 100644
index ecaef28..0000000
--- a/configuration_layoutlm.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# This model just uses the existing LayoutLMConfig which is just imported
-# as a thin wrapper
-from transformers.models.layoutlm.configuration_layoutlm import LayoutLMConfig
diff --git a/modeling_layoutlm.py b/modeling_layoutlm.py
deleted file mode 100644
index 277af56..0000000
--- a/modeling_layoutlm.py
+++ /dev/null
@@ -1,147 +0,0 @@
-# NOTE: This code is currently under review for inclusion in the main
-# huggingface/transformers repository:
-# https://github.com/huggingface/transformers/pull/18407
-""" PyTorch LayoutLM model."""
-
-
-import math
-from typing import Optional, Tuple, Union
-
-import torch
-from torch import nn
-from torch.nn import CrossEntropyLoss
-
-from transformers.modeling_outputs import QuestionAnsweringModelOutput
-from transformers.models.layoutlm import LayoutLMModel, LayoutLMPreTrainedModel
-
-
-class LayoutLMForQuestionAnswering(LayoutLMPreTrainedModel):
-    def __init__(self, config, has_visual_segment_embedding=True):
-        super().__init__(config)
-        self.num_labels = config.num_labels
-
-        self.layoutlm = LayoutLMModel(config)
-        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
-
-        # Initialize weights and apply final processing
-        self.post_init()
-
-    def get_input_embeddings(self):
-        return self.layoutlm.embeddings.word_embeddings
-
-    def forward(
-        self,
-        input_ids: Optional[torch.LongTensor] = None,
-        bbox: Optional[torch.LongTensor] = None,
-        attention_mask: Optional[torch.FloatTensor] = None,
-        token_type_ids: Optional[torch.LongTensor] = None,
-        position_ids: Optional[torch.LongTensor] = None,
-        head_mask: Optional[torch.FloatTensor] = None,
-        inputs_embeds: Optional[torch.FloatTensor] = None,
-        start_positions: Optional[torch.LongTensor] = None,
-        end_positions: Optional[torch.LongTensor] = None,
-        output_attentions: Optional[bool] = None,
-        output_hidden_states: Optional[bool] = None,
-        return_dict: Optional[bool] = None,
-    ) -> Union[Tuple, QuestionAnsweringModelOutput]:
-        r"""
-        start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
-            Labels for position (index) of the start of the labelled span for computing the token classification loss.
-            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
-            are not taken into account for computing the loss.
-        end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
-            Labels for position (index) of the end of the labelled span for computing the token classification loss.
-            Positions are clamped to the length of the sequence (`sequence_length`). Position outside of the sequence
-            are not taken into account for computing the loss.
-
-        Returns:
-
-        Example:
-
-        In this example below, we give the LayoutLMv2 model an image (of texts) and ask it a question. It will give us
-        a prediction of what it thinks the answer is (the span of the answer within the texts parsed from the image).
-
-        ```python
-        >>> from transformers import AutoTokenizer, LayoutLMForQuestionAnswering
-        >>> from datasets import load_dataset
-        >>> import torch
-
-        >>> tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased", add_prefix_space=True)
-        >>> model = LayoutLMForQuestionAnswering.from_pretrained("microsoft/layoutlm-base-uncased")
-
-        >>> dataset = load_dataset("nielsr/funsd-layoutlmv3", split="train")
-        >>> example = dataset[0]
-        >>> question = "what's his name?"
-        >>> words = example["tokens"]
-        >>> boxes = example["bboxes"]
-
-        >>> encoding = tokenizer(
-        ...     question.split(), words, is_split_into_words=True, return_token_type_ids=True, return_tensors="pt"
-        ... )
-        >>> bbox = []
-        >>> for i, s, w in zip(encoding.input_ids[0], encoding.sequence_ids(0), encoding.word_ids(0)):
-        ...     if s == 1:
-        ...         bbox.append(boxes[w])
-        ...     elif i == tokenizer.sep_token_id:
-        ...         bbox.append([1000] * 4)
-        ...     else:
-        ...         bbox.append([0] * 4)
-        >>> encoding["bbox"] = torch.tensor([bbox])
-
-        >>> outputs = model(**encoding)
-        >>> loss = outputs.loss
-        >>> start_scores = outputs.start_logits
-        >>> end_scores = outputs.end_logits
-        ```
-        """
-
-        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
-
-        outputs = self.layoutlm(
-            input_ids=input_ids,
-            bbox=bbox,
-            attention_mask=attention_mask,
-            token_type_ids=token_type_ids,
-            position_ids=position_ids,
-            head_mask=head_mask,
-            inputs_embeds=inputs_embeds,
-            output_attentions=output_attentions,
-            output_hidden_states=output_hidden_states,
-            return_dict=return_dict,
-        )
-
-        sequence_output = outputs[0]
-
-        logits = self.qa_outputs(sequence_output)
-        start_logits, end_logits = logits.split(1, dim=-1)
-        start_logits = start_logits.squeeze(-1).contiguous()
-        end_logits = end_logits.squeeze(-1).contiguous()
-
-        total_loss = None
-        if start_positions is not None and end_positions is not None:
-            # If we are on multi-GPU, split add a dimension
-            if len(start_positions.size()) > 1:
-                start_positions = start_positions.squeeze(-1)
-            if len(end_positions.size()) > 1:
-                end_positions = end_positions.squeeze(-1)
-            # sometimes the start/end positions are outside our model inputs, we ignore these terms
-            ignored_index = start_logits.size(1)
-            start_positions = start_positions.clamp(0, ignored_index)
-            end_positions = end_positions.clamp(0, ignored_index)
-
-            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
-            start_loss = loss_fct(start_logits, start_positions)
-            end_loss = loss_fct(end_logits, end_positions)
-            total_loss = (start_loss + end_loss) / 2
-
-        if not return_dict:
-            output = (start_logits, end_logits) + outputs[2:]
-            return ((total_loss,) + output) if total_loss is not None else output
-
-        return QuestionAnsweringModelOutput(
-            loss=total_loss,
-            start_logits=start_logits,
-            end_logits=end_logits,
-            hidden_states=outputs.hidden_states,
-            attentions=outputs.attentions,
-        )
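
After this patch the repository still registers the custom `document-question-answering` pipeline in `config.json`; only the local model and configuration modules are removed, in anticipation of the class shipping with transformers via PR #18407. Below is a minimal usage sketch of that remaining pipeline. The hub repository id, the image URL, and the `(image, question)` calling convention are assumptions for illustration; none of them are defined by this patch.

```python
# Minimal sketch, not part of the patch. Assumptions: the repository id and
# image URL below are placeholders, and the custom pipeline is assumed to
# accept positional (image, question) arguments.
from transformers import pipeline

nlp = pipeline(
    "document-question-answering",        # task name registered under custom_pipelines in config.json
    model="impira/layoutlm-document-qa",  # placeholder hub repository id
    trust_remote_code=True,               # the pipeline code is loaded from the repository itself
)

print(nlp("https://example.com/invoice.png", "What is the invoice number?"))
```

The `auto_map` entries pointing at the local modules are removed because, once PR #18407 is merged, `LayoutLMForQuestionAnswering` is expected to be importable from `transformers` directly rather than via remote code.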