From c0805983e93656da65232731ee3a838db5677a25 Mon Sep 17 00:00:00 2001
From: Niels Rogge
Date: Tue, 6 Dec 2022 08:16:23 +0000
Subject: [PATCH] fix a typo in code snippet and processor config (#2)

- fix a typo in code snippet (d4a091673f1e222362b66e76cd12503485811488)
- Update README.md (283d3dadb4278dff272703e1e49660120ac9ee32)
- Update README.md (3d47cc1abbe7e66e6e1508588b094529329c99a0)
- fix processor config (5c99ed640fbd5953e8c10441c808bbb1d4eedca4)

Co-authored-by: Fatih
---
 README.md                | 8 ++++----
 preprocessor_config.json | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 0510f9c..78d68d8 100644
--- a/README.md
+++ b/README.md
@@ -20,16 +20,16 @@ You can use the raw model for video classification into one of the 600 possible
 Here is how to use this model to classify a video:
 
 ```python
-from transformers import TimesformerFeatureExtractor, TimesformerForVideoClassification
+from transformers import AutoImageProcessor, TimesformerForVideoClassification
 import numpy as np
 import torch
 
-video = list(np.random.randn(8, 3, 224, 224))
+video = list(np.random.randn(16, 3, 448, 448))
 
-feature_extractor = TimesformerFeatureExtractor.from_pretrained("facebook/timesformer-hr-finetuned-k600")
+processor = AutoImageProcessor.from_pretrained("facebook/timesformer-hr-finetuned-k600")
 model = TimesformerForVideoClassification.from_pretrained("facebook/timesformer-hr-finetuned-k600")
 
-inputs = feature_extractor(video, return_tensors="pt")
+inputs = processor(images=video, return_tensors="pt")
 
 with torch.no_grad():
     outputs = model(**inputs)
diff --git a/preprocessor_config.json b/preprocessor_config.json
index 05f8840..b824248 100644
--- a/preprocessor_config.json
+++ b/preprocessor_config.json
@@ -1,7 +1,7 @@
 {
   "crop_size": {
-    "height": 224,
-    "width": 224
+    "height": 448,
+    "width": 448
   },
   "do_center_crop": true,
   "do_normalize": true,
@@ -21,6 +21,6 @@
   "resample": 2,
   "rescale_factor": 0.00392156862745098,
   "size": {
-    "shortest_edge": 224
+    "shortest_edge": 448
   }
 }