diff --git a/README.md b/README.md index 0510f9c..78d68d8 100644 --- a/README.md +++ b/README.md @@ -20,16 +20,16 @@ You can use the raw model for video classification into one of the 600 possible Here is how to use this model to classify a video: ```python -from transformers import TimesformerFeatureExtractor, TimesformerForVideoClassification +from transformers import AutoImageProcessor, TimesformerForVideoClassification import numpy as np import torch -video = list(np.random.randn(8, 3, 224, 224)) +video = list(np.random.randn(16, 3, 448, 448)) -feature_extractor = TimesformerFeatureExtractor.from_pretrained("facebook/timesformer-hr-finetuned-k600") +processor = AutoImageProcessor.from_pretrained("facebook/timesformer-hr-finetuned-k600") model = TimesformerForVideoClassification.from_pretrained("facebook/timesformer-hr-finetuned-k600") -inputs = feature_extractor(video, return_tensors="pt") +inputs = processor(images=video, return_tensors="pt") with torch.no_grad(): outputs = model(**inputs) diff --git a/preprocessor_config.json b/preprocessor_config.json index 05f8840..b824248 100644 --- a/preprocessor_config.json +++ b/preprocessor_config.json @@ -1,7 +1,7 @@ { "crop_size": { - "height": 224, - "width": 224 + "height": 448, + "width": 448 }, "do_center_crop": true, "do_normalize": true, @@ -21,6 +21,6 @@ "resample": 2, "rescale_factor": 0.00392156862745098, "size": { - "shortest_edge": 224 + "shortest_edge": 448 } }