From 4a9b711e61d62b64ae8a07d763553a98a984d281 Mon Sep 17 00:00:00 2001 From: duzx16 Date: Thu, 23 Mar 2023 22:43:06 +0800 Subject: [PATCH] Close CPU fusion on Mac --- modeling_chatglm.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/modeling_chatglm.py b/modeling_chatglm.py index d833334..65e378e 100644 --- a/modeling_chatglm.py +++ b/modeling_chatglm.py @@ -5,6 +5,7 @@ import copy import os import warnings import re +import sys import torch import torch.utils.checkpoint @@ -32,10 +33,12 @@ from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaL from .configuration_chatglm import ChatGLMConfig # flags required to enable jit fusion kernels -torch._C._jit_set_profiling_mode(False) -torch._C._jit_set_profiling_executor(False) -torch._C._jit_override_can_fuse_on_cpu(True) -torch._C._jit_override_can_fuse_on_gpu(True) + +if sys.platform != 'darwin': + torch._C._jit_set_profiling_mode(False) + torch._C._jit_set_profiling_executor(False) + torch._C._jit_override_can_fuse_on_cpu(True) + torch._C._jit_override_can_fuse_on_gpu(True) logger = logging.get_logger(__name__) @@ -266,7 +269,7 @@ def attention_fn( if not (attention_mask == 0).all(): # if auto-regressive, skip attention_scores.masked_fill_(attention_mask, -10000.0) - dtype = attention_scores.type() + dtype = attention_scores.dtype attention_scores = attention_scores.float() attention_scores = attention_scores * query_key_layer_scaling_coeff