Add assertion when loading cpu and cuda kernel fails

2023-04-14 19:58:42 +08:00 · 2023-04-14 19:58:42 +08:00 · bcc35f08b4
parent fe0674f86d
commit bcc35f08b4
1 changed file with 3 additions and 3 deletions
--- a/quantization.py
+++ b/quantization.py
@@ -441,10 +441,10 @@ def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=F
    try:
        load_cpu_kernel(**kwargs)
    except:
-        print("Cannot load cpu kernel, don't use quantized model on cpu.")
        if kernels is None:  # CUDA kernels failed
-            print("Cannot load cuda kernel, quantization failed.")
+            print("Cannot load cpu or cuda kernel, quantization failed:")
-            return model
+            assert kernels is None
+        print("Cannot load cpu kernel, don't use quantized model on cpu.")
    current_device = model.device