Add assertion when loading cpu and cuda kernel fails
This commit is contained in:
parent
fe0674f86d
commit
bcc35f08b4
|
@ -441,10 +441,10 @@ def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=F
|
||||||
try:
|
try:
|
||||||
load_cpu_kernel(**kwargs)
|
load_cpu_kernel(**kwargs)
|
||||||
except:
|
except:
|
||||||
print("Cannot load cpu kernel, don't use quantized model on cpu.")
|
|
||||||
if kernels is None: # CUDA kernels failed
|
if kernels is None: # CUDA kernels failed
|
||||||
print("Cannot load cuda kernel, quantization failed.")
|
print("Cannot load cpu or cuda kernel, quantization failed:")
|
||||||
return model
|
assert kernels is None
|
||||||
|
print("Cannot load cpu kernel, don't use quantized model on cpu.")
|
||||||
|
|
||||||
current_device = model.device
|
current_device = model.device
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue