convert_model.py
from transformers import AutoModelForCausalLM, AutoTokenizer
from optimum.intel.openvino import OVModelForCausalLM
from optimum.intel import OVQuantizer, OVWeightQuantizationConfig
import openvino as ov
from pathlib import Path


def convert_model(model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0"):
    # Download the model from the Hugging Face Hub and export it to
    # OpenVINO IR in one step (export=True triggers the conversion).
    model = OVModelForCausalLM.from_pretrained(model_id, export=True)
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    # Save the converted model and its tokenizer in OpenVINO IR format.
    model.save_pretrained("openvino_model")
    tokenizer.save_pretrained("openvino_model")


def compress_model():
    # Reload the exported IR model and compress its weights to INT8.
    model = OVModelForCausalLM.from_pretrained("openvino_model")
    int8_model_dir = Path("openvino_model") / "INT8_compressed_weights"
    ov_config = OVWeightQuantizationConfig()

    # weights_only=True restricts quantization to the weights (INT8 by default),
    # so no calibration dataset is needed.
    quantizer = OVQuantizer.from_pretrained(model, ov_config=ov_config)
    quantizer.quantize(save_directory=int8_model_dir, weights_only=True)
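

# Usage sketch (not called by this script): example_inference is a hypothetical
# helper showing how the compressed model could be loaded for generation. It
# assumes compress_model() wrote a loadable IR to
# openvino_model/INT8_compressed_weights and reuses the tokenizer saved by
# convert_model() in openvino_model/; adjust the paths if your layout differs.
def example_inference(prompt="What is OpenVINO?"):
    tokenizer = AutoTokenizer.from_pretrained("openvino_model")
    model = OVModelForCausalLM.from_pretrained("openvino_model/INT8_compressed_weights")
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=32)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)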


if __name__ == "__main__":
    convert_model()
    compress_model()