fariasultana committed
Commit de1a314 · verified · 1 Parent(s): 36c0c61

feat: Add NPU export (TFLite, QNN, CoreML)

Files changed (1)
  1. optimization/npu_export.py +450 -0
optimization/npu_export.py ADDED
@@ -0,0 +1,450 @@
+ """
+ NPU Export Module for MiniMind Max2
+ Export to TFLite, QNN (Qualcomm), and other NPU formats.
+ """
+
+ from dataclasses import dataclass
+ from typing import List, Optional, Dict, Any, Tuple, Union
+ from pathlib import Path
+ import torch
+ import torch.nn as nn
+ import json
+
+
+ @dataclass
+ class NPUExportConfig:
+     """Configuration for NPU export."""
+     # Target platforms
+     target_platform: str = "tflite"  # tflite, qnn, coreml, nnapi
+
+     # Quantization
+     quantization: str = "int8"  # float16, int8, int4
+     calibration_samples: int = 100
+
+     # Optimization
+     optimize_for_inference: bool = True
+     enable_xnnpack: bool = True  # TFLite XNNPACK delegate
+
+     # Model settings
+     max_sequence_length: int = 2048
+     batch_size: int = 1
+
+     # QNN specific
+     qnn_target: str = "gpu"  # cpu, gpu, dsp, htp
+
+     # Output
+     include_metadata: bool = True
+
+
+ class TFLiteExporter:
+     """Export MiniMind models to TensorFlow Lite format."""
+
+     def __init__(self, config: NPUExportConfig):
+         self.config = config
+
+     def export(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> str:
+         """
+         Export model to TFLite format.
+
+         Args:
+             model: PyTorch model to export
+             output_path: Path for output .tflite file
+             sample_input: Sample input for tracing
+
+         Returns:
+             Path to exported model
+         """
+         model.eval()
+
+         # Get model config
+         if hasattr(model, 'config'):
+             vocab_size = model.config.vocab_size
+             hidden_size = model.config.hidden_size
+         else:
+             vocab_size = 102400
+             hidden_size = 1024
+
+         # Create sample input if not provided (also needed by the ONNX
+         # fallback below when TensorFlow is unavailable)
+         if sample_input is None:
+             sample_input = torch.randint(
+                 0, vocab_size,
+                 (self.config.batch_size, self.config.max_sequence_length),
+             )
+
+         try:
+             import tensorflow as tf
+         except ImportError:
+             print("TensorFlow not installed. Install with: pip install tensorflow")
+             return self._export_via_onnx(model, output_path, sample_input)
+
+         # Export via ONNX as intermediate
+         onnx_path = output_path.replace('.tflite', '.onnx')
+         self._export_to_onnx(model, onnx_path, sample_input)
+
+         # Convert ONNX to TFLite
+         try:
+             import onnx
+             from onnx_tf.backend import prepare
+
+             # Load ONNX model
+             onnx_model = onnx.load(onnx_path)
+             tf_rep = prepare(onnx_model)
+
+             # Save as SavedModel
+             saved_model_path = output_path.replace('.tflite', '_saved_model')
+             tf_rep.export_graph(saved_model_path)
+
+             # Convert to TFLite
+             converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
+
+             # Quantization settings (full integer quantization additionally
+             # requires a representative dataset; without one the converter
+             # falls back to dynamic-range quantization)
+             if self.config.quantization == "int8":
+                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
+                 converter.target_spec.supported_types = [tf.int8]
+             elif self.config.quantization == "float16":
+                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
+                 converter.target_spec.supported_types = [tf.float16]
+
+             # Enable optimizations
+             if self.config.optimize_for_inference:
+                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+             tflite_model = converter.convert()
+
+             # Save
+             with open(output_path, 'wb') as f:
+                 f.write(tflite_model)
+
+             print(f"Exported TFLite model to: {output_path}")
+             return output_path
+
+         except Exception as e:
+             print(f"TFLite conversion failed: {e}")
+             return onnx_path
+
+     def _export_to_onnx(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: torch.Tensor,
+     ) -> str:
+         """Export to ONNX as intermediate format."""
+         torch.onnx.export(
+             model,
+             sample_input,
+             output_path,
+             export_params=True,
+             opset_version=14,
+             do_constant_folding=True,
+             input_names=['input_ids'],
+             output_names=['logits'],
+             dynamic_axes={
+                 'input_ids': {0: 'batch_size', 1: 'sequence_length'},
+                 'logits': {0: 'batch_size', 1: 'sequence_length'},
+             },
+         )
+         return output_path
+
+     def _export_via_onnx(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: torch.Tensor,
+     ) -> str:
+         """Fallback: export to ONNX only."""
+         onnx_path = output_path.replace('.tflite', '.onnx')
+         return self._export_to_onnx(model, onnx_path, sample_input)
+
+
+ class QNNExporter:
+     """Export MiniMind models to Qualcomm QNN format."""
+
+     def __init__(self, config: NPUExportConfig):
+         self.config = config
+
+     def export(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> Dict[str, str]:
+         """
+         Export model to QNN format for Qualcomm NPUs.
+
+         Returns:
+             Dictionary with paths to exported files
+         """
+         model.eval()
+
+         # Get model config
+         if hasattr(model, 'config'):
+             vocab_size = model.config.vocab_size
+         else:
+             vocab_size = 102400
+
+         if sample_input is None:
+             sample_input = torch.randint(
+                 0, vocab_size,
+                 (self.config.batch_size, self.config.max_sequence_length),
+             )
+
+         output_dir = Path(output_path).parent
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         # Step 1: Export to ONNX
+         onnx_path = str(output_dir / "model.onnx")
+         torch.onnx.export(
+             model,
+             sample_input,
+             onnx_path,
+             export_params=True,
+             opset_version=14,
+             do_constant_folding=True,
+             input_names=['input_ids'],
+             output_names=['logits'],
+         )
+
+         outputs = {"onnx": onnx_path}
+
+         # Step 2: Generate QNN conversion script
+         qnn_script = self._generate_qnn_script(onnx_path, output_path)
+         script_path = str(output_dir / "convert_to_qnn.sh")
+         with open(script_path, 'w') as f:
+             f.write(qnn_script)
+
+         outputs["conversion_script"] = script_path
+
+         # Step 3: Generate model config for QNN
+         config_path = str(output_dir / "qnn_config.json")
+         qnn_config = {
+             "model_name": "minimind_max2",
+             "input_tensors": [{
+                 "name": "input_ids",
+                 "dims": [self.config.batch_size, self.config.max_sequence_length],
+                 "data_type": "int32"
+             }],
+             "output_tensors": [{
+                 "name": "logits",
+                 "data_type": "float32"
+             }],
+             "backend": self.config.qnn_target,
+             "quantization": self.config.quantization,
+         }
+         with open(config_path, 'w') as f:
+             json.dump(qnn_config, f, indent=2)
+
+         outputs["config"] = config_path
+
+         print(f"QNN export prepared. Run {script_path} with QNN SDK installed.")
+         return outputs
+
+     def _generate_qnn_script(self, onnx_path: str, output_path: str) -> str:
+         """Generate shell script for QNN conversion."""
+         return f'''#!/bin/bash
+ # QNN Conversion Script for MiniMind Max2
+ # Requires Qualcomm QNN SDK
+
+ # Check QNN SDK
+ if [ -z "$QNN_SDK_ROOT" ]; then
+     echo "Error: QNN_SDK_ROOT not set. Please install Qualcomm QNN SDK."
+     exit 1
+ fi
+
+ # Convert ONNX to QNN
+ $QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-onnx-converter \\
+     --input_network {onnx_path} \\
+     --output_path {output_path}.cpp
+
+ # Compile model library
+ $QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-model-lib-generator \\
+     -c {output_path}.cpp \\
+     -b {output_path}.bin \\
+     -t {self.config.qnn_target}
+
+ echo "QNN model exported to {output_path}.bin"
+ '''
+
+
+ class CoreMLExporter:
+     """Export MiniMind models to Apple Core ML format."""
+
+     def __init__(self, config: NPUExportConfig):
+         self.config = config
+
+     def export(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> str:
+         """Export model to Core ML format for Apple Neural Engine."""
+         try:
+             import coremltools as ct
+         except ImportError:
+             print("coremltools not installed. Install with: pip install coremltools")
+             return ""
+
+         model.eval()
+
+         # Get model config
+         if hasattr(model, 'config'):
+             vocab_size = model.config.vocab_size
+         else:
+             vocab_size = 102400
+
+         if sample_input is None:
+             sample_input = torch.randint(
+                 0, vocab_size,
+                 (self.config.batch_size, self.config.max_sequence_length),
+             )
+
+         # Trace model
+         traced = torch.jit.trace(model, sample_input)
+
+         # Convert to Core ML
+         mlmodel = ct.convert(
+             traced,
+             inputs=[ct.TensorType(
+                 name="input_ids",
+                 shape=sample_input.shape,
+                 dtype=int,
+             )],
+             compute_units=ct.ComputeUnit.ALL,  # Use Neural Engine when available
+         )
+
+         # Quantization
+         if self.config.quantization == "float16":
+             mlmodel = ct.models.neural_network.quantization_utils.quantize_weights(
+                 mlmodel, nbits=16
+             )
+         elif self.config.quantization == "int8":
+             mlmodel = ct.models.neural_network.quantization_utils.quantize_weights(
+                 mlmodel, nbits=8
+             )
+
+         # Save
+         mlmodel.save(output_path)
+         print(f"Core ML model exported to: {output_path}")
+         return output_path
+
+
+ class NPUExporter:
+     """Unified NPU export interface."""
+
+     def __init__(self, config: Optional[NPUExportConfig] = None):
+         self.config = config or NPUExportConfig()
+
+         self.exporters = {
+             "tflite": TFLiteExporter(self.config),
+             "qnn": QNNExporter(self.config),
+             "coreml": CoreMLExporter(self.config),
+         }
+
+     def export(
+         self,
+         model: nn.Module,
+         output_path: str,
+         target_platform: Optional[str] = None,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> Union[str, Dict[str, str]]:
+         """
+         Export model to specified NPU format.
+
+         Args:
+             model: PyTorch model
+             output_path: Output file path
+             target_platform: Target platform (tflite, qnn, coreml)
+             sample_input: Sample input for tracing
+
+         Returns:
+             Path(s) to exported model(s)
+         """
+         platform = target_platform or self.config.target_platform
+
+         if platform not in self.exporters:
+             raise ValueError(
+                 f"Unknown platform: {platform}. Supported: {list(self.exporters.keys())}"
+             )
+
+         exporter = self.exporters[platform]
+         return exporter.export(model, output_path, sample_input)
+
+     def export_all(
+         self,
+         model: nn.Module,
+         output_dir: str,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> Dict[str, Any]:
+         """Export to all supported formats."""
+         output_dir = Path(output_dir)
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         results = {}
+
+         for platform, exporter in self.exporters.items():
+             try:
+                 if platform == "tflite":
+                     path = str(output_dir / "model.tflite")
+                 elif platform == "qnn":
+                     path = str(output_dir / "qnn" / "model")
+                 elif platform == "coreml":
+                     path = str(output_dir / "model.mlpackage")
+                 else:
+                     continue
+
+                 result = exporter.export(model, path, sample_input)
+                 results[platform] = {"success": True, "path": result}
+             except Exception as e:
+                 results[platform] = {"success": False, "error": str(e)}
+
+         return results
+
+
+ def export_for_mobile(
+     model: nn.Module,
+     output_dir: str,
+     platforms: Optional[List[str]] = None,
+     config: Optional[NPUExportConfig] = None,
+ ) -> Dict[str, Any]:
+     """
+     High-level function to export model for mobile devices.
+
+     Args:
+         model: PyTorch model
+         output_dir: Output directory
+         platforms: List of target platforms (default: all)
+         config: Export configuration
+
+     Returns:
+         Dictionary with export results for each platform
+     """
+     config = config or NPUExportConfig()
+     exporter = NPUExporter(config)
+
+     if platforms is None:
+         return exporter.export_all(model, output_dir)
+
+     results = {}
+     output_dir = Path(output_dir)
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     for platform in platforms:
+         try:
+             if platform == "tflite":
+                 path = str(output_dir / "model.tflite")
+             elif platform == "qnn":
+                 path = str(output_dir / "qnn" / "model")
+             elif platform == "coreml":
+                 path = str(output_dir / "model.mlpackage")
+             else:
+                 continue
+
+             result = exporter.export(model, path, target_platform=platform)
+             results[platform] = {"success": True, "path": result}
+         except Exception as e:
+             results[platform] = {"success": False, "error": str(e)}
+
+     return results
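
Usage sketch for the new module: the `TinyLM` class below is only a stand-in that mimics the (batch, seq) → (batch, seq, vocab) interface the exporters expect, and the output directory is illustrative; in practice the loaded MiniMind Max2 model is passed instead.

from types import SimpleNamespace

import torch
import torch.nn as nn

from optimization.npu_export import NPUExportConfig, export_for_mobile


class TinyLM(nn.Module):
    """Stand-in model exposing the interface the exporters expect."""

    def __init__(self, vocab_size: int = 1000, hidden_size: int = 64):
        super().__init__()
        # The exporters read vocab_size/hidden_size from `model.config` when present.
        self.config = SimpleNamespace(vocab_size=vocab_size, hidden_size=hidden_size)
        self.embed = nn.Embedding(vocab_size, hidden_size)
        self.head = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        # (batch, seq) token ids -> (batch, seq, vocab) logits
        return self.head(self.embed(input_ids))


config = NPUExportConfig(quantization="float16", max_sequence_length=128)
results = export_for_mobile(
    TinyLM(),
    output_dir="exports/npu",
    platforms=["tflite", "coreml"],
    config=config,
)
# Each platform entry reports {"success": ..., "path": ...} or {"success": False, "error": ...}.
print(results)

Backends whose optional dependencies (tensorflow/onnx_tf, coremltools) are missing either fall back to the ONNX intermediate or are recorded as failures in the returned dictionary, so the sketch runs end to end without them.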