fariasultana committed
Commit de1a314 · verified · 1 Parent(s): 36c0c61

feat: Add NPU export (TFLite, QNN, CoreML)

Files changed (1)
  1. optimization/npu_export.py +450 -0
optimization/npu_export.py ADDED
@@ -0,0 +1,450 @@
+ """
+ NPU Export Module for MiniMind Max2
+ Export to TFLite, QNN (Qualcomm), and other NPU formats.
+ """
+
+ from dataclasses import dataclass
+ from typing import List, Optional, Dict, Any, Tuple, Union
+ from pathlib import Path
+ import torch
+ import torch.nn as nn
+ import json
+
+
+ @dataclass
+ class NPUExportConfig:
+     """Configuration for NPU export."""
+     # Target platforms
+     target_platform: str = "tflite"  # tflite, qnn, coreml, nnapi
+
+     # Quantization
+     quantization: str = "int8"  # float16, int8, int4
+     calibration_samples: int = 100
+
+     # Optimization
+     optimize_for_inference: bool = True
+     enable_xnnpack: bool = True  # TFLite XNNPACK delegate
+
+     # Model settings
+     max_sequence_length: int = 2048
+     batch_size: int = 1
+
+     # QNN specific
+     qnn_target: str = "gpu"  # cpu, gpu, dsp, htp
+
+     # Output
+     include_metadata: bool = True
+
+
+ class TFLiteExporter:
+     """Export MiniMind models to TensorFlow Lite format."""
+
+     def __init__(self, config: NPUExportConfig):
+         self.config = config
+
+     def export(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> str:
+         """
+         Export model to TFLite format.
+
+         Args:
+             model: PyTorch model to export
+             output_path: Path for output .tflite file
+             sample_input: Sample input for tracing
+
+         Returns:
+             Path to exported model
+         """
+         model.eval()
+
+         # Get model config
+         if hasattr(model, 'config'):
+             vocab_size = model.config.vocab_size
+             hidden_size = model.config.hidden_size
+         else:
+             vocab_size = 102400
+             hidden_size = 1024
+
+         # Create sample input if not provided (also needed by the ONNX
+         # fallback below when TensorFlow is unavailable)
+         if sample_input is None:
+             sample_input = torch.randint(
+                 0, vocab_size,
+                 (self.config.batch_size, self.config.max_sequence_length),
+             )
+
+         try:
+             import tensorflow as tf
+         except ImportError:
+             print("TensorFlow not installed. Install with: pip install tensorflow")
+             return self._export_via_onnx(model, output_path, sample_input)
+
+         # Export via ONNX as intermediate
+         onnx_path = output_path.replace('.tflite', '.onnx')
+         self._export_to_onnx(model, onnx_path, sample_input)
+
+         # Convert ONNX to TFLite
+         try:
+             import onnx
+             from onnx_tf.backend import prepare
+
+             # Load ONNX model
+             onnx_model = onnx.load(onnx_path)
+             tf_rep = prepare(onnx_model)
+
+             # Save as SavedModel
+             saved_model_path = output_path.replace('.tflite', '_saved_model')
+             tf_rep.export_graph(saved_model_path)
+
+             # Convert to TFLite
+             converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)
+
+             # Quantization settings (full integer quantization additionally
+             # requires a representative dataset; without one the converter
+             # falls back to dynamic-range quantization)
+             if self.config.quantization == "int8":
+                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
+                 converter.target_spec.supported_types = [tf.int8]
+             elif self.config.quantization == "float16":
+                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
+                 converter.target_spec.supported_types = [tf.float16]
+
+             # Enable optimizations
+             if self.config.optimize_for_inference:
+                 converter.optimizations = [tf.lite.Optimize.DEFAULT]
+
+             tflite_model = converter.convert()
+
+             # Save
+             with open(output_path, 'wb') as f:
+                 f.write(tflite_model)
+
+             print(f"Exported TFLite model to: {output_path}")
+             return output_path
+
+         except Exception as e:
+             print(f"TFLite conversion failed: {e}")
+             return onnx_path
+
+     def _export_to_onnx(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: torch.Tensor,
+     ) -> str:
+         """Export to ONNX as intermediate format."""
+         torch.onnx.export(
+             model,
+             sample_input,
+             output_path,
+             export_params=True,
+             opset_version=14,
+             do_constant_folding=True,
+             input_names=['input_ids'],
+             output_names=['logits'],
+             dynamic_axes={
+                 'input_ids': {0: 'batch_size', 1: 'sequence_length'},
+                 'logits': {0: 'batch_size', 1: 'sequence_length'},
+             },
+         )
+         return output_path
+
+     def _export_via_onnx(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: torch.Tensor,
+     ) -> str:
+         """Fallback: export to ONNX only."""
+         onnx_path = output_path.replace('.tflite', '.onnx')
+         return self._export_to_onnx(model, onnx_path, sample_input)
+
+
+ class QNNExporter:
+     """Export MiniMind models to Qualcomm QNN format."""
+
+     def __init__(self, config: NPUExportConfig):
+         self.config = config
+
+     def export(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> Dict[str, str]:
+         """
+         Export model to QNN format for Qualcomm NPUs.
+
+         Returns:
+             Dictionary with paths to exported files
+         """
+         model.eval()
+
+         # Get model config
+         if hasattr(model, 'config'):
+             vocab_size = model.config.vocab_size
+         else:
+             vocab_size = 102400
+
+         if sample_input is None:
+             sample_input = torch.randint(
+                 0, vocab_size,
+                 (self.config.batch_size, self.config.max_sequence_length),
+             )
+
+         output_dir = Path(output_path).parent
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         # Step 1: Export to ONNX
+         onnx_path = str(output_dir / "model.onnx")
+         torch.onnx.export(
+             model,
+             sample_input,
+             onnx_path,
+             export_params=True,
+             opset_version=14,
+             do_constant_folding=True,
+             input_names=['input_ids'],
+             output_names=['logits'],
+         )
+
+         outputs = {"onnx": onnx_path}
+
+         # Step 2: Generate QNN conversion script
+         qnn_script = self._generate_qnn_script(onnx_path, output_path)
+         script_path = str(output_dir / "convert_to_qnn.sh")
+         with open(script_path, 'w') as f:
+             f.write(qnn_script)
+
+         outputs["conversion_script"] = script_path
+
+         # Step 3: Generate model config for QNN
+         config_path = str(output_dir / "qnn_config.json")
+         qnn_config = {
+             "model_name": "minimind_max2",
+             "input_tensors": [{
+                 "name": "input_ids",
+                 "dims": [self.config.batch_size, self.config.max_sequence_length],
+                 "data_type": "int32"
+             }],
+             "output_tensors": [{
+                 "name": "logits",
+                 "data_type": "float32"
+             }],
+             "backend": self.config.qnn_target,
+             "quantization": self.config.quantization,
+         }
+         with open(config_path, 'w') as f:
+             json.dump(qnn_config, f, indent=2)
+
+         outputs["config"] = config_path
+
+         print(f"QNN export prepared. Run {script_path} with QNN SDK installed.")
+         return outputs
+
+     def _generate_qnn_script(self, onnx_path: str, output_path: str) -> str:
+         """Generate shell script for QNN conversion."""
+         return f'''#!/bin/bash
+ # QNN Conversion Script for MiniMind Max2
+ # Requires Qualcomm QNN SDK
+
+ # Check QNN SDK
+ if [ -z "$QNN_SDK_ROOT" ]; then
+     echo "Error: QNN_SDK_ROOT not set. Please install Qualcomm QNN SDK."
+     exit 1
+ fi
+
+ # Convert ONNX to QNN
+ $QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-onnx-converter \\
+     --input_network {onnx_path} \\
+     --output_path {output_path}.cpp
+
+ # Compile model library
+ $QNN_SDK_ROOT/bin/x86_64-linux-clang/qnn-model-lib-generator \\
+     -c {output_path}.cpp \\
+     -b {output_path}.bin \\
+     -t {self.config.qnn_target}
+
+ echo "QNN model exported to {output_path}.bin"
+ '''
+
+
+ class CoreMLExporter:
+     """Export MiniMind models to Apple Core ML format."""
+
+     def __init__(self, config: NPUExportConfig):
+         self.config = config
+
+     def export(
+         self,
+         model: nn.Module,
+         output_path: str,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> str:
+         """Export model to Core ML format for Apple Neural Engine."""
+         try:
+             import coremltools as ct
+         except ImportError:
+             print("coremltools not installed. Install with: pip install coremltools")
+             return ""
+
+         model.eval()
+
+         # Get model config
+         if hasattr(model, 'config'):
+             vocab_size = model.config.vocab_size
+         else:
+             vocab_size = 102400
+
+         if sample_input is None:
+             sample_input = torch.randint(
+                 0, vocab_size,
+                 (self.config.batch_size, self.config.max_sequence_length),
+             )
+
+         # Trace model
+         traced = torch.jit.trace(model, sample_input)
+
+         # Convert to Core ML
+         mlmodel = ct.convert(
+             traced,
+             inputs=[ct.TensorType(
+                 name="input_ids",
+                 shape=sample_input.shape,
+                 dtype=int,
+             )],
+             compute_units=ct.ComputeUnit.ALL,  # Use Neural Engine when available
+         )
+
+         # Quantization
+         if self.config.quantization == "float16":
+             mlmodel = ct.models.neural_network.quantization_utils.quantize_weights(
+                 mlmodel, nbits=16
+             )
+         elif self.config.quantization == "int8":
+             mlmodel = ct.models.neural_network.quantization_utils.quantize_weights(
+                 mlmodel, nbits=8
+             )
+
+         # Save
+         mlmodel.save(output_path)
+         print(f"Core ML model exported to: {output_path}")
+         return output_path
+
+
+ class NPUExporter:
+     """Unified NPU export interface."""
+
+     def __init__(self, config: Optional[NPUExportConfig] = None):
+         self.config = config or NPUExportConfig()
+
+         self.exporters = {
+             "tflite": TFLiteExporter(self.config),
+             "qnn": QNNExporter(self.config),
+             "coreml": CoreMLExporter(self.config),
+         }
+
+     def export(
+         self,
+         model: nn.Module,
+         output_path: str,
+         target_platform: Optional[str] = None,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> Union[str, Dict[str, str]]:
+         """
+         Export model to specified NPU format.
+
+         Args:
+             model: PyTorch model
+             output_path: Output file path
+             target_platform: Target platform (tflite, qnn, coreml)
+             sample_input: Sample input for tracing
+
+         Returns:
+             Path(s) to exported model(s)
+         """
+         platform = target_platform or self.config.target_platform
+
+         if platform not in self.exporters:
+             raise ValueError(
+                 f"Unknown platform: {platform}. Supported: {list(self.exporters.keys())}"
+             )
+
+         exporter = self.exporters[platform]
+         return exporter.export(model, output_path, sample_input)
+
+     def export_all(
+         self,
+         model: nn.Module,
+         output_dir: str,
+         sample_input: Optional[torch.Tensor] = None,
+     ) -> Dict[str, Any]:
+         """Export to all supported formats."""
+         output_dir = Path(output_dir)
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         results = {}
+
+         for platform, exporter in self.exporters.items():
+             try:
+                 if platform == "tflite":
+                     path = str(output_dir / "model.tflite")
+                 elif platform == "qnn":
+                     path = str(output_dir / "qnn" / "model")
+                 elif platform == "coreml":
+                     path = str(output_dir / "model.mlpackage")
+                 else:
+                     continue
+
+                 result = exporter.export(model, path, sample_input)
+                 results[platform] = {"success": True, "path": result}
+             except Exception as e:
+                 results[platform] = {"success": False, "error": str(e)}
+
+         return results
+
+
+ def export_for_mobile(
+     model: nn.Module,
+     output_dir: str,
+     platforms: Optional[List[str]] = None,
+     config: Optional[NPUExportConfig] = None,
+ ) -> Dict[str, Any]:
+     """
+     High-level function to export model for mobile devices.
+
+     Args:
+         model: PyTorch model
+         output_dir: Output directory
+         platforms: List of target platforms (default: all)
+         config: Export configuration
+
+     Returns:
+         Dictionary with export results for each platform
+     """
+     config = config or NPUExportConfig()
+     exporter = NPUExporter(config)
+
+     if platforms is None:
+         return exporter.export_all(model, output_dir)
+
+     results = {}
+     output_dir = Path(output_dir)
+     output_dir.mkdir(parents=True, exist_ok=True)
+
+     for platform in platforms:
+         try:
+             if platform == "tflite":
+                 path = str(output_dir / "model.tflite")
+             elif platform == "qnn":
+                 path = str(output_dir / "qnn" / "model")
+             elif platform == "coreml":
+                 path = str(output_dir / "model.mlpackage")
+             else:
+                 continue
+
+             result = exporter.export(model, path, target_platform=platform)
+             results[platform] = {"success": True, "path": result}
+         except Exception as e:
+             results[platform] = {"success": False, "error": str(e)}
+
+     return results
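
Usage sketch for the new module: the `TinyLM` class below is only a stand-in that mimics the (batch, seq) → (batch, seq, vocab) interface the exporters expect, and the output directory is illustrative; in practice the loaded MiniMind Max2 model is passed instead.

from types import SimpleNamespace

import torch
import torch.nn as nn

from optimization.npu_export import NPUExportConfig, export_for_mobile


class TinyLM(nn.Module):
    """Stand-in model exposing the interface the exporters expect."""

    def __init__(self, vocab_size: int = 1000, hidden_size: int = 64):
        super().__init__()
        # The exporters read vocab_size/hidden_size from `model.config` when present.
        self.config = SimpleNamespace(vocab_size=vocab_size, hidden_size=hidden_size)
        self.embed = nn.Embedding(vocab_size, hidden_size)
        self.head = nn.Linear(hidden_size, vocab_size)

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        # (batch, seq) token ids -> (batch, seq, vocab) logits
        return self.head(self.embed(input_ids))


config = NPUExportConfig(quantization="float16", max_sequence_length=128)
results = export_for_mobile(
    TinyLM(),
    output_dir="exports/npu",
    platforms=["tflite", "coreml"],
    config=config,
)
# Each platform entry reports {"success": ..., "path": ...} or {"success": False, "error": ...}.
print(results)

Backends whose optional dependencies (tensorflow/onnx_tf, coremltools) are missing either fall back to the ONNX intermediate or are recorded as failures in the returned dictionary, so the sketch runs end to end without them.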