---
# Model metadata for latin_g2_jpqd: a CRNN text-recognition model for Latin
# script, sourced from EasyOCR and shipped as a JPQD-quantized ONNX model.
# NOTE(review): nesting below was reconstructed from the section grouping of
# the previous (pipe-mangled) revision — confirm against the consumer's schema.

# Identity and classification.
name: latin_g2_jpqd
description: Latin script text recognition model (CRNN) optimized with JPQD quantization
framework: ONNX
task: text-recognition
domain: computer-vision
subdomain: optical-character-recognition

# Architecture, language coverage and provenance.
model_info:
  architecture: CRNN (CNN + BiLSTM + CTC)
  language: Latin script languages
  supported_languages:
    - English
    - Spanish
    - French
    - German
    - Italian
    - Portuguese
    - Dutch
    - Polish
    - Czech
    - Romanian
    - And other Latin-based languages
  # Matches the last dimension of specifications.output_shape.
  num_classes: 352
  original_source: EasyOCR
  optimization: JPQD quantization

# Tensor contract of the exported model.
specifications:
  # 32x100 is height x width (see usage.preprocessing); the two leading 1s are
  # presumably batch and channel, in that order — TODO confirm.
  input_shape: [1, 1, 32, 100]
  input_type: float32
  input_format: Grayscale
  # Appears to be [batch, sequence_length, num_classes] — TODO confirm.
  output_shape: [1, 25, 352]
  output_type: float32
  # NOTE(review): shapes above list a leading 1 although batch is dynamic —
  # confirm the leading dimension is only an example value.
  batch_size: dynamic
  sequence_length: 25

# Size / speed figures. Values prefixed with "~" are approximate and are kept
# as quoted strings so they are never retyped by a loader.
performance:
  original_size_mb: 14.7
  optimized_size_mb: 8.5
  # NOTE(review): 14.7 / 8.5 is about 1.73, which does not match 3.97 —
  # confirm which of the three figures is correct.
  compression_ratio: 3.97
  inference_time_cpu_ms: "~12"
  accuracy_retention: ">95%"

# Runtime target.
deployment:
  runtime: onnxruntime
  hardware: CPU-optimized
  precision: INT8 weights, FP32 activations
  memory_usage_mb: "~15"

# Steps a caller performs around inference, in order.
usage:
  preprocessing:
    - Convert to grayscale
    - Resize to 32x100 (height x width)
    - Normalize to [0,1]
    - Add batch and channel dimensions
  postprocessing:
    - Apply CTC decoding
    - Convert indices to characters
    - Remove blank tokens and duplicates

# Human-readable summary of the recognizable character classes.
supported_characters:
  basic_latin: "a-z, A-Z, 0-9"
  latin_extended: "À-ÿ (Latin-1 Supplement)"
  punctuation: "Standard punctuation marks"
  symbols: "Common symbols and currency"
  diacritics: "Accented characters for European languages"

# Unicode blocks covered, with their code point ranges.
character_coverage:
  - "Basic Latin (U+0020-U+007F)"
  - "Latin-1 Supplement (U+0080-U+00FF)"
  - "Latin Extended-A (U+0100-U+017F)"
  - "Latin Extended-B (U+0180-U+024F)"
  - "Combining Diacritical Marks (U+0300-U+036F)"

# Description of the training corpus.
training_data:
  type: Multilingual synthetic and real text images
  languages: Multiple Latin script languages
  domains: Documents, natural scenes, printed text, handwriting

use_cases:
  - Multilingual document processing
  - European language OCR
  - International text recognition
  - Multilingual forms processing

license: apache-2.0
tags:
  - text-recognition
  - latin
  - multilingual
  - crnn
  - lstm
  - ocr
  - onnx
  - quantized
  - jpqd
  - european-languages