# EasyOCR-onnx / latin_g2_jpqd.yaml
# asmud's picture
# Initial release: EasyOCR ONNX models with JPQD quantization
# c1ac2fb
---
# Top-level identity of the model described by this file.
name: latin_g2_jpqd
description: >-
  Latin script text recognition model (CRNN)
  optimized with JPQD quantization
framework: ONNX
task: text-recognition
domain: computer-vision
subdomain: optical-character-recognition
# Model identity: architecture, language coverage and provenance.
model_info:
  architecture: CRNN (CNN + BiLSTM + CTC)
  language: Latin script languages
  # Representative languages only; the last entry is a catch-all phrase,
  # not a language name.
  supported_languages:
    - English
    - Spanish
    - French
    - German
    - Italian
    - Portuguese
    - Dutch
    - Polish
    - Czech
    - Romanian
    - And other Latin-based languages
  # Equals the last dimension of output_shape.
  num_classes: 352
  original_source: EasyOCR
  optimization: JPQD quantization
# Tensor interface of the exported model.
specifications:
  # Presumably batch x channels x height x width (the preprocessing steps
  # resize to 32x100 and add batch and channel dimensions) — TODO confirm.
  input_shape: [1, 1, 32, 100]
  input_type: float32
  input_format: Grayscale
  output_shape: [1, 25, 352]  # batch x sequence_length x num_classes
  output_type: float32
  # NOTE(review): the shapes above carry a leading 1 while batch_size is
  # "dynamic" — confirm the batch axis is symbolic in the exported graph.
  batch_size: dynamic
  sequence_length: 25
# Reported size, speed and accuracy figures for the optimized model.
performance:
  original_size_mb: 14.7
  optimized_size_mb: 8.5
  # NOTE(review): 14.7 / 8.5 is about 1.73, not 3.97 — confirm which figure
  # is correct (the ratio may have been measured on a different basis).
  compression_ratio: 3.97
  # Approximate figures are strings, not numbers; quoted so that is explicit.
  inference_time_cpu_ms: "~12"
  accuracy_retention: ">95%"
# Target runtime and resource profile.
deployment:
  runtime: onnxruntime
  hardware: CPU-optimized
  precision: INT8 weights, FP32 activations
  # Approximate figure kept as a quoted string, not a number.
  memory_usage_mb: "~15"
# Steps a caller performs around inference, in the order listed.
usage:
  # Turns an image into the model's input tensor.
  preprocessing:
    - Convert to grayscale
    - Resize to 32x100 (height x width)
    - Normalize to [0,1]
    - Add batch and channel dimensions
  # Turns the model's output into text.
  postprocessing:
    - Apply CTC decoding
    - Convert indices to characters
    - Remove blank tokens and duplicates
# Human-readable summary of the character classes the model recognizes.
supported_characters:
  basic_latin: "a-z, A-Z, 0-9"
  latin_extended: "À-ÿ (Latin-1 Supplement)"
  punctuation: "Standard punctuation marks"
  symbols: "Common symbols and currency"
  diacritics: "Accented characters for European languages"
# Unicode blocks listed as covered, each with its code point range.
character_coverage:
  - "Basic Latin (U+0020-U+007F)"
  - "Latin-1 Supplement (U+0080-U+00FF)"
  - "Latin Extended-A (U+0100-U+017F)"
  - "Latin Extended-B (U+0180-U+024F)"
  - "Combining Diacritical Marks (U+0300-U+036F)"
# Description of the data the model was trained on.
training_data:
  type: Multilingual synthetic and real text images
  languages: Multiple Latin script languages
  domains: Documents, natural scenes, printed text, handwriting
# Intended applications.
use_cases:
  - Multilingual document processing
  - European language OCR
  - International text recognition
  - Multilingual forms processing
# License identifier for the model files.
license: apache-2.0

# Search and discovery tags.
tags:
  - text-recognition
  - latin
  - multilingual
  - crnn
  - lstm
  - ocr
  - onnx
  - quantized
  - jpqd
  - european-languages