from transformers import LlamaTokenizerFast

# Load the raw SentencePiece model (spm.model) through the fast Llama tokenizer
tokenizer = LlamaTokenizerFast(vocab_file="/home/aviinashh/projects/Mini-LLM/Tokenizer/BPE/spm.model")

# Register the special tokens manually in the HF tokenizer config.
# The angle-bracket strings below are the standard SentencePiece/Llama defaults;
# adjust them if your spm model was trained with different token strings.
tokenizer.add_special_tokens({
    "bos_token": "<s>",
    "eos_token": "</s>",
    "unk_token": "<unk>",
    "pad_token": "<pad>",
    "additional_special_tokens": ["", "", ""],  # fill in your project's extra special tokens here
})

# Save the JSON version of the tokenizer (tokenizer.json plus config files) to the output directory
tokenizer.save_pretrained("Tokenizer/")
print("Converted to tokenizer.json successfully!")
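
# Minimal sanity check (a sketch, not part of the conversion itself): reload the
# saved directory with AutoTokenizer and round-trip a sample string. The sample
# text below is an arbitrary illustration.
from transformers import AutoTokenizer

reloaded = AutoTokenizer.from_pretrained("Tokenizer/")
sample = "Converted tokenizers should round-trip plain text."
ids = reloaded.encode(sample, add_special_tokens=False)
print(ids)
print(reloaded.decode(ids))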