nioushasadjadi
committed on
Commit
·
4a303bd
1
Parent(s):
156a2ea
Change _encode function to encode.
Browse files- tokenizer.py +1 -1
tokenizer.py
CHANGED
|
@@ -36,7 +36,7 @@ class KmerTokenizer(PreTrainedTokenizer):
|
|
| 36 |
splits = [text[i:i + self.k] for i in range(0, len(text) - self.k + 1, self.stride)]
|
| 37 |
return splits
|
| 38 |
|
| 39 |
-
def _encode(self, text, **kwargs):
|
| 40 |
tokens = self.tokenize(text, **kwargs)
|
| 41 |
token_ids = self.convert_tokens_to_ids(tokens)
|
| 42 |
if kwargs.get('return_tensors') == 'pt':
|
|
|
|
| 36 |
splits = [text[i:i + self.k] for i in range(0, len(text) - self.k + 1, self.stride)]
|
| 37 |
return splits
|
| 38 |
|
| 39 |
+
def encode(self, text, **kwargs):
|
| 40 |
tokens = self.tokenize(text, **kwargs)
|
| 41 |
token_ids = self.convert_tokens_to_ids(tokens)
|
| 42 |
if kwargs.get('return_tensors') == 'pt':
|