Add tokenize detokenize compatibility

ParthSareen committed Dec 16, 2024
1 parent 70dd0b7 commit 422724e

Showing 4 changed files with 82 additions and 0 deletions.
10 changes: 10 additions & 0 deletions examples/tokenization.py
@@ -0,0 +1,10 @@
import ollama

# Get tokens from a model
response = ollama.tokenize(model='llama3.2', text='Hello world!')
tokens = response.tokens
print('tokens from model', tokens)

# Convert tokens back to text
response = ollama.detokenize(model='llama3.2', tokens=tokens)
print('text from tokens', response.text) # Prints: Hello world!
6 changes: 6 additions & 0 deletions ollama/__init__.py
@@ -12,6 +12,8 @@
ListResponse,
ShowResponse,
ProcessResponse,
TokenizeResponse,
DetokenizeResponse,
RequestError,
ResponseError,
)
@@ -31,6 +33,8 @@
'ListResponse',
'ShowResponse',
'ProcessResponse',
'TokenizeResponse',
'DetokenizeResponse',
'RequestError',
'ResponseError',
]
@@ -49,3 +53,5 @@
copy = _client.copy
show = _client.show
ps = _client.ps
tokenize = _client.tokenize
detokenize = _client.detokenize
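
With these exports in place, the new helpers and their response models are available straight from the package root. A minimal usage sketch (assumes a local Ollama server with the llama3.2 model pulled, as in the example above):

import ollama
from ollama import TokenizeResponse, DetokenizeResponse

# Module-level helpers mirror the Client methods added in this commit.
tok: TokenizeResponse = ollama.tokenize(model='llama3.2', text='Hello world!')
detok: DetokenizeResponse = ollama.detokenize(model='llama3.2', tokens=tok.tokens)

print(tok.tokens)   # sequence of token ids
print(detok.text)   # round-tripped text
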
48 changes: 48 additions & 0 deletions ollama/_client.py
@@ -48,6 +48,8 @@
CreateRequest,
CopyRequest,
DeleteRequest,
DetokenizeRequest,
DetokenizeResponse,
EmbedRequest,
EmbedResponse,
EmbeddingsRequest,
@@ -67,6 +69,8 @@
ShowRequest,
ShowResponse,
StatusResponse,
TokenizeRequest,
TokenizeResponse,
Tool,
)

@@ -611,6 +615,28 @@ def ps(self) -> ProcessResponse:
'/api/ps',
)

def tokenize(self, model: str, text: str) -> TokenizeResponse:
return self._request(
TokenizeResponse,
'POST',
'/api/tokenize',
json=TokenizeRequest(
model=model,
text=text,
).model_dump(exclude_none=True),
)

def detokenize(self, model: str, tokens: Sequence[int]) -> DetokenizeResponse:
return self._request(
DetokenizeResponse,
'POST',
'/api/detokenize',
json=DetokenizeRequest(
model=model,
tokens=tokens,
).model_dump(exclude_none=True),
)


class AsyncClient(BaseClient):
def __init__(self, host: Optional[str] = None, **kwargs) -> None:
@@ -1120,6 +1146,28 @@ async def ps(self) -> ProcessResponse:
'/api/ps',
)

async def tokenize(self, model: str, text: str) -> TokenizeResponse:
return await self._request(
TokenizeResponse,
'POST',
'/api/tokenize',
json=TokenizeRequest(
model=model,
text=text,
).model_dump(exclude_none=True),
)

async def detokenize(self, model: str, tokens: Sequence[int]) -> DetokenizeResponse:
return await self._request(
DetokenizeResponse,
'POST',
'/api/detokenize',
json=DetokenizeRequest(
model=model,
tokens=tokens,
).model_dump(exclude_none=True),
)


def _copy_messages(messages: Optional[Sequence[Union[Mapping[str, Any], Message]]]) -> Iterator[Message]:
for message in messages or []:
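
The same pair of endpoints is mirrored on AsyncClient. A minimal sketch of the asynchronous round trip (model name is illustrative and a reachable local Ollama server is assumed):

import asyncio
from ollama import AsyncClient

async def main():
    client = AsyncClient()  # defaults to the local Ollama server
    tok = await client.tokenize(model='llama3.2', text='Hello world!')
    detok = await client.detokenize(model='llama3.2', tokens=tok.tokens)
    print(detok.text)

asyncio.run(main())
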
18 changes: 18 additions & 0 deletions ollama/_types.py
@@ -494,6 +494,24 @@ class Model(SubscriptableBaseModel):
models: Sequence[Model]


class TokenizeRequest(BaseRequest):
model: str
text: str


class TokenizeResponse(BaseGenerateResponse):
tokens: Sequence[int]


class DetokenizeRequest(BaseRequest):
model: str
tokens: Sequence[int]


class DetokenizeResponse(BaseGenerateResponse):
text: str


class RequestError(Exception):
"""
Common class for request errors.

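
For reference, the request models above are what the client serializes before POSTing to the new routes. A small sketch of the wire payloads, built directly from the models without hitting a server (the token ids are placeholders, not real ids for any model):

from ollama._types import TokenizeRequest, DetokenizeRequest

# Body sent to POST /api/tokenize: a dict with 'model' and 'text' keys.
print(TokenizeRequest(model='llama3.2', text='Hello world!').model_dump(exclude_none=True))

# Body sent to POST /api/detokenize: a dict with 'model' and 'tokens' keys.
print(DetokenizeRequest(model='llama3.2', tokens=[1, 2, 3]).model_dump(exclude_none=True))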