mirror of https://github.com/hwchase17/langchain
Use correct tokenizer for Bedrock/Anthropic LLMs (#11561)
**Description** This PR implements the usage of the correct tokenizer in Bedrock LLMs, if using anthropic models. **Issue:** #11560 **Dependencies:** optional dependency on `anthropic` python library. **Twitter handle:** jtolgyesi --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/11718/head
parent
467b082c34
commit
15687a28d5
@ -0,0 +1,25 @@
|
||||
from typing import Any, List
|
||||
|
||||
|
||||
def _get_anthropic_client() -> Any:
    """Create a client object from the optional ``anthropic`` package.

    The package is imported inside the function, so it is only required
    when this helper is actually called.

    Returns:
        The result of calling ``anthropic.Anthropic()`` with no arguments.

    Raises:
        ImportError: If the ``anthropic`` package cannot be imported. The
            underlying import failure is attached as ``__cause__``.
    """
    try:
        import anthropic
    except ImportError as err:
        # Chain explicitly so the traceback shows the real import failure as
        # the direct cause rather than "during handling ... another exception".
        raise ImportError(
            "Could not import anthropic python package. "
            "This is needed in order to accurately tokenize the text "
            "for anthropic models. Please install it with `pip install anthropic`."
        ) from err
    return anthropic.Anthropic()
|
||||
|
||||
|
||||
def get_num_tokens_anthropic(text: str) -> int:
    """Return the token count of ``text`` as reported by the anthropic client.

    A client is obtained from ``_get_anthropic_client()`` on every call and
    its ``count_tokens`` method is invoked with ``text`` as a keyword argument.

    Args:
        text: The string to count tokens for.

    Returns:
        Whatever ``client.count_tokens(text=text)`` returns.
    """
    return _get_anthropic_client().count_tokens(text=text)
|
||||
|
||||
|
||||
def get_token_ids_anthropic(text: str) -> List[int]:
    """Return the token ids of ``text`` from the anthropic client's tokenizer.

    A client is obtained from ``_get_anthropic_client()`` on every call; its
    tokenizer encodes ``text`` and the ``ids`` attribute of the encoding is
    returned.

    Args:
        text: The string to tokenize.

    Returns:
        The ``ids`` attribute of the tokenizer's encoding of ``text``.
    """
    encoding = _get_anthropic_client().get_tokenizer().encode(text)
    return encoding.ids
|
Loading…
Reference in New Issue