diff --git a/libs/partners/openai/langchain_openai/chat_models/base.py b/libs/partners/openai/langchain_openai/chat_models/base.py
index b5b7199edb..ed33b919bd 100644
--- a/libs/partners/openai/langchain_openai/chat_models/base.py
+++ b/libs/partners/openai/langchain_openai/chat_models/base.py
@@ -572,19 +572,9 @@ class ChatOpenAI(BaseChatModel):
             model = self.tiktoken_model_name
         else:
             model = self.model_name
-        if model == "gpt-3.5-turbo":
-            # gpt-3.5-turbo may change over time.
-            # Returning num tokens assuming gpt-3.5-turbo-0301.
-            model = "gpt-3.5-turbo-0301"
-        elif model == "gpt-4":
-            # gpt-4 may change over time.
-            # Returning num tokens assuming gpt-4-0314.
-            model = "gpt-4-0314"
-        # Returns the number of tokens used by a list of messages.
         try:
             encoding = tiktoken.encoding_for_model(model)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
             model = "cl100k_base"
             encoding = tiktoken.get_encoding(model)
         return model, encoding
diff --git a/libs/partners/openai/langchain_openai/embeddings/base.py b/libs/partners/openai/langchain_openai/embeddings/base.py
index cd0987ea43..d0b815358e 100644
--- a/libs/partners/openai/langchain_openai/embeddings/base.py
+++ b/libs/partners/openai/langchain_openai/embeddings/base.py
@@ -289,9 +289,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
         try:
             encoding = tiktoken.encoding_for_model(model_name)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
-            model = "cl100k_base"
-            encoding = tiktoken.get_encoding(model)
+            encoding = tiktoken.get_encoding("cl100k_base")
         for i, text in enumerate(texts):
             if self.model.endswith("001"):
                 # See: https://github.com/openai/openai-python/
diff --git a/libs/partners/openai/langchain_openai/llms/base.py b/libs/partners/openai/langchain_openai/llms/base.py
index 27c2afd5ee..9b0ba87d1f 100644
--- a/libs/partners/openai/langchain_openai/llms/base.py
+++ b/libs/partners/openai/langchain_openai/llms/base.py
@@ -496,9 +496,7 @@ class BaseOpenAI(BaseLLM):
         try:
             enc = tiktoken.encoding_for_model(model_name)
         except KeyError:
-            logger.warning("Warning: model not found. Using cl100k_base encoding.")
-            model = "cl100k_base"
-            enc = tiktoken.get_encoding(model)
+            enc = tiktoken.get_encoding("cl100k_base")
 
         return enc.encode(
             text,
diff --git a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
index 767ce0720c..b729664e6b 100644
--- a/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
+++ b/libs/partners/openai/tests/unit_tests/chat_models/test_base.py
@@ -118,3 +118,19 @@ async def test_openai_apredict(mock_completion: dict) -> None:
     res = llm.predict("bar")
     assert res == "Bar Baz"
     assert completed
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "gpt-3.5-turbo",
+        "gpt-4",
+        "gpt-3.5-0125",
+        "gpt-4-0125-preview",
+        "gpt-4-turbo-preview",
+        "gpt-4-vision-preview",
+    ],
+)
+def test__get_encoding_model(model: str) -> None:
+    ChatOpenAI(model=model)._get_encoding_model()
+    return
diff --git a/libs/partners/openai/tests/unit_tests/llms/test_base.py b/libs/partners/openai/tests/unit_tests/llms/test_base.py
index ebcd420995..d05bb0bfe5 100644
--- a/libs/partners/openai/tests/unit_tests/llms/test_base.py
+++ b/libs/partners/openai/tests/unit_tests/llms/test_base.py
@@ -7,7 +7,6 @@ from langchain_openai import OpenAI
 os.environ["OPENAI_API_KEY"] = "foo"
 
 
-@pytest.mark.requires("openai")
 def test_openai_model_param() -> None:
     llm = OpenAI(model="foo")
     assert llm.model_name == "foo"
@@ -15,19 +14,16 @@ def test_openai_model_param() -> None:
     assert llm.model_name == "foo"
 
 
-@pytest.mark.requires("openai")
 def test_openai_model_kwargs() -> None:
     llm = OpenAI(model_kwargs={"foo": "bar"})
     assert llm.model_kwargs == {"foo": "bar"}
 
 
-@pytest.mark.requires("openai")
 def test_openai_invalid_model_kwargs() -> None:
     with pytest.raises(ValueError):
         OpenAI(model_kwargs={"model_name": "foo"})
 
 
-@pytest.mark.requires("openai")
 def test_openai_incorrect_field() -> None:
     with pytest.warns(match="not default parameter"):
         llm = OpenAI(foo="bar")
@@ -46,3 +42,15 @@ def mock_completion() -> dict:
         ],
         "usage": {"prompt_tokens": 1, "completion_tokens": 2, "total_tokens": 3},
     }
+
+
+@pytest.mark.parametrize(
+    "model",
+    [
+        "gpt-3.5-turbo-instruct",
+        "text-davinci-003",
+    ],
+)
+def test_get_token_ids(model: str) -> None:
+    OpenAI(model=model).get_token_ids("foo")
+    return
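
For review context, the three production hunks converge on the same lookup pattern: defer to tiktoken's own model registry and fall back silently to cl100k_base. A minimal standalone sketch of that pattern follows. It is not code from the diff; _resolve_encoding is a hypothetical helper name, and the assertions assume a tiktoken release recent enough to resolve dated/preview model names by prefix.

import tiktoken


def _resolve_encoding(model_name: str) -> tiktoken.Encoding:
    # Defer to tiktoken's model registry rather than hardcoding snapshot
    # aliases such as gpt-3.5-turbo-0301, and fall back to cl100k_base when
    # the model is unknown. (The diff also drops the logger.warning here,
    # so the fallback is now silent.)
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        return tiktoken.get_encoding("cl100k_base")


# tiktoken matches names like "gpt-4-0125-preview" by prefix, which is why
# the manual gpt-3.5-turbo/gpt-4 remapping in chat_models/base.py could be
# deleted without changing the encoding these models resolve to:
assert _resolve_encoding("gpt-4-0125-preview").name == "cl100k_base"
assert _resolve_encoding("definitely-not-a-model").name == "cl100k_base"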