diff --git a/docs/extras/integrations/chat/azure_chat_openai.ipynb b/docs/extras/integrations/chat/azure_chat_openai.ipynb index 2c599973e1..d176996999 100644 --- a/docs/extras/integrations/chat/azure_chat_openai.ipynb +++ b/docs/extras/integrations/chat/azure_chat_openai.ipynb @@ -74,6 +74,124 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "markdown", + "id": "f27fa24d", + "metadata": {}, + "source": [ + "## Model Version\n", + "Azure OpenAI responses contain a `model` property, which is the name of the model used to generate the response. However, unlike native OpenAI responses, it does not contain the version of the model, which is set on the deployment in Azure. This makes it tricky to know which version of the model was used to generate the response, which as a result can lead to e.g. a wrong total cost calculation with `OpenAICallbackHandler`.\n", + "\n", + "To solve this problem, you can pass the `model_version` parameter to the `AzureChatOpenAI` class, which will be added to the model name in the LLM output. This way you can easily distinguish between different versions of the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0531798a", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.callbacks import get_openai_callback" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "3fd97dfc", + "metadata": {}, + "outputs": [], + "source": [ + "BASE_URL = \"https://{endpoint}.openai.azure.com\"\n", + "API_KEY = \"...\"\n", + "DEPLOYMENT_NAME = \"gpt-35-turbo\" # in Azure, this deployment has version 0613 - input and output tokens are counted separately" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "aceddb72", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Cost (USD): $0.000054\n" + ] + } + ], + "source": [ + "model = AzureChatOpenAI(\n", + " openai_api_base=BASE_URL,\n", + " openai_api_version=\"2023-05-15\",\n", + " deployment_name=DEPLOYMENT_NAME,\n", + " openai_api_key=API_KEY,\n", + " openai_api_type=\"azure\",\n", + ")\n", + "with get_openai_callback() as cb:\n", + " model(\n", + " [\n", + " HumanMessage(\n", + " content=\"Translate this sentence from English to French. I love programming.\"\n", + " )\n", + " ]\n", + " )\n", + " print(f\"Total Cost (USD): ${format(cb.total_cost, '.6f')}\") # without specifying the model version, flat-rate 0.002 USD per 1k input and output tokens is used\n" + ] + }, + { + "cell_type": "markdown", + "id": "2e61eefd", + "metadata": {}, + "source": [ + "We can provide the model version to `AzureChatOpenAI` constructor. It will get appended to the model name returned by Azure OpenAI and cost will be counted correctly." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "8d5e54e9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Cost (USD): $0.000044\n" + ] + } + ], + "source": [ + "model0613 = AzureChatOpenAI(\n", + " openai_api_base=BASE_URL,\n", + " openai_api_version=\"2023-05-15\",\n", + " deployment_name=DEPLOYMENT_NAME,\n", + " openai_api_key=API_KEY,\n", + " openai_api_type=\"azure\",\n", + " model_version=\"0613\"\n", + ")\n", + "with get_openai_callback() as cb:\n", + " model0613(\n", + " [\n", + " HumanMessage(\n", + " content=\"Translate this sentence from English to French. I love programming.\"\n", + " )\n", + " ]\n", + " )\n", + " print(f\"Total Cost (USD): ${format(cb.total_cost, '.6f')}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99682534", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -92,7 +210,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/libs/langchain/langchain/callbacks/openai_info.py b/libs/langchain/langchain/callbacks/openai_info.py index ba94445c31..2952e99a93 100644 --- a/libs/langchain/langchain/callbacks/openai_info.py +++ b/libs/langchain/langchain/callbacks/openai_info.py @@ -31,8 +31,19 @@ MODEL_COST_PER_1K_TOKENS = { "gpt-3.5-turbo-0613-completion": 0.002, "gpt-3.5-turbo-16k-completion": 0.004, "gpt-3.5-turbo-16k-0613-completion": 0.004, + # Azure GPT-35 input + "gpt-35-turbo": 0.0015, # Azure OpenAI version of ChatGPT + "gpt-35-turbo-0301": 0.0015, # Azure OpenAI version of ChatGPT + "gpt-35-turbo-0613": 0.0015, + "gpt-35-turbo-16k": 0.003, + "gpt-35-turbo-16k-0613": 0.003, + # Azure GPT-35 output + "gpt-35-turbo-completion": 0.002, # Azure OpenAI version of ChatGPT + "gpt-35-turbo-0301-completion": 0.002, # Azure OpenAI version of ChatGPT + "gpt-35-turbo-0613-completion": 0.002, + 
"gpt-35-turbo-16k-completion": 0.004, + "gpt-35-turbo-16k-0613-completion": 0.004, # Others - "gpt-35-turbo": 0.002, # Azure OpenAI version of ChatGPT "text-ada-001": 0.0004, "ada": 0.0004, "text-babbage-001": 0.0005, @@ -69,7 +80,9 @@ def standardize_model_name( if "ft-" in model_name: return model_name.split(":")[0] + "-finetuned" elif is_completion and ( - model_name.startswith("gpt-4") or model_name.startswith("gpt-3.5") + model_name.startswith("gpt-4") + or model_name.startswith("gpt-3.5") + or model_name.startswith("gpt-35") ): return model_name + "-completion" else: diff --git a/libs/langchain/langchain/chat_models/azure_openai.py b/libs/langchain/langchain/chat_models/azure_openai.py index 69e34646e3..71ca287484 100644 --- a/libs/langchain/langchain/chat_models/azure_openai.py +++ b/libs/langchain/langchain/chat_models/azure_openai.py @@ -40,11 +40,20 @@ class AzureChatOpenAI(ChatOpenAI): Be aware the API version may change. + You can also specify the version of the model using ``model_version`` constructor + parameter, as Azure OpenAI doesn't return model version with the response. + + Default is empty. When you specify the version, it will be appended to the + model name in the response. Setting correct version will help you to calculate the + cost properly. Model version is not validated, so make sure you set it correctly + to get the correct cost. + Any parameters that are valid to be passed to the openai.create call can be passed in, even if not explicitly saved on this class. 
""" deployment_name: str = "" + model_version: str = "" openai_api_type: str = "" openai_api_base: str = "" openai_api_version: str = "" @@ -137,7 +146,19 @@ class AzureChatOpenAI(ChatOpenAI): for res in response["choices"]: if res.get("finish_reason", None) == "content_filter": raise ValueError( - "Azure has not provided the response due to a content" - " filter being triggered" + "Azure has not provided the response due to a content filter " + "being triggered" ) - return super()._create_chat_result(response) + chat_result = super()._create_chat_result(response) + + if "model" in response: + model = response["model"] + if self.model_version: + model = f"{model}-{self.model_version}" + + if chat_result.llm_output is not None and isinstance( + chat_result.llm_output, dict + ): + chat_result.llm_output["model_name"] = model + + return chat_result diff --git a/libs/langchain/tests/unit_tests/callbacks/test_openai_info.py b/libs/langchain/tests/unit_tests/callbacks/test_openai_info.py index 19b5542446..1fa62a74ad 100644 --- a/libs/langchain/tests/unit_tests/callbacks/test_openai_info.py +++ b/libs/langchain/tests/unit_tests/callbacks/test_openai_info.py @@ -60,3 +60,67 @@ def test_on_llm_end_finetuned_model(handler: OpenAICallbackHandler) -> None: ) handler.on_llm_end(response) assert handler.total_cost > 0 + + +@pytest.mark.parametrize( + "model_name,expected_cost", + [ + ("gpt-35-turbo", 0.0035), + ("gpt-35-turbo-0301", 0.0035), + ( + "gpt-35-turbo-0613", + 0.0035, + ), + ( + "gpt-35-turbo-16k-0613", + 0.007, + ), + ( + "gpt-35-turbo-16k", + 0.007, + ), + ("gpt-4", 0.09), + ("gpt-4-0314", 0.09), + ("gpt-4-0613", 0.09), + ("gpt-4-32k", 0.18), + ("gpt-4-32k-0314", 0.18), + ("gpt-4-32k-0613", 0.18), + ], +) +def test_on_llm_end_azure_openai( + handler: OpenAICallbackHandler, model_name: str, expected_cost: float +) -> None: + response = LLMResult( + generations=[], + llm_output={ + "token_usage": { + "prompt_tokens": 1000, + "completion_tokens": 1000, + "total_tokens": 
2000, + }, + "model_name": model_name, + }, + ) + handler.on_llm_end(response) + assert handler.total_cost == expected_cost + + +@pytest.mark.parametrize( + "model_name", ["gpt-35-turbo-16k-0301", "gpt-4-0301", "gpt-4-32k-0301"] +) +def test_on_llm_end_no_cost_invalid_model( + handler: OpenAICallbackHandler, model_name: str +) -> None: + response = LLMResult( + generations=[], + llm_output={ + "token_usage": { + "prompt_tokens": 1000, + "completion_tokens": 1000, + "total_tokens": 2000, + }, + "model_name": model_name, + }, + ) + handler.on_llm_end(response) + assert handler.total_cost == 0 diff --git a/libs/langchain/tests/unit_tests/chat_models/test_azureopenai.py b/libs/langchain/tests/unit_tests/chat_models/test_azureopenai.py new file mode 100644 index 0000000000..94a8e6d44d --- /dev/null +++ b/libs/langchain/tests/unit_tests/chat_models/test_azureopenai.py @@ -0,0 +1,52 @@ +import json +import os +from typing import Any, Mapping, cast + +import pytest + +from langchain.chat_models.azure_openai import AzureChatOpenAI + +os.environ["OPENAI_API_KEY"] = "test" +os.environ["OPENAI_API_BASE"] = "https://oai.azure.com/" +os.environ["OPENAI_API_VERSION"] = "2023-05-01" + + +@pytest.mark.requires("openai") +@pytest.mark.parametrize( + "model_name", ["gpt-4", "gpt-4-32k", "gpt-35-turbo", "gpt-35-turbo-16k"] +) +def test_model_name_set_on_chat_result_when_present_in_response( + model_name: str, +) -> None: + sample_response_text = f""" + {{ + "id": "chatcmpl-7ryweq7yc8463fas879t9hdkkdf", + "object": "chat.completion", + "created": 1690381189, + "model": "{model_name}", + "choices": [ + {{ + "index": 0, + "finish_reason": "stop", + "message": {{ + "role": "assistant", + "content": "I'm an AI assistant that can help you." 
+ }} + }} + ], + "usage": {{ + "completion_tokens": 28, + "prompt_tokens": 15, + "total_tokens": 43 + }} + }} + """ + # convert sample_response_text to instance of Mapping[str, Any] + sample_response = json.loads(sample_response_text) + mock_response = cast(Mapping[str, Any], sample_response) + mock_chat = AzureChatOpenAI() + chat_result = mock_chat._create_chat_result(mock_response) + assert ( + chat_result.llm_output is not None + and chat_result.llm_output["model_name"] == model_name + )