From 60c837c58adee87d44cf249f51463ac1cf78538f Mon Sep 17 00:00:00 2001 From: Luk Regarde <46375015+itortouch@users.noreply.github.com> Date: Thu, 6 Apr 2023 18:45:14 +0200 Subject: [PATCH] Fix WhatsAppChatLoader regex pattern for 24 hour time format (#2458) Fix for the 24-hour time format bug. The WhatsApp regex can now parse either the 12-hour or the 24-hour time format. Linked [issue](https://github.com/hwchase17/langchain/issues/2457). --- langchain/document_loaders/whatsapp_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/langchain/document_loaders/whatsapp_chat.py b/langchain/document_loaders/whatsapp_chat.py index e7eeeace..bbe12a62 100644 --- a/langchain/document_loaders/whatsapp_chat.py +++ b/langchain/document_loaders/whatsapp_chat.py @@ -28,7 +28,7 @@ class WhatsAppChatLoader(BaseLoader): for line in lines: result = re.match( - r"(\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{1,2} (?:AM|PM)) - (.*?): (.*)", + r"(\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{1,2}(?: AM| PM)?) - (.*?): (.*)", line.strip(), ) if result: