From d1b7a934aa8594e7088ffad856e1598329657a79 Mon Sep 17 00:00:00 2001 From: Mohammad Mohtashim <45242107+keenborder786@users.noreply.github.com> Date: Fri, 14 Jun 2024 20:28:24 +0500 Subject: [PATCH] [Community]: HuggingFaceCrossEncoder `score` accounting for pairs. (#22578) - **Description:** Some of the Cross-Encoder models provide scores in pairs, i.e., (not_relevant, relevant). However, the `HuggingFaceCrossEncoder` `score` method does not currently account for this case. This PR addresses the issue by modifying the method to keep only the relevant score when the scores are provided as a pair. The reason for focusing on the relevant score is that the compressors select the top-n documents based on relevance. - **Issue:** #22556 - Please also refer to this [comment](https://github.com/UKPLab/sentence-transformers/issues/568#issuecomment-729153075) --- .../langchain_community/cross_encoders/huggingface.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libs/community/langchain_community/cross_encoders/huggingface.py b/libs/community/langchain_community/cross_encoders/huggingface.py index 6cfbceff7a..0a1229bb03 100644 --- a/libs/community/langchain_community/cross_encoders/huggingface.py +++ b/libs/community/langchain_community/cross_encoders/huggingface.py @@ -60,4 +60,8 @@ class HuggingFaceCrossEncoder(BaseModel, BaseCrossEncoder): List of scores, one for each pair. """ scores = self.client.predict(text_pairs) + # Somes models e.g bert-multilingual-passage-reranking-msmarco + # gives two score not_relevant and relevant as compare with the query. + if len(scores.shape) > 1: # we are going to get the relevant scores + scores = map(lambda x: x[1], scores) return scores