community: Add llm-extraction option to FireCrawl Document Loader (#25231)

**Description:** This minor PR adds `llm_extraction` to the FireCrawl loader. The feature is supported by the FireCrawl API and the Python SDK, but the LangChain loader does not include it in the documents it returns. **Twitter handle:** [scalable_pizza](https://x.com/scalablepizza) --------- Co-authored-by: Chester Curme <chester.curme@gmail.com>
2024-11-10 01:10:59 +00:00 · 2024-08-09 19:29:10 +05:30 · 2024-08-09 19:29:10 +05:30 · 66b7206ab6
commit 66b7206ab6
parent c81c77b465
1 changed files with 7 additions and 4 deletions
--- a/libs/community/langchain_community/document_loaders/firecrawl.py
+++ b/libs/community/langchain_community/document_loaders/firecrawl.py
@@ -63,7 +63,10 @@ class FireCrawlLoader(BaseLoader):
                f"Unrecognized mode '{self.mode}'. Expected one of 'crawl', 'scrape'."
            )
        for doc in firecrawl_docs:
-            yield Document(
-                page_content=doc.get("markdown", ""),
-                metadata=doc.get("metadata", {}),
-            )
+            metadata = doc.get("metadata", {})
+            if (self.params is not None) and self.params.get(
+                "extractorOptions", {}
+            ).get("mode") == "llm-extraction":
+                metadata["llm_extraction"] = doc.get("llm_extraction")
+
+            yield Document(page_content=doc.get("markdown", ""), metadata=metadata)