mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Fix: SnowflakeLoader returning empty documents (#5967)
**Fix SnowflakeLoader's Behavior of Returning Empty Documents** **Description:** This PR addresses the issue where the SnowflakeLoader was consistently returning empty documents. After investigation, it was found that the query method within the SnowflakeLoader was not properly fetching and processing the data. **Changes:** 1. Modified the query method in SnowflakeLoader to handle data fetch and processing more accurately. 2. Enhanced error handling within the SnowflakeLoader to catch and log potential issues that may arise during data loading. **Impact:** This fix will ensure the SnowflakeLoader reliably returns the expected documents instead of empty ones, improving the efficiency and reliability of data processing tasks in the LangChain project. Before Fix: `[ Document(page_content='', metadata={}), Document(page_content='', metadata={}), Document(page_content='', metadata={}), Document(page_content='', metadata={}), Document(page_content='', metadata={}), Document(page_content='', metadata={}), Document(page_content='', metadata={}), Document(page_content='', metadata={}), Document(page_content='', metadata={}), Document(page_content='', metadata={}) ]` After Fix: `[Document(page_content='CUSTOMER_ID: 1\nFIRST_NAME: John\nLAST_NAME: Doe\nEMAIL: john.doe@example.com\nPHONE: 555-123-4567\nADDRESS: 123 Elm St, San Francisco, CA 94102', metadata={}), Document(page_content='CUSTOMER_ID: 2\nFIRST_NAME: Jane\nLAST_NAME: Doe\nEMAIL: jane.doe@example.com\nPHONE: 555-987-6543\nADDRESS: 456 Oak St, San Francisco, CA 94103', metadata={}), Document(page_content='CUSTOMER_ID: 3\nFIRST_NAME: Michael\nLAST_NAME: Smith\nEMAIL: michael.smith@example.com\nPHONE: 555-234-5678\nADDRESS: 789 Pine St, San Francisco, CA 94104', metadata={}), Document(page_content='CUSTOMER_ID: 4\nFIRST_NAME: Emily\nLAST_NAME: Johnson\nEMAIL: emily.johnson@example.com\nPHONE: 555-345-6789\nADDRESS: 321 Maple St, San Francisco, CA 94105', metadata={}), Document(page_content='CUSTOMER_ID: 5\nFIRST_NAME: David\nLAST_NAME: Williams\nEMAIL: david.williams@example.com\nPHONE: 555-456-7890\nADDRESS: 654 Birch St, San Francisco, CA 94106', metadata={}), Document(page_content='CUSTOMER_ID: 6\nFIRST_NAME: Emma\nLAST_NAME: Jones\nEMAIL: emma.jones@example.com\nPHONE: 555-567-8901\nADDRESS: 987 Cedar St, San Francisco, CA 94107', metadata={}), Document(page_content='CUSTOMER_ID: 7\nFIRST_NAME: Oliver\nLAST_NAME: Brown\nEMAIL: oliver.brown@example.com\nPHONE: 555-678-9012\nADDRESS: 147 Cherry St, San Francisco, CA 94108', metadata={}), Document(page_content='CUSTOMER_ID: 8\nFIRST_NAME: Sophia\nLAST_NAME: Davis\nEMAIL: sophia.davis@example.com\nPHONE: 555-789-0123\nADDRESS: 369 Walnut St, San Francisco, CA 94109', metadata={}), Document(page_content='CUSTOMER_ID: 9\nFIRST_NAME: James\nLAST_NAME: Taylor\nEMAIL: james.taylor@example.com\nPHONE: 555-890-1234\nADDRESS: 258 Hawthorn St, San Francisco, CA 94110', metadata={}), Document(page_content='CUSTOMER_ID: 10\nFIRST_NAME: Isabella\nLAST_NAME: Wilson\nEMAIL: isabella.wilson@example.com\nPHONE: 555-901-2345\nADDRESS: 963 Aspen St, San Francisco, CA 94111', metadata={})] ` **Tests:** All unit and integration tests have been run and passed successfully. Additional tests were added to validate the new behavior of the SnowflakeLoader. **Checklist:** - [x] Code changes are covered by tests - [x] Code passes `make format` and `make lint` - [x] This PR does not introduce any breaking changes Please review and let me know if any changes are required.
This commit is contained in:
parent
62ec10a7f5
commit
d6f5d0c6b1
@ -53,13 +53,14 @@ class SnowflakeLoader(BaseLoader):
|
||||
self.database = database
|
||||
self.schema = schema
|
||||
self.parameters = parameters
|
||||
self.page_content_columns = page_content_columns
|
||||
self.metadata_columns = metadata_columns
|
||||
self.page_content_columns = (
|
||||
page_content_columns if page_content_columns is not None else ["*"]
|
||||
)
|
||||
self.metadata_columns = metadata_columns if metadata_columns is not None else []
|
||||
|
||||
def _execute_query(self) -> List[Dict[str, Any]]:
|
||||
try:
|
||||
import snowflake.connector
|
||||
from snowflake.connector import DictCursor
|
||||
except ImportError as ex:
|
||||
raise ValueError(
|
||||
"Could not import snowflake-connector-python package. "
|
||||
@ -77,14 +78,13 @@ class SnowflakeLoader(BaseLoader):
|
||||
parameters=self.parameters,
|
||||
)
|
||||
try:
|
||||
cur = conn.cursor(DictCursor)
|
||||
cur = conn.cursor()
|
||||
cur.execute("USE DATABASE " + self.database)
|
||||
cur.execute("USE SCHEMA " + self.schema)
|
||||
cur.execute(self.query, self.parameters)
|
||||
query_result = cur.fetchall()
|
||||
query_result = [
|
||||
{k.lower(): v for k, v in item.items()} for item in query_result
|
||||
]
|
||||
column_names = [column[0] for column in cur.description]
|
||||
query_result = [dict(zip(column_names, row)) for row in query_result]
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
query_result = []
|
||||
@ -111,6 +111,8 @@ class SnowflakeLoader(BaseLoader):
|
||||
print(f"An error occurred during the query: {query_result}")
|
||||
return []
|
||||
page_content_columns, metadata_columns = self._get_columns(query_result)
|
||||
if "*" in page_content_columns:
|
||||
page_content_columns = list(query_result[0].keys())
|
||||
for row in query_result:
|
||||
page_content = "\n".join(
|
||||
f"{k}: {v}" for k, v in row.items() if k in page_content_columns
|
||||
@ -120,4 +122,5 @@ class SnowflakeLoader(BaseLoader):
|
||||
yield doc
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load data into document objects."""
|
||||
return list(self.lazy_load())
|
||||
|
Loading…
Reference in New Issue
Block a user