@ -2,7 +2,6 @@
from __future__ import annotations
from __future__ import annotations
import re
import re
import traceback
from typing import TYPE_CHECKING , Any , Dict , List , Optional , Sequence , Tuple , Union
from typing import TYPE_CHECKING , Any , Dict , List , Optional , Sequence , Tuple , Union
from langchain_core . pydantic_v1 import BaseModel , Field , root_validator
from langchain_core . pydantic_v1 import BaseModel , Field , root_validator
@ -32,9 +31,10 @@ class CassandraDatabase:
include_tables : Optional [ List [ str ] ] = None ,
include_tables : Optional [ List [ str ] ] = None ,
cassio_init_kwargs : Optional [ Dict [ str , Any ] ] = None ,
cassio_init_kwargs : Optional [ Dict [ str , Any ] ] = None ,
) :
) :
self . _session = self . _resolve_session ( session , cassio_init_kwargs )
_session = self . _resolve_session ( session , cassio_init_kwargs )
if not self . _session :
if not _session :
raise ValueError ( " Session not provided and cannot be resolved " )
raise ValueError ( " Session not provided and cannot be resolved " )
self . _session = _session
self . _exclude_keyspaces = IGNORED_KEYSPACES
self . _exclude_keyspaces = IGNORED_KEYSPACES
self . _exclude_tables = exclude_tables or [ ]
self . _exclude_tables = exclude_tables or [ ]
@ -44,52 +44,28 @@ class CassandraDatabase:
self ,
self ,
query : str ,
query : str ,
fetch : str = " all " ,
fetch : str = " all " ,
include_columns : bool = False ,
* * kwargs : Any ,
* * kwargs : Any ,
) - > Union [ str , Sequence[ Dict[ str , Any ] ] , ResultSet ] :
) - > Union [ li st, Dict[ str , Any ] , ResultSet ] :
""" Execute a CQL query and return the results. """
""" Execute a CQL query and return the results. """
clean_query = self . _validate_cql ( query , " SELECT " )
result = self . _session . execute ( clean_query , * * kwargs )
if fetch == " all " :
if fetch == " all " :
return list ( result )
return self . fetch_all ( query , * * kwargs )
elif fetch == " one " :
elif fetch == " one " :
return result . one ( ) . _asdict ( ) if result else { }
return self . fetch_one ( query , * * kwargs )
elif fetch == " cursor " :
elif fetch == " cursor " :
return result
return self . _fetch ( query , * * kwargs )
else :
else :
raise ValueError ( " Fetch parameter must be either ' one ' , ' all ' , or ' cursor ' " )
raise ValueError ( " Fetch parameter must be either ' one ' , ' all ' , or ' cursor ' " )
def run_no_throw(
    self,
    query: str,
    fetch: str = "all",
    include_columns: bool = False,
    **kwargs: Any,
) -> Union[str, Sequence[Dict[str, Any]], ResultSet]:
    """Execute a CQL query and return the results or an error message.

    Non-raising wrapper around ``run``: all arguments are forwarded unchanged
    and any exception raised by ``run`` is turned into a string.

    Args:
        query: The CQL query to execute.
        fetch: Forwarded to ``run``.
        include_columns: Forwarded to ``run``.
        **kwargs: Extra keyword arguments forwarded to ``run``.

    Returns:
        Whatever ``run`` returns, or a string starting with ``"Error: "``
        followed by the exception message and the formatted traceback.
    """
    try:
        return self.run(query, fetch, include_columns, **kwargs)
    except Exception as e:
        # Deliberately broad: this method's contract is to never raise, so the
        # failure is reported to the caller as text instead.
        return f"Error: {e}\n{traceback.format_exc()}"


def _fetch(self, query: str, **kwargs: Any) -> ResultSet:
    """Validate ``query`` as a SELECT statement and execute it.

    Args:
        query: The CQL query to execute.
        **kwargs: Extra keyword arguments passed to the session's ``execute``.

    Returns:
        The object returned by ``self._session.execute``.
    """
    # Validation happens before anything is sent to the session.
    clean_query = self._validate_cql(query, "SELECT")
    return self._session.execute(clean_query, **kwargs)
def get_keyspace_tables_str_no_throw(self, keyspace: str) -> str:
    """Get the tables for the specified keyspace, never raising.

    Non-raising wrapper around ``get_keyspace_tables_str``.

    Args:
        keyspace: Name of the keyspace, forwarded unchanged.

    Returns:
        The string returned by ``get_keyspace_tables_str``, or a string
        starting with ``"Error: "`` followed by the exception message and the
        formatted traceback if that call raised.
    """
    try:
        return self.get_keyspace_tables_str(keyspace)
    except Exception as e:
        # Deliberately broad: this method's contract is to never raise, so the
        # failure is reported to the caller as text instead.
        return f"Error: {e}\n{traceback.format_exc()}"
def get_keyspace_tables_str(self, keyspace: str) -> str:
    """Get the tables for the specified keyspace as one markdown string.

    Args:
        keyspace: Name of the keyspace whose tables are rendered.

    Returns:
        The ``as_markdown()`` output of every table returned by
        ``get_keyspace_tables``, each followed by a blank line. Empty string
        when there are no tables.
    """
    tables = self.get_keyspace_tables(keyspace)
    # join() builds the string in one pass instead of repeated `+=`.
    return "".join(table.as_markdown() + "\n\n" for table in tables)


def fetch_all(self, query: str, **kwargs: Any) -> list:
    """Execute ``query`` through ``_fetch`` and return all rows as a list.

    Args:
        query: The CQL query to execute.
        **kwargs: Extra keyword arguments forwarded to ``_fetch``.

    Returns:
        A list built from iterating the object returned by ``_fetch``.
    """
    return list(self._fetch(query, **kwargs))
def fetch_one(self, query: str, **kwargs: Any) -> Dict[str, Any]:
    """Execute ``query`` through ``_fetch`` and return the first row as a dict.

    Args:
        query: The CQL query to execute.
        **kwargs: Extra keyword arguments forwarded to ``_fetch``.

    Returns:
        ``row._asdict()`` for the row returned by ``result.one()``, or an
        empty dict when the result is falsy or ``one()`` yields ``None``.
    """
    result = self._fetch(query, **kwargs)
    # Check the row itself as well as the result set, so a missing row can
    # never surface as an AttributeError on None.
    row = result.one() if result else None
    return row._asdict() if row is not None else {}
def get_keyspace_tables ( self , keyspace : str ) - > List [ Table ] :
def get_keyspace_tables ( self , keyspace : str ) - > List [ Table ] :
""" Get the Table objects for the specified keyspace. """
""" Get the Table objects for the specified keyspace. """
@ -99,17 +75,6 @@ class CassandraDatabase:
else :
else :
return [ ]
return [ ]
def get_table_data_no_throw(
    self, keyspace: str, table: str, predicate: str, limit: int
) -> str:
    """Get data from the specified table in the specified keyspace.

    Optionally can take a predicate for the WHERE clause and a limit.
    Non-raising wrapper around ``get_table_data``: all arguments are forwarded
    unchanged.

    Args:
        keyspace: Forwarded to ``get_table_data``.
        table: Forwarded to ``get_table_data``.
        predicate: Forwarded to ``get_table_data``.
        limit: Forwarded to ``get_table_data``.

    Returns:
        The string returned by ``get_table_data``, or a string starting with
        ``"Error: "`` followed by the exception message and the formatted
        traceback if that call raised.
    """
    try:
        return self.get_table_data(keyspace, table, predicate, limit)
    except Exception as e:
        # Deliberately broad: this method's contract is to never raise, so the
        # failure is reported to the caller as text instead.
        return f"Error: {e}\n{traceback.format_exc()}"
# This is a more basic string building function that doesn't use a query builder
# This is a more basic string building function that doesn't use a query builder
# or prepared statements
# or prepared statements
# TODO: Refactor to use prepared statements
# TODO: Refactor to use prepared statements
@ -127,7 +92,7 @@ class CassandraDatabase:
query + = " ; "
query + = " ; "
result = self . run( query , fetch = " all " )
result = self . fetch_all( query )
data = " \n " . join ( str ( row ) for row in result )
data = " \n " . join ( str ( row ) for row in result )
return data
return data
@ -144,15 +109,13 @@ class CassandraDatabase:
by iterating over all tables within that keyspace and calling their
by iterating over all tables within that keyspace and calling their
as_markdown method .
as_markdown method .
Parameters :
Args :
- keyspace ( str ) : The name of the keyspace to generate markdown
keyspace : The name of the keyspace to generate markdown documentation for .
documentation for .
tables : list of tables in the keyspace ; it will be resolved if not provided .
- tables ( list [ Table ] ) : list of tables in the keyspace ; it will be resolved
if not provided .
Returns :
Returns :
A string containing the markdown representation of the specified
A string containing the markdown representation of the specified
keyspace schema .
keyspace schema .
"""
"""
if not tables :
if not tables :
tables = self . get_keyspace_tables ( keyspace )
tables = self . get_keyspace_tables ( keyspace )
@ -184,10 +147,10 @@ class CassandraDatabase:
the subset of keyspaces that have been resolved in this instance .
the subset of keyspaces that have been resolved in this instance .
Returns :
Returns :
A markdown string that documents the schema of all resolved keyspaces and
A markdown string that documents the schema of all resolved keyspaces and
their tables within this CassandraDatabase instance . This includes keyspace
their tables within this CassandraDatabase instance . This includes keyspace
names , table names , comments , columns , partition keys , clustering keys ,
names , table names , comments , columns , partition keys , clustering keys ,
and indexes for each table .
and indexes for each table .
"""
"""
schema = self . _resolve_schema ( )
schema = self . _resolve_schema ( )
output = " # Cassandra Database Schema \n \n "
output = " # Cassandra Database Schema \n \n "
@ -201,18 +164,18 @@ class CassandraDatabase:
Ensures that ` cql ` starts with the specified type ( e . g . , SELECT ) and does
Ensures that ` cql ` starts with the specified type ( e . g . , SELECT ) and does
not contain content that could indicate CQL injection vulnerabilities .
not contain content that could indicate CQL injection vulnerabilities .
Parameter s:
Arg s:
- cql ( str ) : The CQL query string to be validated .
cql : The CQL query string to be validated .
- type ( str ) : The expected starting keyword of the query , used to verify
type : The expected starting keyword of the query , used to verify
that the query begins with the correct operation type
that the query begins with the correct operation type
( e . g . , " SELECT " , " UPDATE " ) . Defaults to " SELECT " .
( e . g . , " SELECT " , " UPDATE " ) . Defaults to " SELECT " .
Returns :
Returns :
- str : The trimmed and validated CQL query string without a trailing semicolon .
The trimmed and validated CQL query string without a trailing semicolon .
Raises :
Raises :
- ValueError : If the value of ` type ` is not supported
ValueError : If the value of ` type ` is not supported
- DatabaseError : If ` cql ` is considered unsafe
DatabaseError : If ` cql ` is considered unsafe
"""
"""
SUPPORTED_TYPES = [ " SELECT " ]
SUPPORTED_TYPES = [ " SELECT " ]
if type and type . upper ( ) not in SUPPORTED_TYPES :
if type and type . upper ( ) not in SUPPORTED_TYPES :
@ -246,29 +209,26 @@ class CassandraDatabase:
# The trimmed query, before modifications
# The trimmed query, before modifications
return cql_trimmed
return cql_trimmed
def _fetch_keyspaces ( self , keyspace _li st : Optional [ List [ str ] ] = None ) - > List [ str ] :
def _fetch_keyspaces ( self , keyspace s: Optional [ List [ str ] ] = None ) - > List [ str ] :
"""
"""
Fetches a list of keyspace names from the Cassandra database . The list can be
Fetches a list of keyspace names from the Cassandra database . The list can be
filtered by a provided list of keyspace names or by excluding predefined
filtered by a provided list of keyspace names or by excluding predefined
keyspaces .
keyspaces .
Parameters :
Args :
- keyspace_list ( Optional [ List [ str ] ] ) : A list of keyspace names to specifically
keyspaces : A list of keyspace names to specifically include .
include . If provided and not empty , the method returns only the keyspaces
If provided and not empty , the method returns only the keyspaces
present in this list . If not provided or empty , the method returns all
present in this list .
keyspaces except those specified in the _exclude_keyspaces attribute .
If not provided or empty , the method returns all keyspaces except those
specified in the _exclude_keyspaces attribute .
Returns :
Returns :
- List [ str ] : A list of keyspace names according to the filtering criteria .
A list of keyspace names according to the filtering criteria .
"""
"""
all_keyspaces = self . run (
all_keyspaces = self . fetch_all (
" SELECT keyspace_name FROM system_schema.keyspaces " , fetch = " all "
" SELECT keyspace_name FROM system_schema.keyspaces "
)
)
# Type check to ensure 'all_keyspaces' is a sequence of dictionaries
if not isinstance ( all_keyspaces , Sequence ) :
raise TypeError ( " Expected a sequence of dictionaries from ' run ' method. " )
# Filtering keyspaces based on 'keyspace_list' and '_exclude_keyspaces'
# Filtering keyspaces based on 'keyspace_list' and '_exclude_keyspaces'
filtered_keyspaces = [ ]
filtered_keyspaces = [ ]
for ks in all_keyspaces :
for ks in all_keyspaces :
@ -276,87 +236,105 @@ class CassandraDatabase:
continue # Skip if the row is not a dictionary.
continue # Skip if the row is not a dictionary.
keyspace_name = ks [ " keyspace_name " ]
keyspace_name = ks [ " keyspace_name " ]
if keyspace _li st and keyspace_name in keyspace _li st :
if keyspace s and keyspace_name in keyspace s:
filtered_keyspaces . append ( keyspace_name )
filtered_keyspaces . append ( keyspace_name )
elif not keyspace _li st and keyspace_name not in self . _exclude_keyspaces :
elif not keyspace s and keyspace_name not in self . _exclude_keyspaces :
filtered_keyspaces . append ( keyspace_name )
filtered_keyspaces . append ( keyspace_name )
return filtered_keyspaces
return filtered_keyspaces
def _fetch_schema_data(self, keyspace_list: List[str]) -> Tuple:
    """Fetch table, column and index schema rows for a list of keyspaces.

    Single entry point that delegates to ``_fetch_tables_data``,
    ``_fetch_columns_data`` and ``_fetch_indexes_data``.

    Args:
        keyspace_list: A list of keyspace names from which to fetch schema
            data.

    Returns:
        A tuple ``(tables_data, columns_data, indexes_data)`` holding the
        results of the three helper queries, in that order.
    """
    return (
        self._fetch_tables_data(keyspace_list),
        self._fetch_columns_data(keyspace_list),
        self._fetch_indexes_data(keyspace_list),
    )


def _format_keyspace_query(self, query: str, keyspaces: List[str]) -> str:
    """Append a ``WHERE keyspace_name IN (...)`` filter to ``query``.

    Args:
        query: The CQL statement to extend; it is used as-is.
        keyspaces: Keyspace names to put in the IN clause.

    Returns:
        ``query`` followed by the WHERE clause, with each keyspace name
        rendered as a single-quoted CQL string literal.
    """
    # A single quote inside a CQL string literal is written by doubling it;
    # without this a name containing a quote would break out of the literal.
    keyspace_in_clause = ", ".join(
        "'" + ks.replace("'", "''") + "'" for ks in keyspaces
    )
    return f"{query} WHERE keyspace_name IN ({keyspace_in_clause})"


def _fetch_tables_data(self, keyspaces: List[str]) -> list:
    """Fetches tables schema data, filtered by a list of keyspaces.

    All requested keyspaces are covered by a single query.

    Args:
        keyspaces: A list of keyspace names from which to fetch tables schema
            data.

    Returns:
        The rows returned by ``fetch_all`` for the query, which selects
        keyspace name, table name and comment.
    """
    tables_query = self._format_keyspace_query(
        "SELECT keyspace_name, table_name, comment FROM system_schema.tables",
        keyspaces,
    )
    return self.fetch_all(tables_query)


def _fetch_columns_data(self, keyspaces: List[str]) -> list:
    """Fetches columns schema data, filtered by a list of keyspaces.

    All requested keyspaces are covered by a single query.

    Args:
        keyspaces: A list of keyspace names from which to fetch columns schema
            data.

    Returns:
        The rows returned by ``fetch_all`` for the query, which selects
        keyspace name, table name, column name, type, kind, clustering order
        and position.
    """
    columns_query = self._format_keyspace_query(
        """
        SELECT keyspace_name, table_name, column_name, type, kind,
        clustering_order, position
        FROM system_schema.columns
        """,
        keyspaces,
    )
    return self.fetch_all(columns_query)


def _fetch_indexes_data(self, keyspaces: List[str]) -> list:
    """Fetches indexes schema data, filtered by a list of keyspaces.

    All requested keyspaces are covered by a single query.

    Args:
        keyspaces: A list of keyspace names from which to fetch indexes schema
            data.

    Returns:
        The rows returned by ``fetch_all`` for the query, which selects
        keyspace name, table name, index name, kind and options.
    """
    indexes_query = self._format_keyspace_query(
        """
        SELECT keyspace_name, table_name, index_name,
        kind, options
        FROM system_schema.indexes
        """,
        keyspaces,
    )
    return self.fetch_all(indexes_query)
def _resolve_schema (
def _resolve_schema (
self , keyspace_list : Optional [ List [ str ] ] = None
self , keyspace s: Optional [ List [ str ] ] = None
) - > Dict [ str , List [ Table ] ] :
) - > Dict [ str , List [ Table ] ] :
"""
"""
Efficiently fetches and organizes Cassandra table schema information ,
Efficiently fetches and organizes Cassandra table schema information ,
such as comments , columns , and indexes , into a dictionary mapping keyspace
such as comments , columns , and indexes , into a dictionary mapping keyspace
names to lists of Table objects .
names to lists of Table objects .
Args :
keyspaces : An optional list of keyspace names from which to fetch tables
schema data .
Returns :
Returns :
A dictionary with keyspace names as keys and lists of Table objects as values ,
A dictionary with keyspace names as keys and lists of Table objects as
where each Table object is populated with schema details appropriate for its
values , where each Table object is populated with schema details
keyspace and table name .
appropriate for its keyspace and table name .
"""
"""
if not keyspace _li st :
if not keyspace s:
keyspace _li st = self . _fetch_keyspaces ( )
keyspace s = self . _fetch_keyspaces ( )
tables_data , columns_data , indexes_data = self . _fetch_schema_data ( keyspace_list )
tables_data = self . _fetch_tables_data ( keyspaces )
columns_data = self . _fetch_columns_data ( keyspaces )
indexes_data = self . _fetch_indexes_data ( keyspaces )
keyspace_dict : dict = { }
keyspace_dict : dict = { }
for table_data in tables_data :
for table_data in tables_data :
@ -415,11 +393,11 @@ class CassandraDatabase:
return keyspace_dict
return keyspace_dict
@staticmethod
def _resolve_session (
def _resolve_session (
self ,
session : Optional [ Session ] = None ,
session : Optional [ Session ] = None ,
cassio_init_kwargs : Optional [ Dict [ str , Any ] ] = None ,
cassio_init_kwargs : Optional [ Dict [ str , Any ] ] = None ,
) - > Session:
) - > Optional[ Session] :
"""
"""
Attempts to resolve and return a Session object for use in database operations .
Attempts to resolve and return a Session object for use in database operations .
@ -430,18 +408,17 @@ class CassandraDatabase:
3. A new ` cassio ` session derived from ` cassio_init_kwargs ` ,
3. A new ` cassio ` session derived from ` cassio_init_kwargs ` ,
4. ` None `
4. ` None `
Parameters :
Args :
- session ( Optional [ Session ] ) : An optional session to use directly .
session : An optional session to use directly .
- cassio_init_kwargs ( Optional [ Dict [ str , Any ] ] ) : An optional dictionary of
cassio_init_kwargs : An optional dictionary of keyword arguments to ` cassio ` .
keyword arguments to ` cassio ` .
Returns :
Returns :
- Session : The resolved session object if successful , or ` None ` if the session
The resolved session object if successful , or ` None ` if the session
cannot be resolved .
cannot be resolved .
Raises :
Raises :
- ValueError : If ` cassio_init_kwargs ` is provided but is not a dictionary of
ValueError : If ` cassio_init_kwargs ` is provided but is not a dictionary of
keyword arguments .
keyword arguments .
"""
"""
# Prefer given session
# Prefer given session
@ -535,20 +512,18 @@ class Table(BaseModel):
Generates a Markdown representation of the Cassandra table schema , allowing for
Generates a Markdown representation of the Cassandra table schema , allowing for
customizable header levels for the table name section .
customizable header levels for the table name section .
Parameters :
Args :
- include_keyspace ( bool ) : If True , includes the keyspace in the output .
include_keyspace : If True , includes the keyspace in the output .
Defaults to True .
Defaults to True .
- header_level ( Optional [ int ] ) : Specifies the markdown header level for the
header_level : Specifies the markdown header level for the table name .
table name .
If None , the table name is included without a header .
If None , the table name is included without a header . Defaults to None
Defaults to None ( no header level ) .
( no header level ) .
Returns :
Returns :
- str : A string in Markdown format detailing the table name
A string in Markdown format detailing the table name
( with optional header level ) ,
( with optional header level ) , keyspace ( optional ) , comment , columns ,
keyspace ( optional ) , comment , columns , partition keys , clustering keys
partition keys , clustering keys ( with optional clustering order ) ,
( with optional clustering order ) ,
and indexes .
and indexes .
"""
"""
output = " "
output = " "
if header_level is not None :
if header_level is not None :