2022-05-31 20:11:35 +00:00
local DataStorage = require ( " datastorage " )
local Device = require ( " device " )
local SQ3 = require ( " lua-ljsqlite3/init " )
local LuaData = require ( " luadata " )
2022-11-11 14:53:06 +00:00
local logger = require ( " logger " )
2022-05-31 20:11:35 +00:00
local db_location = DataStorage : getSettingsDir ( ) .. " /vocabulary_builder.sqlite3 "
2022-10-02 09:51:54 +00:00
local DB_SCHEMA_VERSION = 20221002
2022-05-31 20:11:35 +00:00
local VOCABULARY_DB_SCHEMA = [ [
-- To store looked up words
CREATE TABLE IF NOT EXISTS " vocabulary " (
" word " TEXT NOT NULL UNIQUE ,
2022-06-12 19:34:17 +00:00
" title_id " INTEGER ,
2022-05-31 20:11:35 +00:00
" create_time " INTEGER NOT NULL ,
" review_time " INTEGER ,
" due_time " INTEGER NOT NULL ,
" review_count " INTEGER NOT NULL DEFAULT 0 ,
2022-06-12 19:34:17 +00:00
" prev_context " TEXT ,
" next_context " TEXT ,
2022-10-02 09:51:54 +00:00
" streak_count " INTEGER NOT NULL DEFAULT 0 ,
2022-05-31 20:11:35 +00:00
PRIMARY KEY ( " word " )
) ;
2022-06-12 19:34:17 +00:00
CREATE TABLE IF NOT EXISTS " title " (
" id " INTEGER NOT NULL UNIQUE ,
" name " TEXT UNIQUE ,
2022-07-31 07:02:09 +00:00
" filter " INTEGER NOT NULL DEFAULT 1 ,
PRIMARY KEY ( " id " )
2022-06-12 19:34:17 +00:00
) ;
2022-05-31 20:11:35 +00:00
CREATE INDEX IF NOT EXISTS due_time_index ON vocabulary ( due_time ) ;
2022-06-12 19:34:17 +00:00
CREATE INDEX IF NOT EXISTS title_name_index ON title ( name ) ;
2022-05-31 20:11:35 +00:00
] ]
2022-11-11 14:53:06 +00:00
local VocabularyBuilder = {
path = db_location
}
2022-05-31 20:11:35 +00:00
function VocabularyBuilder : init ( )
VocabularyBuilder : createDB ( )
end
2022-10-12 21:45:29 +00:00
function VocabularyBuilder : selectCount ( vocab_widget )
local db_conn = SQ3.open ( db_location )
2022-10-25 10:27:07 +00:00
local where_clause = vocab_widget : check_reverse ( ) and " WHERE due_time <= " .. vocab_widget.reload_time or " "
2022-10-12 21:45:29 +00:00
local sql = " SELECT count(0) FROM vocabulary INNER JOIN title ON filter=true AND title_id=id " .. where_clause .. " ; "
local count = tonumber ( db_conn : rowexec ( sql ) )
db_conn : close ( )
return count
2022-05-31 20:11:35 +00:00
end
function VocabularyBuilder : createDB ( )
local db_conn = SQ3.open ( db_location )
-- Make it WAL, if possible
if Device : canUseWAL ( ) then
db_conn : exec ( " PRAGMA journal_mode=WAL; " )
else
db_conn : exec ( " PRAGMA journal_mode=TRUNCATE; " )
end
-- Create db
db_conn : exec ( VOCABULARY_DB_SCHEMA )
-- Check version
local db_version = tonumber ( db_conn : rowexec ( " PRAGMA user_version; " ) )
2022-07-31 07:02:09 +00:00
if db_version == 0 then
self : insertLookupData ( db_conn )
-- Update version
db_conn : exec ( string.format ( " PRAGMA user_version=%d; " , DB_SCHEMA_VERSION ) )
elseif db_version < DB_SCHEMA_VERSION then
2022-11-11 23:00:38 +00:00
local ok , re
local log = function ( msg )
logger.warn ( " [vocab builder db migration] " , msg )
end
2022-07-31 07:02:09 +00:00
if db_version < 20220608 then
2022-11-11 23:00:38 +00:00
ok , re = pcall ( db_conn.exec , db_conn , " ALTER TABLE vocabulary ADD prev_context TEXT; " )
if not ok then log ( re ) end
ok , re = pcall ( db_conn.exec , db_conn , " ALTER TABLE vocabulary ADD next_context TEXT; " )
if not ok then log ( re ) end
ok , re = pcall ( db_conn.exec , db_conn , " ALTER TABLE vocabulary ADD title_id INTEGER; " )
if not ok then log ( re ) end
ok , re = pcall ( db_conn.exec , db_conn , " INSERT OR IGNORE INTO title (name) SELECT DISTINCT book_title FROM vocabulary; " )
if not ok then log ( re ) end
ok , re = pcall ( db_conn.exec , db_conn , " UPDATE vocabulary SET title_id = (SELECT id FROM title WHERE name = book_title); " )
if not ok then log ( re ) end
ok , re = pcall ( db_conn.exec , db_conn , " ALTER TABLE vocabulary DROP book_title; " )
if not ok then log ( re ) end
2022-05-31 20:11:35 +00:00
end
2022-07-31 07:02:09 +00:00
if db_version < 20220730 then
2022-11-11 23:00:38 +00:00
ok , re = pcall ( db_conn.exec , db_conn , " ALTER TABLE title ADD filter INTEGER NOT NULL DEFAULT 1; " )
if not ok then log ( re ) end
2022-10-02 09:51:54 +00:00
end
if db_version < 20221002 then
2022-11-11 23:00:38 +00:00
ok , re = pcall ( db_conn.exec , db_conn , [ [
ALTER TABLE vocabulary ADD streak_count INTEGER NULL DEFAULT 0 ;
UPDATE vocabulary SET streak_count = review_count ; ] ] )
if not ok then log ( re ) end
2022-07-31 07:02:09 +00:00
end
2022-06-12 19:34:17 +00:00
db_conn : exec ( " CREATE INDEX IF NOT EXISTS title_id_index ON vocabulary(title_id); " )
2022-05-31 20:11:35 +00:00
-- Update version
db_conn : exec ( string.format ( " PRAGMA user_version=%d; " , DB_SCHEMA_VERSION ) )
end
db_conn : close ( )
end
function VocabularyBuilder : insertLookupData ( db_conn )
local file_path = DataStorage : getSettingsDir ( ) .. " /lookup_history.lua "
Clarify our OOP semantics across the codebase (#9586)
Basically:
* Use `extend` for class definitions
* Use `new` for object instantiations
That includes some minor code cleanups along the way:
* Updated `Widget`'s docs to make the semantics clearer.
* Removed `should_restrict_JIT` (it's been dead code since https://github.com/koreader/android-luajit-launcher/pull/283)
* Minor refactoring of LuaSettings/LuaData/LuaDefaults/DocSettings to behave (mostly, they are instantiated via `open` instead of `new`) like everything else and handle inheritance properly (i.e., DocSettings is now a proper LuaSettings subclass).
* Default to `WidgetContainer` instead of `InputContainer` for stuff that doesn't actually setup key/gesture events.
* Ditto for explicit `*Listener` only classes, make sure they're based on `EventListener` instead of something uselessly fancier.
* Unless absolutely necessary, do not store references in class objects, ever; only values. Instead, always store references in instances, to avoid both sneaky inheritance issues, and sneaky GC pinning of stale references.
* ReaderUI: Fix one such issue with its `active_widgets` array, with critical implications, as it essentially pinned *all* of ReaderUI's modules, including their reference to the `Document` instance (i.e., that was a big-ass leak).
* Terminal: Make sure the shell is killed on plugin teardown.
* InputText: Fix Home/End/Del physical keys to behave sensibly.
* InputContainer/WidgetContainer: If necessary, compute self.dimen at paintTo time (previously, only InputContainers did, which might have had something to do with random widgets unconcerned about input using it as a baseclass instead of WidgetContainer...).
* OverlapGroup: Compute self.dimen at *init* time, because for some reason it needs to do that, but do it directly in OverlapGroup instead of going through a weird WidgetContainer method that it was the sole user of.
* ReaderCropping: Under no circumstances should a Document instance member (here, self.bbox) risk being `nil`ed!
* Kobo: Minor code cleanups.
2022-10-06 00:14:48 +00:00
local lookup_history = LuaData : open ( file_path , " LookupHistory " )
2022-05-31 20:11:35 +00:00
if lookup_history : has ( " lookup_history " ) then
local lookup_history_table = lookup_history : readSetting ( " lookup_history " )
2022-06-12 19:34:17 +00:00
local book_titles = { }
local stmt = db_conn : prepare ( " INSERT INTO title (name) values (?); " )
for i = # lookup_history_table , 1 , - 1 do
local book_title = lookup_history_table [ i ] . book_title or " "
if not book_titles [ book_title ] then
stmt : bind ( book_title )
stmt : step ( )
stmt : clearbind ( ) : reset ( )
book_titles [ book_title ] = true
end
end
2022-05-31 20:11:35 +00:00
2022-06-12 19:34:17 +00:00
local words = { }
local insert_sql = [ [ INSERT OR REPLACE INTO vocabulary
2022-07-31 07:02:09 +00:00
( word , title_id , create_time , due_time , review_time ) values
( ? , ( SELECT id FROM title WHERE name = ? ) , ? , ? , ? ) ; ] ]
2022-06-12 19:34:17 +00:00
stmt = db_conn : prepare ( insert_sql )
2022-05-31 20:11:35 +00:00
for i = # lookup_history_table , 1 , - 1 do
local value = lookup_history_table [ i ]
if not words [ value.word ] then
2022-07-31 07:02:09 +00:00
stmt : bind ( value.word , value.book_title or " " , value.time , value.time + 5 * 60 , value.time )
2022-05-31 20:11:35 +00:00
stmt : step ( )
stmt : clearbind ( ) : reset ( )
words [ value.word ] = true
end
end
end
end
2022-10-12 21:45:29 +00:00
function VocabularyBuilder : _select_items ( items , start_idx , reload_time )
2022-05-31 20:11:35 +00:00
local conn = SQ3.open ( db_location )
2022-10-12 21:45:29 +00:00
local sql
if not reload_time then
sql = string.format ( " SELECT * FROM vocabulary INNER JOIN title ON title_id = title.id AND filter = true ORDER BY due_time limit %d OFFSET %d; " , 32 , start_idx - 1 )
else
sql = string.format ( [ [ SELECT * FROM vocabulary INNER JOIN title
ON title_id = title.id AND filter = true
2022-10-25 10:27:07 +00:00
WHERE due_time <= ] ] .. reload_time ..
2022-10-12 21:45:29 +00:00
" ORDER BY due_time desc limit %d OFFSET %d; " , 32 , start_idx - 1 )
end
2022-05-31 20:11:35 +00:00
local results = conn : exec ( sql )
conn : close ( )
if not results then return end
local current_time = os.time ( )
for i = 1 , # results.word do
local item = items [ start_idx + i - 1 ]
2022-06-03 07:52:39 +00:00
if item and not item.word then
2022-05-31 20:11:35 +00:00
item.word = results.word [ i ]
2022-07-31 07:02:09 +00:00
item.review_count = math.max ( 0 , tonumber ( results.review_count [ i ] ) )
2022-10-02 09:51:54 +00:00
item.streak_count = math.max ( 0 , tonumber ( results.streak_count [ i ] ) )
2022-06-12 19:34:17 +00:00
item.book_title = results.name [ i ] or " "
2022-05-31 20:11:35 +00:00
item.create_time = tonumber ( results.create_time [ i ] )
item.review_time = nil --use this field to flag change
item.due_time = tonumber ( results.due_time [ i ] )
item.is_dim = tonumber ( results.due_time [ i ] ) > current_time
2022-06-12 19:34:17 +00:00
item.prev_context = results.prev_context [ i ]
item.next_context = results.next_context [ i ]
2022-05-31 20:11:35 +00:00
item.got_it_callback = function ( item_input )
VocabularyBuilder : gotOrForgot ( item_input , true )
end
item.forgot_callback = function ( item_input )
VocabularyBuilder : gotOrForgot ( item_input , false )
end
item.remove_callback = function ( item_input )
VocabularyBuilder : remove ( item_input )
end
end
end
end
2022-10-12 21:45:29 +00:00
function VocabularyBuilder : select_items ( vocab_widget , start_idx , end_idx )
local items = vocab_widget.item_table
2022-05-31 20:11:35 +00:00
local start_cursor
if # items == 0 then
start_cursor = 0
else
for i = start_idx + 1 , end_idx do
if not items [ i ] . word then
start_cursor = i
break
end
end
end
if not start_cursor then return end
2022-10-12 21:45:29 +00:00
self : _select_items ( items , start_cursor , vocab_widget : check_reverse ( ) and vocab_widget.reload_time )
2022-05-31 20:11:35 +00:00
end
function VocabularyBuilder : gotOrForgot ( item , isGot )
local current_time = os.time ( )
local due_time
2022-10-02 09:51:54 +00:00
local target_review_count = math.max ( item.review_count + ( isGot and 1 or - 1 ) , 0 )
local target_count = isGot and item.streak_count + 1 or 0
if target_count == 0 then
2022-05-31 20:11:35 +00:00
due_time = current_time + 5 * 60
elseif target_count == 1 then
due_time = current_time + 30 * 60
elseif target_count == 2 then
due_time = current_time + 12 * 3600
elseif target_count == 3 then
due_time = current_time + 24 * 3600
elseif target_count == 4 then
due_time = current_time + 48 * 3600
elseif target_count == 5 then
due_time = current_time + 96 * 3600
elseif target_count == 6 then
due_time = current_time + 24 * 7 * 3600
elseif target_count == 7 then
due_time = current_time + 24 * 15 * 3600
else
2022-11-12 20:20:54 +00:00
due_time = current_time + 24 * 3600 * 30 * 2 ^ ( math.min ( target_count - 8 , 6 ) )
2022-05-31 20:11:35 +00:00
end
2022-10-02 09:51:54 +00:00
item.last_streak_count = item.streak_count
2022-10-02 00:03:23 +00:00
item.last_review_count = item.review_count
item.last_review_time = item.review_time
item.last_due_time = item.due_time
2022-10-02 09:51:54 +00:00
item.streak_count = target_count
item.review_count = target_review_count
2022-05-31 20:11:35 +00:00
item.review_time = current_time
item.due_time = due_time
end
function VocabularyBuilder : batchUpdateItems ( items )
local sql = [ [ UPDATE vocabulary
SET review_count = ? ,
2022-10-02 09:51:54 +00:00
streak_count = ? ,
2022-05-31 20:11:35 +00:00
review_time = ? ,
due_time = ?
WHERE word = ? ; ] ]
local conn = SQ3.open ( db_location )
local stmt = conn : prepare ( sql )
for _ , item in ipairs ( items ) do
if item.review_time then
2022-10-02 09:51:54 +00:00
stmt : bind ( item.review_count , item.streak_count , item.review_time , item.due_time , item.word )
2022-05-31 20:11:35 +00:00
stmt : step ( )
stmt : clearbind ( ) : reset ( )
2022-11-11 14:53:06 +00:00
item.review_time = nil
2022-05-31 20:11:35 +00:00
end
end
2022-06-12 19:34:17 +00:00
conn : exec ( " DELETE FROM title WHERE NOT EXISTS( SELECT title_id FROM vocabulary WHERE id = title_id ); " )
2022-05-31 20:11:35 +00:00
conn : close ( )
end
function VocabularyBuilder : insertOrUpdate ( entry )
local conn = SQ3.open ( db_location )
2022-06-12 19:34:17 +00:00
local stmt = conn : prepare ( " INSERT OR IGNORE INTO title (name) VALUES (?); " )
stmt : bind ( entry.book_title )
stmt : step ( )
stmt : clearbind ( ) : reset ( )
2022-07-31 07:02:09 +00:00
stmt = conn : prepare ( [ [ INSERT INTO vocabulary ( word , title_id , create_time , due_time , review_time , prev_context , next_context )
VALUES ( ? , ( SELECT id FROM title WHERE name = ? ) , ? , ? , ? , ? , ? )
2022-06-12 19:34:17 +00:00
ON CONFLICT ( word ) DO UPDATE SET title_id = excluded.title_id ,
2022-05-31 20:11:35 +00:00
create_time = excluded.create_time ,
review_count = MAX ( review_count - 1 , 0 ) ,
2022-10-02 09:51:54 +00:00
streak_count = 0 ,
2022-06-12 19:34:17 +00:00
due_time = ? ,
prev_context = ifnull ( excluded.prev_context , prev_context ) ,
next_context = ifnull ( excluded.next_context , next_context ) ; ] ] ) ;
2022-07-31 07:02:09 +00:00
stmt : bind ( entry.word , entry.book_title , entry.time , entry.time + 300 , entry.time ,
2022-06-12 19:34:17 +00:00
entry.prev_context , entry.next_context , entry.time + 300 )
2022-06-04 12:54:58 +00:00
stmt : step ( )
stmt : clearbind ( ) : reset ( )
2022-05-31 20:11:35 +00:00
conn : close ( )
end
2022-07-31 07:02:09 +00:00
function VocabularyBuilder : toggleBookFilter ( ids )
local id_string = " "
for key , _ in pairs ( ids ) do
id_string = id_string .. ( id_string == " " and " " or " , " ) .. key
end
local conn = SQ3.open ( db_location )
conn : exec ( " UPDATE title SET filter = (filter | 1) - (filter & 1) WHERE id in ( " .. id_string .. " ); " )
conn : close ( )
end
2022-11-12 20:22:58 +00:00
function VocabularyBuilder : updateBookIdOfWord ( word , id )
if not word or type ( id ) ~= " number " then return end
local conn = SQ3.open ( db_location )
local stmt = conn : prepare ( " UPDATE vocabulary SET title_id = ? WHERE word = ?; " )
stmt : bind ( id , word )
stmt : step ( )
stmt : clearbind ( ) : reset ( )
conn : close ( )
end
function VocabularyBuilder : insertNewBook ( title )
local conn = SQ3.open ( db_location )
local stmt = conn : prepare ( " INSERT INTO title (name) VALUES (?); " )
stmt : bind ( title ) : step ( )
stmt : clearbind ( ) : reset ( )
stmt = conn : prepare ( " SELECT id FROM title WHERE name = ? " )
local result = stmt : bind ( title ) : step ( )
stmt : clearbind ( ) : reset ( )
conn : close ( )
return tonumber ( result [ 1 ] )
end
function VocabularyBuilder : changeBookTitle ( old_title , title )
local conn = SQ3.open ( db_location )
local stmt = conn : prepare ( " UPDATE title SET name = ? WHERE name = ?; " )
stmt : bind ( title , old_title ) : step ( )
stmt : clearbind ( ) : reset ( )
conn : close ( )
end
2022-07-31 07:02:09 +00:00
function VocabularyBuilder : selectBooks ( )
local conn = SQ3.open ( db_location )
local sql = string.format ( " SELECT * FROM title " )
local results = conn : exec ( sql )
conn : close ( )
local items = { }
if not results then return items end
for i = 1 , # results.id do
table.insert ( items , {
id = tonumber ( results.id [ i ] ) ,
name = results.name [ i ] ,
filter = tonumber ( results.filter [ i ] ) ~= 0
} )
end
return items
end
function VocabularyBuilder : hasFilteredBook ( )
local conn = SQ3.open ( db_location )
local has_filter = tonumber ( conn : rowexec ( " SELECT count(0) FROM title WHERE filter = false limit 1; " ) )
conn : close ( )
return has_filter ~= 0
end
2022-05-31 20:11:35 +00:00
function VocabularyBuilder : remove ( item )
local conn = SQ3.open ( db_location )
2022-06-04 12:54:58 +00:00
local stmt = conn : prepare ( " DELETE FROM vocabulary WHERE word = ? ; " )
stmt : bind ( item.word )
stmt : step ( )
stmt : clearbind ( ) : reset ( )
2022-05-31 20:11:35 +00:00
conn : close ( )
end
function VocabularyBuilder : resetProgress ( )
local conn = SQ3.open ( db_location )
local due_time = os.time ( )
2022-10-02 09:51:54 +00:00
conn : exec ( string.format ( " UPDATE vocabulary SET review_count = 0, streak_count = 0, due_time = %d; " , due_time ) )
2022-05-31 20:11:35 +00:00
conn : close ( )
end
function VocabularyBuilder : purge ( )
local conn = SQ3.open ( db_location )
2022-06-12 19:34:17 +00:00
conn : exec ( " DELETE FROM vocabulary; DELETE FROM title; " )
2022-05-31 20:11:35 +00:00
conn : close ( )
end
2022-11-11 14:53:06 +00:00
-- Synchronization
function VocabularyBuilder . onSync ( local_path , cached_path , income_path )
-- we try to open income db
local conn_income = SQ3.open ( income_path )
local ok1 , v1 = pcall ( conn_income.rowexec , conn_income , " PRAGMA schema_version " )
if not ok1 or tonumber ( v1 ) == 0 then
-- no income db or wrong db, first time sync
logger.dbg ( " vocabbuilder open income DB failed " , v1 )
return true
end
local sql = " attach ' " .. income_path : gsub ( " ' " , " '' " ) .. " ' as income_db; "
-- then we try to open cached db
local conn_cached = SQ3.open ( cached_path )
local ok2 , v2 = pcall ( conn_cached.rowexec , conn_cached , " PRAGMA schema_version " )
local attached_cache
if not ok2 or tonumber ( v2 ) == 0 then
-- no cached or error, no item to delete
logger.dbg ( " vocabbuilder open cached DB failed " , v2 )
else
attached_cache = true
sql = sql .. " attach ' " .. cached_path : gsub ( " ' " , " '' " ) .. [ [ ' as cached_db;
-- first we delete from income_db words that exist in cached_db but not in local_db,
-- namely the ones that were deleted since last sync
DELETE FROM income_db.vocabulary WHERE word IN (
SELECT word FROM cached_db.vocabulary WHERE word NOT IN (
SELECT word FROM vocabulary
)
) ;
-- We need to delete words that were delete in income_db since last sync
DELETE FROM vocabulary WHERE word IN (
SELECT word FROM cached_db.vocabulary WHERE word NOT IN (
SELECT word FROM income_db.vocabulary
)
) ;
] ]
end
conn_cached : close ( )
conn_income : close ( )
local conn = SQ3.open ( local_path )
local ok3 , v3 = pcall ( conn.exec , conn , " PRAGMA schema_version " )
if not ok3 or tonumber ( v3 ) == 0 then
-- no local db, this is an error
logger.err ( " vocabbuilder open local DB " , v3 )
return false
end
sql = sql .. [ [
-- We merge the local db with income db to form the synced db.
-- First we do the books
INSERT OR IGNORE INTO title ( name ) SELECT name FROM income_db.title ;
-- Then update income db's book title id references
UPDATE income_db.vocabulary SET title_id = ifnull (
( SELECT mid FROM (
SELECT m.id as mid , title_id as i_tid FROM title as m -- main db
INNER JOIN income_db.title as i -- income db
ON m.name = i.name
LEFT JOIN income_db.vocabulary
on title_id = i.id
) WHERE income_db.vocabulary . title_id = i_tid
) , title_id ) ;
-- Then we merge the income_db's contents into the local db
INSERT INTO vocabulary
( word , create_time , review_time , due_time , review_count , prev_context , next_context , title_id , streak_count )
SELECT word , create_time , review_time , due_time , review_count , prev_context , next_context , title_id , streak_count
FROM income_db.vocabulary WHERE true
ON CONFLICT ( word ) DO UPDATE SET
due_time = MAX ( due_time , excluded.due_time ) ,
review_count = CASE
WHEN create_time = excluded.create_time THEN MAX ( review_count , excluded.review_count )
ELSE review_count + excluded.review_count
END ,
prev_context = ifnull ( excluded.prev_context , prev_context ) ,
next_context = ifnull ( excluded.next_context , next_context ) ,
streak_count = CASE
WHEN review_time > excluded.review_time THEN streak_count
ELSE excluded.streak_count
END ,
review_time = MAX ( review_time , excluded.review_time ) ,
create_time = excluded.create_time , -- we always use the remote value to eliminate duplicate review_count sum
title_id = excluded.title_id -- use remote in case re-assignable book id be supported
] ]
conn : exec ( sql )
pcall ( conn.exec , conn , " COMMIT; " )
conn : exec ( " DETACH income_db; " .. ( attached_cache and " DETACH cached_db; " or " " ) )
conn : exec ( " PRAGMA temp_store = 2; " ) -- use memory for temp files
local ok , errmsg = pcall ( conn.exec , conn , " VACUUM; " ) -- we upload a compact file
if not ok then
logger.warn ( " Failed compacting vocab database: " , errmsg )
end
conn : close ( )
return true
end
2022-05-31 20:11:35 +00:00
VocabularyBuilder : init ( )
return VocabularyBuilder