[automaton] kill memory leak via arena

pull/2224/head
nick black 3 years ago committed by nick black
parent 9fc4418912
commit 57e5981b9d

@ -19,8 +19,8 @@
typedef struct esctrie {
// if non-NULL, this is the next level of radix-128 trie. it is NULL on
// accepting nodes, since no valid control sequence is a prefix of another
// valid control sequence.
struct esctrie** trie;
// valid control sequence. links are 1-biased (0 is NULL).
unsigned* trie;
enum {
NODE_SPECIAL, // an accepting node, or pure transit (if ni.id == 0)
NODE_NUMERIC, // accumulates a number
@ -29,73 +29,67 @@ typedef struct esctrie {
} ntype;
ncinput ni; // composed key terminating here
triefunc fxn; // function to call on match
struct esctrie* kleene; // kleene match
unsigned kleene; // idx of kleene match
} esctrie;
uint32_t esctrie_id(const esctrie* e){
return e->ni.id;
// get node corresponding to 1-biased index
static inline esctrie*
esctrie_from_idx(const automaton* a, unsigned idx){
if(idx == 0){
return NULL;
}
return a->nodepool + (idx - 1);
}
esctrie** esctrie_trie(esctrie* e){
return e->trie;
// return 1-biased index of node in pool
static inline unsigned
esctrie_idx(const automaton* a, const esctrie* e){
return e - a->nodepool + 1;
}
static inline esctrie*
create_esctrie_node(int special){
esctrie* e = malloc(sizeof(*e));
if(e){
memset(e, 0, sizeof(*e));
e->ntype = NODE_SPECIAL;
if((e->ni.id = special) == 0){
const size_t tsize = sizeof(*e->trie) * 0x80;
if( (e->trie = malloc(tsize)) ){
memset(e->trie, 0, tsize);
return e;
}
free(e);
return NULL;
}
return e;
}
return e;
uint32_t esctrie_id(const esctrie* e){
return e->ni.id;
}
static void
free_trienode(esctrie** eptr){
esctrie* e;
if( (e = *eptr) ){
if(e->trie){
int z;
for(z = 0 ; z < 0x80 ; ++z){
// don't recurse down a link to ourselves
if(e->trie[z] && e->trie[z] != e){
free_trienode(&e->trie[z]);
}
// if it's a numeric path, only recurse once
if(z == '0'){
if(e->trie['1'] == e->trie[z]){
z = '9';
}
}
// if it's an all-strings path, only recurse once
if(z == ' '){
if(e->trie['!'] == e->trie[z]){
z = 0x80;
}
}
}
free(e->trie);
// returns the idx of the new node, or 0 on failure (idx is 1-biased)
static inline unsigned
create_esctrie_node(automaton* a, int special){
if(a->poolused == a->poolsize){
unsigned newsize = a->poolsize ? a->poolsize * 2 : 2048;
esctrie* tmp = realloc(a->nodepool, sizeof(*a->nodepool) * newsize);
if(tmp == NULL){
return 0;
}
a->nodepool = tmp;
a->poolsize = newsize;
}
esctrie* e = &a->nodepool[a->poolused++];
memset(e, 0, sizeof(*e));
e->ntype = NODE_SPECIAL;
if((e->ni.id = special) == 0){
const size_t tsize = sizeof(*e->trie) * 0x80;
if((e->trie = malloc(tsize)) == NULL){
--a->poolused;
return 0;
}
free(e);
memset(e->trie, 0, tsize);
}
return esctrie_idx(a, e);
}
void input_free_esctrie(automaton* a){
free_trienode(&a->escapes);
a->escapes = 0;
a->poolsize = 0;
for(unsigned i = 0 ; i < a->poolused ; ++i){
free(a->nodepool[i].trie);
}
free(a->nodepool);
a->poolused = 0;
a->nodepool = NULL;
}
static int
esctrie_make_numeric(esctrie* e){
esctrie_make_numeric(automaton* a, esctrie* e){
if(e->ntype == NODE_NUMERIC){
return 0;
}
@ -111,22 +105,22 @@ esctrie_make_numeric(esctrie* e){
}
e->ntype = NODE_NUMERIC;
for(int i = '0' ; i < '9' ; ++i){
e->trie[i] = e;
e->trie[i] = esctrie_idx(a, e);
}
return 0;
}
static int
esctrie_make_kleene(esctrie* e, unsigned follow, esctrie* term){
esctrie_make_kleene(automaton* a, esctrie* e, unsigned follow, esctrie* term){
if(e->ntype != NODE_SPECIAL){
logerror("can't make node type %d string\n", e->ntype);
return -1;
}
for(unsigned i = 0 ; i < 0x80 ; ++i){
if(i == follow){
e->trie[i] = term;
}else if(e->trie[i] == NULL){
e->trie[i] = e;
e->trie[i] = esctrie_idx(a, term);
}else if(e->trie[i] == 0){
e->trie[i] = esctrie_idx(a, e);
}
}
return 0;
@ -148,7 +142,7 @@ esctrie_make_function(esctrie* e, triefunc fxn){
}
static int
esctrie_make_string(esctrie* e, triefunc fxn){
esctrie_make_string(automaton* a, esctrie* e, triefunc fxn){
if(e->ntype == NODE_STRING){
return 0;
}
@ -165,7 +159,7 @@ esctrie_make_string(esctrie* e, triefunc fxn){
return -1;
}
}
esctrie* newe = create_esctrie_node(0);
esctrie* newe = esctrie_from_idx(a, create_esctrie_node(a, 0));
if(newe == NULL){
return -1;
}
@ -173,7 +167,7 @@ esctrie_make_string(esctrie* e, triefunc fxn){
if(!isprint(i)){
continue;
}
e->trie[i] = newe;
e->trie[i] = esctrie_idx(a, newe);
}
e = newe;
e->ntype = NODE_STRING;
@ -181,16 +175,16 @@ esctrie_make_string(esctrie* e, triefunc fxn){
if(!isprint(i)){
continue;
}
e->trie[i] = newe;
e->trie[i] = esctrie_idx(a, newe);
}
if((e->trie[0x1b] = create_esctrie_node(0)) == NULL){
if((e->trie[0x1b] = create_esctrie_node(a, 0)) == 0){
return -1;
}
e = e->trie[0x1b];
if((e->trie['\\'] = create_esctrie_node(NCKEY_INVALID)) == NULL){
e = esctrie_from_idx(a, e->trie[0x1b]);
if((e->trie['\\'] = create_esctrie_node(a, NCKEY_INVALID)) == 0){
return -1;
}
e = e->trie['\\'];
e = esctrie_from_idx(a, e->trie['\\']);
e->ni.id = 0;
e->ntype = NODE_SPECIAL;
if(esctrie_make_function(e, fxn)){
@ -201,60 +195,58 @@ esctrie_make_string(esctrie* e, triefunc fxn){
}
static esctrie*
link_kleene(esctrie* e, unsigned follow){
link_kleene(automaton* a, esctrie* e, unsigned follow){
if(e->kleene){
return e->kleene;
return a->nodepool + e->kleene;
}
esctrie* term = create_esctrie_node(0);
esctrie* term = esctrie_from_idx(a, create_esctrie_node(a, 0));
if(term == NULL){
return NULL;
}
esctrie* targ = NULL;
if( (targ = create_esctrie_node(0)) ){
if(esctrie_make_kleene(targ, follow, term)){
free_trienode(&targ);
free_trienode(&term);
return NULL;
}
if((targ = esctrie_from_idx(a, create_esctrie_node(a, 0))) == NULL){
return NULL;
}
if(esctrie_make_kleene(a, targ, follow, term)){
return NULL;
}
// fill in all NULL numeric links with the new target
for(unsigned int i = 0 ; i < 0x80 ; ++i){
if(i == follow){
if(e->trie[i]){
logerror("drain terminator already registered\n");
free_trienode(&targ);
free_trienode(&term);
return NULL;
}
e->trie[follow] = term;
}else if(e->trie[i] == NULL){
e->trie[i] = targ;
e->trie[follow] = esctrie_idx(a, term);
}else if(e->trie[i] == 0){
e->trie[i] = esctrie_idx(a, targ);
// FIXME travel to the ends and link targ there
}
}
targ->kleene = targ;
return e->trie[follow];
targ->kleene = esctrie_idx(a, targ);
return esctrie_from_idx(a, e->trie[follow]);
}
static void
fill_in_numerics(esctrie* e, esctrie* targ, unsigned follow, esctrie* efollow){
fill_in_numerics(automaton* a, esctrie* e, esctrie* targ, unsigned follow, esctrie* efollow){
// fill in all NULL numeric links with the new target
for(int i = '0' ; i <= '9' ; ++i){
if(e->trie[i] == NULL){
e->trie[i] = targ;
}else if(e->trie[i] != e){
fill_in_numerics(e->trie[i], targ, follow, efollow);
if(e->trie[i] == 0){
e->trie[i] = esctrie_idx(a, targ);
}else if(e->trie[i] != esctrie_idx(a, e)){
fill_in_numerics(a, esctrie_from_idx(a, e->trie[i]), targ, follow, efollow);
}
}
e->trie[follow] = efollow;
e->trie[follow] = esctrie_idx(a, efollow);
}
// accept any digit and transition to a numeric node.
static esctrie*
link_numeric(esctrie* e, unsigned follow){
link_numeric(automaton* a, esctrie* e, unsigned follow){
esctrie* targ = NULL;
// find a linked NODE_NUMERIC, if one exists. we'll want to reuse it.
for(int i = '0' ; i <= '9' ; ++i){
targ = e->trie[i];
targ = esctrie_from_idx(a, e->trie[i]);
if(targ && targ->ntype == NODE_NUMERIC){
break;
}
@ -262,37 +254,37 @@ link_numeric(esctrie* e, unsigned follow){
}
// we either have a numeric target, or will make one now
if(targ == NULL){
if( (targ = create_esctrie_node(0)) ){
if(esctrie_make_numeric(targ)){
free_trienode(&targ);
return NULL;
}
if((targ = esctrie_from_idx(a, create_esctrie_node(a, 0))) == 0){
return NULL;
}
if(esctrie_make_numeric(a, targ)){
return NULL;
}
}
// targ is the numeric node we're either creating or coopting
esctrie* efollow = targ->trie[follow];
esctrie* efollow = esctrie_from_idx(a, targ->trie[follow]);
if(efollow == NULL){
if((efollow = create_esctrie_node(0)) == NULL){
if((efollow = esctrie_from_idx(a, create_esctrie_node(a, 0))) == NULL){
return NULL;
}
}
for(int i = '0' ; i <= '9' ; ++i){
if(e->trie[i] == NULL){
e->trie[i] = targ;
if(e->trie[i] == 0){
e->trie[i] = esctrie_idx(a, targ);
}
fill_in_numerics(e->trie[i], targ, follow, efollow);
fill_in_numerics(a, esctrie_from_idx(a, e->trie[i]), targ, follow, efollow);
}
return efollow;
}
// add a cflow path to the automaton
int inputctx_add_cflow(automaton* a, const char* csi, triefunc fxn){
if(a->escapes == NULL){
if((a->escapes = create_esctrie_node(0)) == NULL){
if(a->escapes == 0){
if((a->escapes = create_esctrie_node(a, 0)) == 0){
return -1;
}
}
esctrie* eptr = a->escapes;
esctrie* eptr = esctrie_from_idx(a, a->escapes);
bool inescape = false;
unsigned char c;
while( (c = *csi++) ){
@ -310,12 +302,12 @@ int inputctx_add_cflow(automaton* a, const char* csi, triefunc fxn){
return -1;
}
c = *csi++;
eptr = link_numeric(eptr, c);
eptr = link_numeric(a, eptr, c);
if(eptr == NULL){
return -1;
}
}else if(c == 'S'){
if(esctrie_make_string(eptr, fxn)){
if(esctrie_make_string(a, eptr, fxn)){
return -1;
}
return 0;
@ -326,7 +318,7 @@ int inputctx_add_cflow(automaton* a, const char* csi, triefunc fxn){
return -1;
}
c = *csi++;
eptr = link_kleene(eptr, c);
eptr = link_kleene(a, eptr, c);
if(eptr == NULL){
return -1;
}
@ -336,27 +328,27 @@ int inputctx_add_cflow(automaton* a, const char* csi, triefunc fxn){
}
inescape = false;
}else{
if(eptr->trie[c] == NULL){
if((eptr->trie[c] = create_esctrie_node(0)) == NULL){
if(eptr->trie[c] == 0){
if((eptr->trie[c] = create_esctrie_node(a, 0)) == 0){
return -1;
}
}else if(eptr->trie[c] == eptr->kleene){
if((eptr->trie[c] = create_esctrie_node(0)) == NULL){
if((eptr->trie[c] = create_esctrie_node(a, 0)) == 0){
return -1;
}
}else if(eptr->trie[c]->ntype == NODE_NUMERIC){
}else if(esctrie_from_idx(a, eptr->trie[c])->ntype == NODE_NUMERIC){
// punch a hole through the numeric loop. create a new one, and fill
// it in with the existing target.
struct esctrie* newe;
if((newe = create_esctrie_node(0)) == NULL){
if((newe = esctrie_from_idx(a, create_esctrie_node(a, 0))) == 0){
return -1;
}
for(int i = 0 ; i < 0x80 ; ++i){
newe->trie[i] = eptr->trie[c]->trie[i];
newe->trie[i] = esctrie_from_idx(a, eptr->trie[c])->trie[i];
}
eptr->trie[c] = newe;
eptr->trie[c] = esctrie_idx(a, newe);
}
eptr = eptr->trie[c];
eptr = esctrie_from_idx(a, eptr->trie[c]);
}
}
if(inescape){
@ -375,13 +367,12 @@ int inputctx_add_input_escape(automaton* a, const char* esc, uint32_t special,
logerror("not an escape (0x%x)\n", special);
return -1;
}
esctrie** eptr = &a->escapes;
if(*eptr == NULL){
if((*eptr = create_esctrie_node(0)) == NULL){
if(a->escapes == 0){
if((a->escapes = create_esctrie_node(a, 0)) == 0){
return -1;
}
}
esctrie* cur = *eptr;
esctrie* cur = esctrie_from_idx(a, a->escapes);
++esc; // don't encode initial escape as a transition
do{
int valid = *esc;
@ -389,12 +380,12 @@ int inputctx_add_input_escape(automaton* a, const char* esc, uint32_t special,
logerror("invalid character %d in escape\n", valid);
return -1;
}
if(cur->trie[valid] == NULL){
if((cur->trie[valid] = create_esctrie_node(0)) == NULL){
if(cur->trie[valid] == 0){
if((cur->trie[valid] = create_esctrie_node(a, 0)) == 0){
return -1;
}
}
cur = cur->trie[valid];
cur = esctrie_from_idx(a, cur->trie[valid]);
++esc;
}while(*esc);
// it appears that multiple keys can be mapped to the same escape string. as
@ -422,8 +413,7 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
logerror("eight-bit char %u in control sequence\n", candidate);
return -1;
}
esctrie* e = a->state;
logdebug("state: %p candidate: %c %u type: %d\n", e, candidate, candidate, e->ntype);
esctrie* e = esctrie_from_idx(a, a->state);
// we ought not have been called for an escape with any state!
if(candidate == 0x1b && !a->instring){
assert(NULL == e);
@ -437,9 +427,9 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
}
return 0;
}
if((a->state = e->trie[candidate]) == NULL){
if((a->state = e->trie[candidate]) == 0){
if(isprint(candidate)){
if(e == a->escapes){
if(esctrie_idx(a, e) == a->escapes){
memset(ni, 0, sizeof(*ni));
ni->id = candidate;
ni->alt = true;
@ -449,7 +439,7 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
loginfo("unexpected transition %u\n", candidate);
return -1;
}
e = a->state;
e = esctrie_from_idx(a, a->state);
// initialize any node we've just stepped into
switch(e->ntype){
case NODE_NUMERIC:

@ -15,15 +15,22 @@ typedef int (*triefunc)(struct inputctx*);
// the state necessary for matching input against our automaton of control
// sequences. we *do not* match the bulk UTF-8 input. we match online (i.e.
// we can be passed a byte at a time).
// we can be passed a byte at a time). initialize with all zeroes.
typedef struct automaton {
struct esctrie* escapes; // head Esc node of trie
unsigned escapes; // head Esc node of trie
int used; // bytes consumed thus far
int instring; // are we in an ST-terminated string?
struct esctrie* state;
unsigned state;
const unsigned char* matchstart; // beginning of active match
// we keep a node pool not to save time when allocating, but because
// trying to free the automaton without reference counting otherwise
// sucks worse than three bitches in a bitchboat.
unsigned poolsize;
unsigned poolused;
struct esctrie* nodepool;
} automaton;
// wipe out all storage internal to |a| (but not |a| itself).
void input_free_esctrie(automaton *a);
int inputctx_add_input_escape(automaton* a, const char* esc,
@ -38,8 +45,6 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
__attribute__ ((nonnull (1, 2, 4)));
uint32_t esctrie_id(const struct esctrie* e);
// returns 128-way array of esctrie pointers
struct esctrie** esctrie_trie(struct esctrie* e);
#ifdef __cplusplus
}

@ -1225,7 +1225,7 @@ process_escape(inputctx* ictx, const unsigned char* buf, int buflen){
if(candidate == NCKEY_ESC && !ictx->amata.instring){
ictx->amata.matchstart = buf + ictx->amata.used - 1;
ictx->amata.state = ictx->amata.escapes;
logtrace("initialized automaton to %p\n", ictx->amata.state);
logtrace("initialized automaton to %u\n", ictx->amata.state);
ictx->amata.used = 1;
if(used > 1){ // we got reset; replay as input
return -(used - 1);
@ -1236,11 +1236,8 @@ process_escape(inputctx* ictx, const unsigned char* buf, int buflen){
// coming from a transition, where ictx->triepos->trie is checked below.
}else{
ncinput ni = {};
logtrace("triepos: %p in: %c instring%c special: 0x%08x\n", ictx->amata.state,
isprint(candidate) ? candidate : ' ', ictx->amata.instring ? '+' : '-',
ictx->amata.state ? esctrie_id(ictx->amata.state) : 0);
int w = walk_automaton(&ictx->amata, ictx, candidate, &ni);
logdebug("walk result on %u (%c): %d %p\n", candidate,
logdebug("walk result on %u (%c): %d %u\n", candidate,
isprint(candidate) ? candidate : ' ', w, ictx->amata.state);
if(w > 0){
if(ni.id){

Loading…
Cancel
Save