[automaton] robustify/minimize string matches #2219

pull/2224/head
nick black 3 years ago committed by nick black
parent 0749800a9d
commit f19ffddb40

@ -28,7 +28,6 @@ typedef struct esctrie {
NODE_FUNCTION, // invokes a function
} ntype;
ncinput ni; // composed key terminating here
char* str; // accumulated string; reset to NULL on entry
triefunc fxn; // function to call on match
struct esctrie* kleene; // kleene match
} esctrie;
@ -37,10 +36,6 @@ uint32_t esctrie_id(const esctrie* e){
return e->ni.id;
}
// Accessor for the string accumulated at node 'e'. The returned pointer is
// the node's own 'str' member (it is released together with the node), so the
// caller must not free it. It is NULL when no string has been accumulated.
const char* esctrie_string(const esctrie* e){
  const char* accumulated = e->str;
  return accumulated;
}
// Accessor for the child table of node 'e': hands back the node's 'trie'
// member (the header describes it as a 128-way array of esctrie pointers).
esctrie** esctrie_trie(esctrie* e){
  esctrie** children = e->trie;
  return children;
}
@ -89,7 +84,6 @@ free_trienode(esctrie** eptr){
}
}
}
free(e->str);
free(e->trie);
}
free(e);
@ -367,7 +361,6 @@ int inputctx_add_cflow(automaton* a, const char* csi, triefunc fxn){
}
eptr = eptr->trie[c];
}
logdebug("added %c, now at %p (%d) (%u)\n", c, eptr, eptr->ntype, *csi);
}
if(inescape){
logerror("illegal escape at end of line\n");
@ -422,23 +415,6 @@ int inputctx_add_input_escape(automaton* a, const char* esc, uint32_t special,
return 0;
}
// Append 'candidate' to the string being accumulated at node 'e'.
// a->stridx is the size in bytes of that string including its NUL terminator,
// so the new character lands at index stridx - 1 and the terminator at stridx.
// Returns 0 on success. Returns -1 (leaving e->str and a->stridx untouched) if
// 'candidate' is not a printable character, if a->stridx is zero, or if the
// buffer cannot be grown.
static int
growstring(automaton* a, esctrie* e, unsigned candidate){
  // isprint() is only defined for values representable as unsigned char (or
  // EOF), so reject anything wider before consulting it.
  if((unsigned char)candidate != candidate || !isprint((int)candidate)){
    logerror("unexpected char %u in string\n", candidate);
    return -1;
  }
  // stridx counts the terminator, so it must be at least 1; otherwise
  // stridx - 1 would wrap around and index far outside the allocation.
  if(a->stridx == 0){
    logerror("invalid string index %u\n", a->stridx);
    return -1;
  }
  char* tmp = realloc(e->str, a->stridx + 1);
  if(tmp == NULL){
    return -1; // e->str is still valid and still owned by the node
  }
  e->str = tmp;
  e->str[a->stridx - 1] = (char)candidate;
  e->str[a->stridx] = '\0';
  ++a->stridx;
  return 0;
}
// returns -1 for non-match, 0 for match, 1 for acceptance. if we are in the
// middle of a sequence, and receive an escape, *do not call this*, but
// instead call reset_automaton() after replaying the used characters to the
@ -461,8 +437,6 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
if(candidate == 0x1b){
a->state = e->trie[candidate];
a->instring = 0;
}else if(growstring(a, e, candidate)){
return -1;
}
return 0;
}
@ -484,11 +458,7 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
case NODE_NUMERIC:
break;
case NODE_STRING:
a->stridx = 1;
a->instring = 1;
if(growstring(a, e, candidate)){
return -1;
}
break;
case NODE_SPECIAL:
if(e->ni.id){

@ -21,7 +21,6 @@ typedef struct automaton {
int used; // bytes consumed thus far
int instring; // are we in an ST-terminated string?
struct esctrie* state;
unsigned stridx; // bytes of accumulating string (includes NUL) FIXME kill
const unsigned char* matchstart; // beginning of active match
} automaton;
@ -39,7 +38,6 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
__attribute__ ((nonnull (1, 2, 4)));
uint32_t esctrie_id(const struct esctrie* e);
const char* esctrie_string(const struct esctrie* e);
// returns 128-way array of esctrie pointers
struct esctrie** esctrie_trie(struct esctrie* e);

@ -314,8 +314,12 @@ amata_next_numeric(automaton* amata, const char* prefix, char follow){
// prefix has been matched
unsigned ret = 0;
while(isdigit(*amata->matchstart)){
int addend = *amata->matchstart - '0';
if((UINT_MAX - addend) / 10 < ret){
logerror("overflow: %u * 10 + %u > %u\n", ret, addend, UINT_MAX);
}
ret *= 10;
ret += *amata->matchstart - '0'; // FIXME overflow check!
ret += addend;
++amata->matchstart;
}
if(*amata->matchstart++ != follow){
@ -325,27 +329,30 @@ amata_next_numeric(automaton* amata, const char* prefix, char follow){
return ret;
}
// get a fixed CSI-anchored node from the trie
static inline struct esctrie*
csi_node(automaton *amata, const char* prefix){
struct esctrie* e = amata->escapes;
e = esctrie_trie(e)['['];
unsigned p;
while(e && (p = *prefix)){
e = esctrie_trie(e)[p];
++prefix;
// same deal as amata_next_numeric, but returns a heap-allocated string.
// strings always end with ST ("\x1b\\"). this one *does* return NULL on
// either a match failure or an alloc failure.
static char*
amata_next_string(automaton* amata, const char* prefix){
char c;
while( (c = *prefix++) ){
if(*amata->matchstart != c){
logerror("matchstart didn't match prefix (%c vs %c)\n", c, *amata->matchstart);
return NULL;
}
++amata->matchstart;
}
if(*prefix){
logerror("error following path: %s\n", prefix);
// prefix has been matched. mark start of string and find follow.
const unsigned char* start = amata->matchstart;
while(*amata->matchstart != '\x1b'){
++amata->matchstart;
}
return e;
}
// get the DCS node from the trie
static inline struct esctrie*
dcs_node(automaton *amata){
struct esctrie* e = amata->escapes;
return esctrie_trie(e)['P'];
char* ret = malloc(amata->matchstart - start + 1);
if(ret){
memcpy(ret, start, amata->matchstart - start);
ret[amata->matchstart - start] = '\0';
}
return ret;
}
// ictx->numeric, ictx->p3, and ictx->p2 have the two parameters. we're using
@ -610,7 +617,8 @@ da2_cb(inputctx* ictx){
termname ? termname : "unset");
return 2;
}
unsigned pv = amata_next_numeric(&ictx->amata, "\x1b[>0;", 'c');
amata_next_numeric(&ictx->amata, "\x1b[>", ';');
unsigned pv = amata_next_numeric(&ictx->amata, "", ';');
int maj, min, patch;
if(pv == 0){
return 2;
@ -666,34 +674,25 @@ decrpm_asu_cb(inputctx* ictx){
static int
bgdef_cb(inputctx* ictx){
if(ictx->initdata){
struct esctrie* e = ictx->amata.escapes;
e = esctrie_trie(e)[']'];
e = esctrie_trie(e)['1'];
e = esctrie_trie(e)['1'];
e = esctrie_trie(e)[';'];
e = esctrie_trie(e)['r'];
e = esctrie_trie(e)['g'];
e = esctrie_trie(e)['b'];
e = esctrie_trie(e)[':'];
e = esctrie_trie(e)['a'];
const char* str = esctrie_string(e);
if(str == NULL){
logerror("empty bg string\n");
char* str = amata_next_string(&ictx->amata, "\x1b]11;rgb:");
if(str == NULL){
logerror("empty bg string\n");
}else{
int r, g, b;
if(sscanf(str, "%02x/%02x/%02x", &r, &g, &b) == 3){
// great! =]
}else if(sscanf(str, "%04x/%04x/%04x", &r, &g, &b) == 3){
r /= 256;
g /= 256;
b /= 256;
}else{
int r, g, b;
if(sscanf(str, "%02x/%02x/%02x", &r, &g, &b) == 3){
// great! =]
}else if(sscanf(str, "%04x/%04x/%04x", &r, &g, &b) == 3){
r /= 256;
g /= 256;
b /= 256;
}else{
logerror("couldn't extract rgb from %s\n", str);
r = g = b = 0;
}
ictx->initdata->bg = (r << 16u) | (g << 8u) | b;
loginfo("default background 0x%02x%02x%02x\n", r, g, b);
logerror("couldn't extract rgb from %s\n", str);
r = g = b = 0;
}
ictx->initdata->bg = (r << 16u) | (g << 8u) | b;
loginfo("default background 0x%02x%02x%02x\n", r, g, b);
free(str);
}
}
return 2;
}
@ -721,18 +720,14 @@ extract_xtversion(inputctx* ictx, const char* str, char suffix){
static int
xtversion_cb(inputctx* ictx){
struct esctrie* e = dcs_node(&ictx->amata);
e = esctrie_trie(e)['>'];
e = esctrie_trie(e)['|'];
e = esctrie_trie(e)['a'];
const char* xtversion = esctrie_string(e);
if(ictx->initdata == NULL){
return 2;
}
char* xtversion = amata_next_string(&ictx->amata, "\x1bP>|");
if(xtversion == NULL){
logwarn("empty xtversion\n");
return 2; // don't replay as input
}
if(ictx->initdata == NULL){
return 2;
}
static const struct {
const char* prefix;
char suffix;
@ -755,7 +750,8 @@ xtversion_cb(inputctx* ictx){
loginfo("found terminal type %d version %s\n", xtv->term, ictx->initdata->version);
ictx->initdata->qterm = xtv->term;
}else{
return -1;
free(xtversion);
return 2;
}
break;
}
@ -763,18 +759,17 @@ xtversion_cb(inputctx* ictx){
if(xtv->prefix == NULL){
logwarn("unknown xtversion [%s]\n", xtversion);
}
free(xtversion);
return 2;
}
static int
tcap_cb(inputctx* ictx){
struct esctrie* e = dcs_node(&ictx->amata);
e = esctrie_trie(e)['1'];
e = esctrie_trie(e)['+'];
e = esctrie_trie(e)['r'];
e = esctrie_trie(e)['a'];
const char* str = esctrie_string(e);
loginfo("TCAP: %s\n", str);
char* str = amata_next_string(&ictx->amata, "\x1bP1+r");
if(str){
loginfo("TCAP: %s\n", str);
free(str);
}
/* FIXME
if(cap == 0x544e){ // 'TN' terminal name
loginfo("got TN capability %d\n", val);
@ -791,11 +786,7 @@ tcap_cb(inputctx* ictx){
static int
tda_cb(inputctx* ictx){
struct esctrie* e = dcs_node(&ictx->amata);
e = esctrie_trie(e)['!'];
e = esctrie_trie(e)['|'];
e = esctrie_trie(e)['a'];
const char* str = esctrie_string(e);
char* str = amata_next_string(&ictx->amata, "\x1bP!|");
if(str == NULL){
logwarn("empty ternary device attribute\n");
return 2; // don't replay
@ -810,6 +801,7 @@ tda_cb(inputctx* ictx){
}
loginfo("got TDA: %s, terminal type %d\n", str, ictx->initdata->qterm);
}
free(str);
return 2;
}

Loading…
Cancel
Save