[automaton] robustify/minimize string matches #2219

pull/2224/head
nick black 3 years ago committed by nick black
parent 0749800a9d
commit f19ffddb40

@ -28,7 +28,6 @@ typedef struct esctrie {
NODE_FUNCTION, // invokes a function NODE_FUNCTION, // invokes a function
} ntype; } ntype;
ncinput ni; // composed key terminating here ncinput ni; // composed key terminating here
char* str; // accumulated string; reset to NULL on entry
triefunc fxn; // function to call on match triefunc fxn; // function to call on match
struct esctrie* kleene; // kleene match struct esctrie* kleene; // kleene match
} esctrie; } esctrie;
@ -37,10 +36,6 @@ uint32_t esctrie_id(const esctrie* e){
return e->ni.id; return e->ni.id;
} }
const char* esctrie_string(const esctrie* e){
return e->str;
}
esctrie** esctrie_trie(esctrie* e){ esctrie** esctrie_trie(esctrie* e){
return e->trie; return e->trie;
} }
@ -89,7 +84,6 @@ free_trienode(esctrie** eptr){
} }
} }
} }
free(e->str);
free(e->trie); free(e->trie);
} }
free(e); free(e);
@ -367,7 +361,6 @@ int inputctx_add_cflow(automaton* a, const char* csi, triefunc fxn){
} }
eptr = eptr->trie[c]; eptr = eptr->trie[c];
} }
logdebug("added %c, now at %p (%d) (%u)\n", c, eptr, eptr->ntype, *csi);
} }
if(inescape){ if(inescape){
logerror("illegal escape at end of line\n"); logerror("illegal escape at end of line\n");
@ -422,23 +415,6 @@ int inputctx_add_input_escape(automaton* a, const char* esc, uint32_t special,
return 0; return 0;
} }
static int
growstring(automaton* a, esctrie* e, unsigned candidate){
if(!isprint(candidate)){
logerror("unexpected char %u in string\n", candidate);
return -1;
}
char* tmp = realloc(e->str, a->stridx + 1);
if(tmp == NULL){
return -1;
}
e->str = tmp;
e->str[a->stridx - 1] = candidate;
e->str[a->stridx] = '\0';
++a->stridx;
return 0;
}
// returns -1 for non-match, 0 for match, 1 for acceptance. if we are in the // returns -1 for non-match, 0 for match, 1 for acceptance. if we are in the
// middle of a sequence, and receive an escape, *do not call this*, but // middle of a sequence, and receive an escape, *do not call this*, but
// instead call reset_automaton() after replaying the used characters to the // instead call reset_automaton() after replaying the used characters to the
@ -461,8 +437,6 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
if(candidate == 0x1b){ if(candidate == 0x1b){
a->state = e->trie[candidate]; a->state = e->trie[candidate];
a->instring = 0; a->instring = 0;
}else if(growstring(a, e, candidate)){
return -1;
} }
return 0; return 0;
} }
@ -484,11 +458,7 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
case NODE_NUMERIC: case NODE_NUMERIC:
break; break;
case NODE_STRING: case NODE_STRING:
a->stridx = 1;
a->instring = 1; a->instring = 1;
if(growstring(a, e, candidate)){
return -1;
}
break; break;
case NODE_SPECIAL: case NODE_SPECIAL:
if(e->ni.id){ if(e->ni.id){

@ -21,7 +21,6 @@ typedef struct automaton {
int used; // bytes consumed thus far int used; // bytes consumed thus far
int instring; // are we in an ST-terminated string? int instring; // are we in an ST-terminated string?
struct esctrie* state; struct esctrie* state;
unsigned stridx; // bytes of accumulating string (includes NUL) FIXME kill
const unsigned char* matchstart; // beginning of active match const unsigned char* matchstart; // beginning of active match
} automaton; } automaton;
@ -39,7 +38,6 @@ int walk_automaton(automaton* a, struct inputctx* ictx, unsigned candidate,
__attribute__ ((nonnull (1, 2, 4))); __attribute__ ((nonnull (1, 2, 4)));
uint32_t esctrie_id(const struct esctrie* e); uint32_t esctrie_id(const struct esctrie* e);
const char* esctrie_string(const struct esctrie* e);
// returns 128-way array of esctrie pointers // returns 128-way array of esctrie pointers
struct esctrie** esctrie_trie(struct esctrie* e); struct esctrie** esctrie_trie(struct esctrie* e);

@ -314,8 +314,12 @@ amata_next_numeric(automaton* amata, const char* prefix, char follow){
// prefix has been matched // prefix has been matched
unsigned ret = 0; unsigned ret = 0;
while(isdigit(*amata->matchstart)){ while(isdigit(*amata->matchstart)){
int addend = *amata->matchstart - '0';
if((UINT_MAX - addend) / 10 < ret){
logerror("overflow: %u * 10 + %u > %u\n", ret, addend, UINT_MAX);
}
ret *= 10; ret *= 10;
ret += *amata->matchstart - '0'; // FIXME overflow check! ret += addend;
++amata->matchstart; ++amata->matchstart;
} }
if(*amata->matchstart++ != follow){ if(*amata->matchstart++ != follow){
@ -325,27 +329,30 @@ amata_next_numeric(automaton* amata, const char* prefix, char follow){
return ret; return ret;
} }
// get a fixed CSI-anchored node from the trie // same deal as amata_next_numeric, but returns a heap-allocated string.
static inline struct esctrie* // strings always end with ST ("\x1b\\"). this one *does* return NULL on
csi_node(automaton *amata, const char* prefix){ // either a match failure or an alloc failure.
struct esctrie* e = amata->escapes; static char*
e = esctrie_trie(e)['[']; amata_next_string(automaton* amata, const char* prefix){
unsigned p; char c;
while(e && (p = *prefix)){ while( (c = *prefix++) ){
e = esctrie_trie(e)[p]; if(*amata->matchstart != c){
++prefix; logerror("matchstart didn't match prefix (%c vs %c)\n", c, *amata->matchstart);
return NULL;
}
++amata->matchstart;
} }
if(*prefix){ // prefix has been matched. mark start of string and find follow.
logerror("error following path: %s\n", prefix); const unsigned char* start = amata->matchstart;
while(*amata->matchstart != '\x1b'){
++amata->matchstart;
} }
return e; char* ret = malloc(amata->matchstart - start + 1);
} if(ret){
memcpy(ret, start, amata->matchstart - start);
// get the DCS node from the trie ret[amata->matchstart - start] = '\0';
static inline struct esctrie* }
dcs_node(automaton *amata){ return ret;
struct esctrie* e = amata->escapes;
return esctrie_trie(e)['P'];
} }
// ictx->numeric, ictx->p3, and ictx->p2 have the two parameters. we're using // ictx->numeric, ictx->p3, and ictx->p2 have the two parameters. we're using
@ -610,7 +617,8 @@ da2_cb(inputctx* ictx){
termname ? termname : "unset"); termname ? termname : "unset");
return 2; return 2;
} }
unsigned pv = amata_next_numeric(&ictx->amata, "\x1b[>0;", 'c'); amata_next_numeric(&ictx->amata, "\x1b[>", ';');
unsigned pv = amata_next_numeric(&ictx->amata, "", ';');
int maj, min, patch; int maj, min, patch;
if(pv == 0){ if(pv == 0){
return 2; return 2;
@ -666,34 +674,25 @@ decrpm_asu_cb(inputctx* ictx){
static int static int
bgdef_cb(inputctx* ictx){ bgdef_cb(inputctx* ictx){
if(ictx->initdata){ if(ictx->initdata){
struct esctrie* e = ictx->amata.escapes; char* str = amata_next_string(&ictx->amata, "\x1b]11;rgb:");
e = esctrie_trie(e)[']']; if(str == NULL){
e = esctrie_trie(e)['1']; logerror("empty bg string\n");
e = esctrie_trie(e)['1']; }else{
e = esctrie_trie(e)[';']; int r, g, b;
e = esctrie_trie(e)['r']; if(sscanf(str, "%02x/%02x/%02x", &r, &g, &b) == 3){
e = esctrie_trie(e)['g']; // great! =]
e = esctrie_trie(e)['b']; }else if(sscanf(str, "%04x/%04x/%04x", &r, &g, &b) == 3){
e = esctrie_trie(e)[':']; r /= 256;
e = esctrie_trie(e)['a']; g /= 256;
const char* str = esctrie_string(e); b /= 256;
if(str == NULL){
logerror("empty bg string\n");
}else{ }else{
int r, g, b; logerror("couldn't extract rgb from %s\n", str);
if(sscanf(str, "%02x/%02x/%02x", &r, &g, &b) == 3){ r = g = b = 0;
// great! =]
}else if(sscanf(str, "%04x/%04x/%04x", &r, &g, &b) == 3){
r /= 256;
g /= 256;
b /= 256;
}else{
logerror("couldn't extract rgb from %s\n", str);
r = g = b = 0;
}
ictx->initdata->bg = (r << 16u) | (g << 8u) | b;
loginfo("default background 0x%02x%02x%02x\n", r, g, b);
} }
ictx->initdata->bg = (r << 16u) | (g << 8u) | b;
loginfo("default background 0x%02x%02x%02x\n", r, g, b);
free(str);
}
} }
return 2; return 2;
} }
@ -721,18 +720,14 @@ extract_xtversion(inputctx* ictx, const char* str, char suffix){
static int static int
xtversion_cb(inputctx* ictx){ xtversion_cb(inputctx* ictx){
struct esctrie* e = dcs_node(&ictx->amata); if(ictx->initdata == NULL){
e = esctrie_trie(e)['>']; return 2;
e = esctrie_trie(e)['|']; }
e = esctrie_trie(e)['a']; char* xtversion = amata_next_string(&ictx->amata, "\x1bP>|");
const char* xtversion = esctrie_string(e);
if(xtversion == NULL){ if(xtversion == NULL){
logwarn("empty xtversion\n"); logwarn("empty xtversion\n");
return 2; // don't replay as input return 2; // don't replay as input
} }
if(ictx->initdata == NULL){
return 2;
}
static const struct { static const struct {
const char* prefix; const char* prefix;
char suffix; char suffix;
@ -755,7 +750,8 @@ xtversion_cb(inputctx* ictx){
loginfo("found terminal type %d version %s\n", xtv->term, ictx->initdata->version); loginfo("found terminal type %d version %s\n", xtv->term, ictx->initdata->version);
ictx->initdata->qterm = xtv->term; ictx->initdata->qterm = xtv->term;
}else{ }else{
return -1; free(xtversion);
return 2;
} }
break; break;
} }
@ -763,18 +759,17 @@ xtversion_cb(inputctx* ictx){
if(xtv->prefix == NULL){ if(xtv->prefix == NULL){
logwarn("unknown xtversion [%s]\n", xtversion); logwarn("unknown xtversion [%s]\n", xtversion);
} }
free(xtversion);
return 2; return 2;
} }
static int static int
tcap_cb(inputctx* ictx){ tcap_cb(inputctx* ictx){
struct esctrie* e = dcs_node(&ictx->amata); char* str = amata_next_string(&ictx->amata, "\x1bP1+r");
e = esctrie_trie(e)['1']; if(str){
e = esctrie_trie(e)['+']; loginfo("TCAP: %s\n", str);
e = esctrie_trie(e)['r']; free(str);
e = esctrie_trie(e)['a']; }
const char* str = esctrie_string(e);
loginfo("TCAP: %s\n", str);
/* FIXME /* FIXME
if(cap == 0x544e){ // 'TN' terminal name if(cap == 0x544e){ // 'TN' terminal name
loginfo("got TN capability %d\n", val); loginfo("got TN capability %d\n", val);
@ -791,11 +786,7 @@ tcap_cb(inputctx* ictx){
static int static int
tda_cb(inputctx* ictx){ tda_cb(inputctx* ictx){
struct esctrie* e = dcs_node(&ictx->amata); char* str = amata_next_string(&ictx->amata, "\x1bP!|");
e = esctrie_trie(e)['!'];
e = esctrie_trie(e)['|'];
e = esctrie_trie(e)['a'];
const char* str = esctrie_string(e);
if(str == NULL){ if(str == NULL){
logwarn("empty ternary device attribute\n"); logwarn("empty ternary device attribute\n");
return 2; // don't replay return 2; // don't replay
@ -810,6 +801,7 @@ tda_cb(inputctx* ictx){
} }
loginfo("got TDA: %s, terminal type %d\n", str, ictx->initdata->qterm); loginfo("got TDA: %s, terminal type %d\n", str, ictx->initdata->qterm);
} }
free(str);
return 2; return 2;
} }

Loading…
Cancel
Save