[ncman] parse all five arguments of TH

2024-11-08 01:10:23 +00:00 · 2021-12-08 05:44:34 -05:00 · 2021-12-08 05:44:34 -05:00 · f8626e48cb
commit f8626e48cb
parent e23cf59f57
1 changed files with 61 additions and 110 deletions
--- a/src/man/main.c
+++ b/src/man/main.c
@ -2,6 +2,7 @@
 #include <errno.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <wctype.h>
 #include <getopt.h>
 #include <inttypes.h>
 #include <sys/mman.h>
@ -314,6 +315,8 @@ typedef struct pagedom {
  char* title;
  char* section;
  char* version;
  char* footer;
  char* header;
 } pagedom;
 static const char*
@ -321,131 +324,79 @@ dom_get_title(const pagedom* dom){
  return dom->title;
 }
 // lex the next token from the multibyte string s. leading whitespace is
 // skipped. a token is either a run of non-whitespace characters, or, when it
 // opens with a double quote, everything up to the closing double quote (the
 // quotes are not part of the token). a quoted token which runs into the end
 // of the string is accepted as though it had been closed there.
 //
 // on success, a heap copy of the token's bytes is written to *token (the
 // caller owns and must free it), and the number of bytes consumed from s is
 // returned. that count includes the terminating whitespace character or
 // closing quote, but never the string's NUL terminator, so s + return value
 // is a valid place to resume lexing.
 //
 // returns -1, leaving *token untouched, when there is no token, the token is
 // an empty quoted string, s is not a valid multibyte sequence, or the copy
 // cannot be allocated.
 static int
 lex_next_token(const char* s, char** token){
   mbstate_t ps = {0};
   wchar_t w;
   size_t b;
   size_t cur = 0;
   bool inquote = false;
   const char* tokstart = NULL;
   while((b = mbrtowc(&w, s + cur, MB_CUR_MAX, &ps)) != (size_t)-1 && b != (size_t)-2){
     if(tokstart){
       // a token ends at the end of the string, at the closing quote if it
       // was quoted, or at the first whitespace if it was not.
       if(b == 0 || (inquote && w == L'"') || (!inquote && iswspace(w))){
         // tokstart sitting on NUL means nothing followed the whitespace (or
         // the opening quote); sitting on a quote means an empty "" token.
         if(!*tokstart || *tokstart == '"'){
           return -1;
         }
         char* copy = strndup(tokstart, cur - (size_t)(tokstart - s));
         if(copy == NULL){
           return -1; // don't report success with a NULL token
         }
         *token = copy;
         return (int)(cur + b);
       }
     }else{
       if(iswspace(w)){
         cur += b;
         continue;
       }
       if(w == L'"'){
         inquote = true;
         tokstart = s + cur + b; // token proper begins after the quote
       }else{
         // note that this also fires on the NUL terminator (b == 0); the
         // next iteration then sees an empty token and returns -1.
         tokstart = s + cur;
       }
     }
     cur += b;
   }
   return -1; // invalid or incomplete multibyte sequence
 }
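 // take the newly-added title section, and lex up to five arguments of TH
 // from it: title, section, version, footer, and header (the last three are
 // technically footer-middle, footer-inside, and header-middle). a missing
 // version, footer, or header is not an error. they ought be quoted, but
 // might not be.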
 static int
 lex_title(pagedom* dom){
  const char* tok = dom->root->text;
-  while(isspace(*tok)){
+  int b = lex_next_token(tok, &dom->title);
-    ++tok;
+  if(b < 0){
  }
  bool quoted = false;
  if(*tok == '"'){
    quoted = true;
    ++tok;
  }
  if(!*tok){
    fprintf(stderr, "couldn't extract title [%s]\n", dom->root->text);
    return -1;
  }
-  const char* endtok = tok + 1;
+  tok += b;
-  while(*endtok){
+  b = lex_next_token(tok, &dom->section);
-    if(!quoted){
+  if(b < 0){
      if(isspace(*endtok)){
        break;
      }else if(*endtok == '"'){
        quoted = true;
        break;
      }
    }else{
      if(*endtok == '"'){
        quoted = false;
        break;
      }
    }
    ++endtok;
  }
  if(!*endtok){
    fprintf(stderr, "couldn't extract title [%s]\n", dom->root->text);
    return -1;
  }
  dom->title = strndup(tok, endtok - tok);
  tok = endtok + 1;
  if(!*tok){
    fprintf(stderr, "couldn't extract section [%s]\n", dom->root->text);
    return -1;
  }
-  if(!quoted){
+  tok += b;
-    while(isspace(*tok)){
+  b = lex_next_token(tok, &dom->version);
-      ++tok;
+  if(b < 0){
-    }
+    //fprintf(stderr, "couldn't extract version [%s]\n", dom->root->text);
-    quoted = false;
+    return 0;
    if(*tok == '"'){
      quoted = true;
      ++tok;
    }
    if(!*tok){
      fprintf(stderr, "couldn't extract section [%s]\n", dom->root->text);
      return -1;
    }
  }
-  endtok = tok + 1;
+  tok += b;
-  while(*endtok){
+  b = lex_next_token(tok, &dom->footer);
-    if(!quoted){
+  if(b < 0){
-      if(isspace(*endtok)){
+    //fprintf(stderr, "couldn't extract footer [%s]\n", dom->root->text);
-        break;
+    return 0;
      }else if(*endtok == '"'){
        quoted = true;
        break;
      }
    }else{
      if(*endtok == '"'){
        quoted = false;
        break;
      }
    }
    ++endtok;
  }
-  if(!*endtok){
+  tok += b;
-    fprintf(stderr, "couldn't extract section [%s]\n", dom->root->text);
+  b = lex_next_token(tok, &dom->header);
-    return -1;
+  if(b < 0){
    //fprintf(stderr, "couldn't extract header [%s]\n", dom->root->text);
    return 0;
  }
  dom->section = strndup(tok, endtok - tok);
  tok = endtok + 1;
  if(!*tok){
    fprintf(stderr, "couldn't extract version [%s]\n", dom->root->text);
    dom->version = strdup("");
    return 0; // allow empty version
  }
  if(!quoted){
    while(isspace(*tok)){
      ++tok;
    }
    quoted = false;
    if(*tok == '"'){
      quoted = true;
      ++tok;
    }
    if(!*tok){
      fprintf(stderr, "couldn't extract version [%s]\n", dom->root->text);
      dom->version = strdup("");
      return 0; // allow empty version
    }
  }
  endtok = tok + 1;
  while(*endtok){
    if(!quoted){
      if(isspace(*endtok)){
        break;
      }else if(*endtok == '"'){
        quoted = true;
        break;
      }
    }else{
      if(*endtok == '"'){
        quoted = false;
        break;
      }
    }
    ++endtok;
  }
  if(!*endtok){
    fprintf(stderr, "couldn't extract version [%s]\n", dom->root->text);
    dom->version = strdup("");
    return 0; // allow empty version
  }
  dom->version = strndup(tok, endtok - tok);
  return 0;
 }