use encoding_is_utf8() for ncdirect

4 months ago · b41af4e192
parent 8595f4a308
commit b41af4e192
4 changed files with 45 additions and 37 deletions
--- a/src/lib/direct.c
+++ b/src/lib/direct.c
@ -893,8 +893,9 @@ ncdirect* ncdirect_core_init(const char* termtype, FILE* outfp, uint64_t flags){
  }
  const char* encoding = nl_langinfo(CODESET);
  bool utf8 = false;
-  if(encoding && strcmp(encoding, "UTF-8") == 0){
+  if(encoding && encoding_is_utf8(encoding)){
    utf8 = true;
+    ncmetric_use_utf8();
  }
  if(setup_signals(ret, (flags & NCDIRECT_OPTION_NO_QUIT_SIGHANDLERS),
                   true, ncdirect_stop_minimal)){
--- a/src/lib/internal.h
+++ b/src/lib/internal.h
@ -1889,6 +1889,33 @@ int putenv_term(const char* termname) __attribute__ ((nonnull (1)));
 int set_loglevel_from_env(ncloglevel_e* loglevel)
  __attribute__ ((nonnull (1)));

+// returns true iff enc names UTF-8. glibc's _nl_normalize_charset() converts to lowercase, removing everything
+// but alnums, so we compare the same way. furthermore, "cs" is a valid prefix meaning "character set".
+static inline bool
+encoding_is_utf8(const char *enc){ // enc must be non-NULL: it is dereferenced unconditionally
+  if(tolower(enc[0]) == 'c' && tolower(enc[1]) == 's'){ // strncasecmp() isn't ansi/iso
+    enc += 2; // skip initial "cs" if present.
+  }
+  const char utfstr[] = "utf8"; // the normalized spelling we compare against
+  const char* match = utfstr; // next character of utfstr still to be seen
+  while(*enc){
+    if(isalnum(*enc)){ // we only care about alnums; anything else (e.g. '-') is skipped
+      if(tolower(*enc) != tolower(*match)){ // NOTE(review): plain char handed to <ctype.h>; a negative value is undefined -- consider an unsigned char cast
+        return false;
+      }
+      ++match; // once this reaches the NUL, any further alnum in enc fails the comparison above
+    }
+    ++enc;
+  }
+  if(*match){ // enc ended before all of "utf8" was matched
+    return false;
+  }
+  return true; // the alnums of enc spelled exactly "utf8", case-insensitively
+}
+
+// tell ncmetric that utf8 is available. should be per-context, but isn't.
+void ncmetric_use_utf8(void);
+
 #undef API
 #undef ALLOC

--- a/src/lib/metric.c
+++ b/src/lib/metric.c
@ -8,26 +8,27 @@

 static const wchar_t UTF8_SUBPREFIX[] = L"mµnpfazy"; // 10^24-1
 static const wchar_t ASCII_SUBPREFIX[] = L"munpfazy"; // 10^24-1
+
+// we want to use UTF8_SUBPREFIX if we have utf8 available to us. we could
+// pull this out of const struct notcurses*, except ncnmetric() doesn't take
+// one, and we don't want to break the API. instead, ncmetric_use_utf8() is
+// called at init time when we create a utf8 context. a gross hack =\.
+static pthread_once_t utf8_verdict = PTHREAD_ONCE_INIT;
 static const wchar_t* SUBPREFIXES = ASCII_SUBPREFIX;
-static pthread_once_t utf8_detector = PTHREAD_ONCE_INIT;

-// sure hope we've called setlocale() by the time we hit this!
 static void
-detect_utf8(void){
-  const char* encoding = nl_langinfo(CODESET);
-  if(encoding){
-    if(strcmp(encoding, "UTF-8") == 0){
-      SUBPREFIXES = UTF8_SUBPREFIX;
-    }
-  }
+ncmetric_use_utf8_internal(void){
+  SUBPREFIXES = UTF8_SUBPREFIX;
+}
+
+void ncmetric_use_utf8(void){
+  pthread_once(&utf8_verdict, ncmetric_use_utf8_internal);
 }

 const char* ncnmetric(uintmax_t val, size_t s, uintmax_t decimal,
                      char* buf, int omitdec, uintmax_t mult,
                      int uprefix){
-  // FIXME this is global to the process...ick :/
-  fesetround(FE_TONEAREST);
-  pthread_once(&utf8_detector, detect_utf8);
+  fesetround(FE_TONEAREST); // FIXME global to the process...ick :/
  // these two must have the same number of elements
  const wchar_t* subprefixes = SUBPREFIXES;
  const wchar_t prefixes[] = L"KMGTPEZY"; // 10^21-1 encompasses 2^64-1
--- a/src/lib/notcurses.c
+++ b/src/lib/notcurses.c
@ -1095,30 +1095,6 @@ int ncplane_destroy_family(ncplane *ncp){
  return ret;
 }

-// glibc's _nl_normalize_charset() converts to lowercase, removing everything
-// but alnums. furthermore, "cs" is a valid prefix meaning "character set".
-static bool
-encoding_is_utf8(const char *enc){
-  if(tolower(enc[0]) == 'c' && tolower(enc[1]) == 's'){ // strncasecmp() isn't ansi/iso
-    enc += 2; // skip initial "cs" if present.
-  }
-  const char utfstr[] = "utf8";
-  const char* match = utfstr;
-  while(*enc){
-    if(isalnum(*enc)){ // we only care about alnums
-      if(tolower(*enc) != tolower(*match)){
-        return false;
-      }
-      ++match;
-    }
-    ++enc;
-  }
-  if(*match){
-    return false;
-  }
-  return true;
-}
-
 // it's critical that we're using UTF-8 encoding if at all possible. since the
 // client might not have called setlocale(2) (if they weren't reading the
 // directions...), go ahead and try calling setlocale(LC_ALL, "") and then
@ -1271,6 +1247,9 @@ notcurses_early_init(const struct notcurses_options* opts, FILE* fp, unsigned* u
    free(ret);
    return NULL;
  }
+  if(utf8){
+    ncmetric_use_utf8();
+  }
  return ret;
 }