add ncstrwidth_valid() and documentation #2153

This commit is contained in:
nick black 2021-09-10 01:28:27 -04:00
parent 831a65aa7c
commit 54cf38b076
No known key found for this signature in database
GPG Key ID: 5F43400C21CBFACC
4 changed files with 56 additions and 13 deletions

View File

@ -1947,12 +1947,11 @@ nccell_cols(const nccell* c){
}
#define NCSTYLE_MASK 0xffffu
#define NCSTYLE_ITALIC 0x0020u
#define NCSTYLE_UNDERLINE 0x0010u
#define NCSTYLE_UNDERCURL 0x0008u
#define NCSTYLE_BOLD 0x0004u
#define NCSTYLE_STRUCK 0x0002u
#define NCSTYLE_BLINK 0x0001u
#define NCSTYLE_ITALIC 0x0010u
#define NCSTYLE_UNDERLINE 0x0008u
#define NCSTYLE_UNDERCURL 0x0004u
#define NCSTYLE_BOLD 0x0002u
#define NCSTYLE_STRUCK 0x0001u
#define NCSTYLE_NONE 0
// copy the UTF8-encoded EGC out of the cell, whether simple or complex. the
@ -2079,6 +2078,21 @@ nccells_double_box(struct ncplane* n, uint32_t attr, uint64_t channels,
}
```
It is sometimes useful to find the number of bytes and columns represented by
a UTF-8 string. `ncstrwidth_valid()` returns -1 if it encounters an invalid
character, and the number of columns otherwise. Even if there is an error, if
`validbytes` and/or `validwidth` are not `NULL`, the number of bytes and
columns (respectively) consumed before error are returned via these parameters.
```c
// Returns the number of columns occupied by the valid prefix of a multibyte
// (UTF-8) string. If an invalid character is encountered, -1 will be returned,
// and the number of valid bytes and columns will be written into *|validbytes|
// and *|validwidth| (assuming them non-NULL). If the entire string is valid,
// *|validbytes| and *|validwidth| reflect the entire string.
int ncstrwidth_valid(const char* egcs, int* validbytes, int* validwidth);
```
### Cell channels API
Helpers are provided to manipulate an `nccell`'s `channels` member. They are

View File

@ -114,6 +114,8 @@ typedef struct nccell {
**int ncstrwidth(const char* ***text***)**;
**int ncstrwidth_valid(const char* ***text***, int* ***validbytes***, int* ***validwidth***)**;
# DESCRIPTION
Cells make up the framebuffer associated with each plane, with one cell per
@ -140,6 +142,11 @@ ought be considered invalidated by changes to the **nccell** or **egcpool**.
The handle is **not** heap-allocated; do **not** attempt to **free(3)** it.
A heap-allocated copy can be acquired with **nccell_strdup**.
**ncstrwidth_valid** returns the number of columns occupied by a valid UTF-8
string, or -1 if an error is encountered. In either case, the number of valid
bytes and columns consumed before any error are written into ***validbytes***
and ***validwidth***, respectively (assuming them to not be **NULL**).
# RETURN VALUES
**nccell_load** and similar functions return the number of bytes loaded from the
@ -157,6 +164,7 @@ less than, equal to, or more than ***c2***, respectively.
**nccell_cols** returns the number of columns occupied by ***c***, according
to **wcwidth(3)**. **ncstrwidth** is an equivalent for strings.
**ncstrwidth_valid** returns the same value as **ncstrwidth**.
# NOTES
@ -172,4 +180,5 @@ have been renamed to start with **nccell**.
**notcurses_plane(3)**,
**notcurses_output(3)**,
**notcurses_visual(3)**,
**wcwidth(3)**
**wcwidth(3)**,
**utf8(7)**

View File

@ -92,8 +92,16 @@ typedef enum {
// Returns the number of columns occupied by a multibyte (UTF-8) string, or
// -1 if a non-printable/illegal character is encountered.
// FIXME becomes a static inline in ABI3.
API int ncstrwidth(const char* mbs);
// Returns the number of columns occupied by the valid prefix of a multibyte
// (UTF-8) string. If an invalid character is encountered, -1 will be returned,
// and the number of valid bytes and columns will be written into *|validbytes|
// and *|validwidth| (assuming them non-NULL). If the entire string is valid,
// *|validbytes| and *|validwidth| reflect the entire string.
API int ncstrwidth_valid(const char* egcs, int* validbytes, int* validwidth);
// Returns a heap-allocated copy of the user name under which we are running.
API ALLOC char* notcurses_accountname(void);

View File

@ -3039,17 +3039,29 @@ int notcurses_ucs32_to_utf8(const uint32_t* ucs32, unsigned ucs32count,
}
// Returns whatever ncstrwidth_valid() returns for the multibyte (UTF-8) string
// |mbs|: the number of columns it occupies, or -1 on error. This is a thin
// wrapper for callers that do not need the partial byte/column counts, so both
// output parameters are passed as NULL.
int ncstrwidth(const char* mbs){
  return ncstrwidth_valid(mbs, NULL, NULL);
}
// Returns the number of columns occupied by the multibyte (UTF-8) string
// |egcs|, or -1 as soon as utf8_egc_len() reports an error (a negative length).
//
// |validbytes| and |validwidth| may each be NULL. When non-NULL, they are
// zeroed on entry and then receive the running totals of bytes and columns
// (respectively) consumed so far. On a -1 return they therefore describe the
// prefix that was accepted before the error; on success they describe the
// whole string, and the return value equals *|validwidth|.
//
// NOTE(review): utf8_egc_len() is defined elsewhere. From its use here it is
// assumed to return the byte length of the next EGC and to store that EGC's
// column count through its second argument -- confirm against its definition.
int ncstrwidth_valid(const char* egcs, int* validbytes, int* validwidth){
  int cols; // scratch accumulator used when the caller passes no |validwidth|
  if(validwidth == NULL){
    validwidth = &cols;
  }
  // always start from zero: the caller's int may be uninitialized or reused
  *validwidth = 0;
  int bytes; // scratch accumulator used when the caller passes no |validbytes|
  if(validbytes == NULL){
    validbytes = &bytes;
  }
  *validbytes = 0;
  // do-while: utf8_egc_len() is called at least once, even for an empty string
  do{
    int thesecols, thesebytes;
    thesebytes = utf8_egc_len(egcs, &thesecols);
    if(thesebytes < 0){
      // the totals already written reflect only the valid prefix
      return -1;
    }
    egcs += thesebytes;
    *validbytes += thesebytes;
    *validwidth += thesecols;
  }while(*egcs);
  return *validwidth;
}
void ncplane_pixelgeom(const ncplane* n, int* RESTRICT pxy, int* RESTRICT pxx,