Data Structures | Macros | Functions | Variables
mbyte.c File Reference
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "nvim/vim.h"
#include "nvim/ascii.h"
#include "nvim/eval.h"
#include "nvim/path.h"
#include "nvim/iconv.h"
#include "nvim/mbyte.h"
#include "nvim/charset.h"
#include "nvim/cursor.h"
#include "nvim/fileio.h"
#include "nvim/func_attr.h"
#include "nvim/memline.h"
#include "nvim/message.h"
#include "nvim/misc1.h"
#include "nvim/memory.h"
#include "nvim/option.h"
#include "nvim/screen.h"
#include "nvim/spell.h"
#include "nvim/strings.h"
#include "nvim/os/os.h"
#include "nvim/arabic.h"
#include "nvim/mark.h"

Data Structures

struct  convertStruct
 
struct  interval
 

Macros

#define IDX_LATIN_1   0
 
#define IDX_ISO_2   1
 
#define IDX_ISO_3   2
 
#define IDX_ISO_4   3
 
#define IDX_ISO_5   4
 
#define IDX_ISO_6   5
 
#define IDX_ISO_7   6
 
#define IDX_ISO_8   7
 
#define IDX_ISO_9   8
 
#define IDX_ISO_10   9
 
#define IDX_ISO_11   10
 
#define IDX_ISO_13   11
 
#define IDX_ISO_14   12
 
#define IDX_ISO_15   13
 
#define IDX_KOI8_R   14
 
#define IDX_KOI8_U   15
 
#define IDX_UTF8   16
 
#define IDX_UCS2   17
 
#define IDX_UCS2LE   18
 
#define IDX_UTF16   19
 
#define IDX_UTF16LE   20
 
#define IDX_UCS4   21
 
#define IDX_UCS4LE   22
 
#define IDX_DEBUG   23
 
#define IDX_EUC_JP   24
 
#define IDX_SJIS   25
 
#define IDX_EUC_KR   26
 
#define IDX_EUC_CN   27
 
#define IDX_EUC_TW   28
 
#define IDX_BIG5   29
 
#define IDX_CP437   30
 
#define IDX_CP737   31
 
#define IDX_CP775   32
 
#define IDX_CP850   33
 
#define IDX_CP852   34
 
#define IDX_CP855   35
 
#define IDX_CP857   36
 
#define IDX_CP860   37
 
#define IDX_CP861   38
 
#define IDX_CP862   39
 
#define IDX_CP863   40
 
#define IDX_CP865   41
 
#define IDX_CP866   42
 
#define IDX_CP869   43
 
#define IDX_CP874   44
 
#define IDX_CP932   45
 
#define IDX_CP936   46
 
#define IDX_CP949   47
 
#define IDX_CP950   48
 
#define IDX_CP1250   49
 
#define IDX_CP1251   50
 
#define IDX_CP1253   51
 
#define IDX_CP1254   52
 
#define IDX_CP1255   53
 
#define IDX_CP1256   54
 
#define IDX_CP1257   55
 
#define IDX_CP1258   56
 
#define IDX_MACROMAN   57
 
#define IDX_HPROMAN8   58
 
#define IDX_COUNT   59
 
#define IS_COMPOSING(s1, s2, s3)   (i == 0 ? UTF_COMPOSINGLIKE((s1), (s2)) : utf_iscomposing((s3)))
 

Functions

int enc_canon_props (const char_u *name)
 
int bomb_size (void)
 
void remove_bom (char_u *s)
 
int mb_get_class (const char_u *p)
 
int mb_get_class_tab (const char_u *p, const uint64_t *const chartab)
 
int utf_char2cells (int c)
 
int utf_ptr2cells (const char_u *p)
 
int utf_ptr2cells_len (const char_u *p, int size)
 
size_t mb_string2cells (const char_u *str)
 
size_t mb_string2cells_len (const char_u *str, size_t size)
 
int utf_ptr2char (const char_u *const p) FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
 
int mb_ptr2char_adv (const char_u **const pp)
 
int mb_cptr2char_adv (const char_u **pp)
 
bool utf_composinglike (const char_u *p1, const char_u *p2)
 
int utfc_ptr2char (const char_u *p, int *pcc)
 
int utfc_ptr2char_len (const char_u *p, int *pcc, int maxlen)
 
int utf_ptr2len (const char_u *const p) FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
 
int utf_byte2len (int b)
 
int utf_ptr2len_len (const char_u *p, int size)
 
int utfc_ptr2len (const char_u *const p) FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
 
int utfc_ptr2len_len (const char_u *p, int size)
 
int utf_char2len (const int c)
 Determine how many bytes a given Unicode codepoint will occupy. More...
 
int utf_char2bytes (const int c, char_u *const buf)
 
bool utf_iscomposing (int c)
 
bool utf_printable (int c)
 
int utf_class (const int c)
 
int utf_class_tab (const int c, const uint64_t *const chartab)
 
bool utf_ambiguous_width (int c)
 
int utf_fold (int a)
 
int mb_toupper (int a)
 
bool mb_islower (int a)
 
int mb_tolower (int a)
 
bool mb_isupper (int a)
 
int mb_strnicmp (const char_u *s1, const char_u *s2, const size_t nn)
 
int mb_stricmp (const char *s1, const char *s2)
 
void show_utf8 (void)
 
int utf_head_off (const char_u *base, const char_u *p)
 
void mb_copy_char (const char_u **const fp, char_u **const tp)
 
int mb_off_next (char_u *base, char_u *p)
 
int mb_tail_off (char_u *base, char_u *p)
 
void utf_find_illegal (void)
 
void mb_adjust_cursor (void)
 
void mb_check_adjust_col (void *win_)
 
char_u * mb_prevptr (char_u *line, char_u *p)
 
int mb_charlen (char_u *str)
 
int mb_charlen_len (char_u *str, int len)
 
const char * mb_unescape (const char **const pp) FUNC_ATTR_WARN_UNUSED_RESULT FUNC_ATTR_NONNULL_ALL
 
char_u * enc_skip (char_u *p)
 
char_u * enc_canonize (char_u *enc) FUNC_ATTR_NONNULL_RET
 
char_u * enc_locale (void)
 
int convert_setup (vimconv_T *vcp, char_u *from, char_u *to)
 
int convert_setup_ext (vimconv_T *vcp, char_u *from, bool from_unicode_is_utf8, char_u *to, bool to_unicode_is_utf8)
 
char_u * string_convert (const vimconv_T *const vcp, char_u *ptr, size_t *lenp)
 
char_u * string_convert_ext (const vimconv_T *const vcp, char_u *ptr, size_t *lenp, size_t *unconvlenp)
 

Variables

char_u e_loadlib [] = "E370: Could not load library %s"
 
char_u e_loadfunc [] = "E448: Could not load library function %s"
 
const uint8_t utf8len_tab []
 
const uint8_t utf8len_tab_zero []
 

Macro Definition Documentation

#define IDX_BIG5   29
#define IDX_COUNT   59
#define IDX_CP1250   49
#define IDX_CP1251   50
#define IDX_CP1253   51
#define IDX_CP1254   52
#define IDX_CP1255   53
#define IDX_CP1256   54
#define IDX_CP1257   55
#define IDX_CP1258   56
#define IDX_CP437   30
#define IDX_CP737   31
#define IDX_CP775   32
#define IDX_CP850   33
#define IDX_CP852   34
#define IDX_CP855   35
#define IDX_CP857   36
#define IDX_CP860   37
#define IDX_CP861   38
#define IDX_CP862   39
#define IDX_CP863   40
#define IDX_CP865   41
#define IDX_CP866   42
#define IDX_CP869   43
#define IDX_CP874   44
#define IDX_CP932   45
#define IDX_CP936   46
#define IDX_CP949   47
#define IDX_CP950   48
#define IDX_DEBUG   23
#define IDX_EUC_CN   27
#define IDX_EUC_JP   24
#define IDX_EUC_KR   26
#define IDX_EUC_TW   28
#define IDX_HPROMAN8   58
#define IDX_ISO_10   9
#define IDX_ISO_11   10
#define IDX_ISO_13   11
#define IDX_ISO_14   12
#define IDX_ISO_15   13
#define IDX_ISO_2   1
#define IDX_ISO_3   2
#define IDX_ISO_4   3
#define IDX_ISO_5   4
#define IDX_ISO_6   5
#define IDX_ISO_7   6
#define IDX_ISO_8   7
#define IDX_ISO_9   8
#define IDX_KOI8_R   14
#define IDX_KOI8_U   15
#define IDX_LATIN_1   0
#define IDX_MACROMAN   57
#define IDX_SJIS   25
#define IDX_UCS2   17
#define IDX_UCS2LE   18
#define IDX_UCS4   21
#define IDX_UCS4LE   22
#define IDX_UTF16   19
#define IDX_UTF16LE   20
#define IDX_UTF8   16
#define IS_COMPOSING(s1, s2, s3)   (i == 0 ? UTF_COMPOSINGLIKE((s1), (s2)) : utf_iscomposing((s3)))

Function Documentation

int bomb_size ( void  )
int convert_setup ( vimconv_T *  vcp,
char_u *  from,
char_u *  to
)
int convert_setup_ext ( vimconv_T *  vcp,
char_u *  from,
bool  from_unicode_is_utf8,
char_u *  to,
bool  to_unicode_is_utf8
)
int enc_canon_props ( const char_u *  name)
char_u* enc_canonize ( char_u *  enc)
char_u* enc_locale ( void  )
char_u* enc_skip ( char_u *  p)
void mb_adjust_cursor ( void  )
int mb_charlen ( char_u *  str)
int mb_charlen_len ( char_u *  str,
int  len
)
void mb_check_adjust_col ( void *  win_)

Checks and adjusts cursor column. Not mode-dependent.

See also
check_cursor_col_win
Parameters
win_  Places cursor on a valid column for this window.
void mb_copy_char ( const char_u **const  fp,
char_u **const  tp 
)

Copy a character, advancing the pointers

Parameters
[in,out]  fp  Source of the character to copy.
[in,out]  tp  Destination to copy to.
int mb_cptr2char_adv ( const char_u **  pp)
int mb_get_class ( const char_u *  p)
int mb_get_class_tab ( const char_u *  p,
const uint64_t *const  chartab
)
bool mb_islower ( int  a)
bool mb_isupper ( int  a)
int mb_off_next ( char_u *  base,
char_u *  p
)
char_u* mb_prevptr ( char_u *  line,
char_u *  p
)
int mb_ptr2char_adv ( const char_u **const  pp)
int mb_stricmp ( const char *  s1,
const char *  s2 
)

Compare strings case-insensitively

Note
We need to call mb_stricmp() even when we aren't dealing with a multi-byte encoding because mb_stricmp() takes care of all ASCII and non-ascii encodings, including characters with umlauts in latin1, etc., while STRICMP() only handles the system locale version, which often does not handle non-ascii properly.
Parameters
[in]  s1  First string to compare, no more than MAXCOL characters.
[in]  s2  Second string to compare, no more than MAXCOL characters.
Returns
0 if strings are equal, <0 if s1 < s2, >0 if s1 > s2.
size_t mb_string2cells ( const char_u *  str)

Calculate the number of cells occupied by string str.

Parameters
str  The source string, may not be NULL, must be a NUL-terminated string.
Returns
The number of cells occupied by string str
size_t mb_string2cells_len ( const char_u *  str,
size_t  size
)

Get the number of cells occupied by string str with maximum length size

Parameters
str  The source string, may not be NULL, must be a NUL-terminated string.
size  Maximum length of the string. Processing stops earlier if a NUL is encountered.
Returns
The number of cells occupied by string str
int mb_strnicmp ( const char_u *  s1,
const char_u *  s2,
const size_t  nn
)
int mb_tail_off ( char_u *  base,
char_u *  p
)
int mb_tolower ( int  a)

Return the lower-case equivalent of "a", which is a UCS-4 character. Use simple case folding.

int mb_toupper ( int  a)

Return the upper-case equivalent of "a", which is a UCS-4 character. Use simple case folding.

const char* mb_unescape ( const char **const  pp)

Try to unescape a multibyte character

Used for the rhs and lhs of the mappings.

Parameters
[in,out]  pp  String to unescape. Is advanced to just after the bytes that form a multibyte character.
Returns
Unescaped string if it is a multibyte character, NULL if no multibyte character was found. Returns a static buffer, always one and the same.
void remove_bom ( char_u *  s)
void show_utf8 ( void  )
char_u* string_convert ( const vimconv_T *const  vcp,
char_u *  ptr,
size_t *  lenp
)
char_u* string_convert_ext ( const vimconv_T *const  vcp,
char_u *  ptr,
size_t *  lenp,
size_t *  unconvlenp
)
bool utf_ambiguous_width ( int  c)
int utf_byte2len ( int  b)
int utf_char2bytes ( const int  c,
char_u *const  buf 
)

Convert Unicode character to UTF-8 string

Parameters
c  Character to convert to buf.
[out]  buf  UTF-8 string generated from c, does not add \0
Returns
Number of bytes (1-6). Does not include composing characters.
int utf_char2cells ( int  c)

For UTF-8 character "c" return 2 for a double-width character, 1 for others. Returns 4 or 6 for an unprintable character. Is only correct for characters >= 0x80. When p_ambw is "double", return 2 for a character with East Asian Width class 'A'(mbiguous).

Note
Tables doublewidth and ambiguous are generated by gen_unicode_tables.lua, which must be manually invoked as needed.
int utf_char2len ( const int  c)

Determine how many bytes a given Unicode codepoint will occupy.

int utf_class ( const int  c)
int utf_class_tab ( const int  c,
const uint64_t *const  chartab 
)
bool utf_composinglike ( const char_u *  p1,
const char_u *  p2
)
void utf_find_illegal ( void  )
int utf_fold ( int  a)
int utf_head_off ( const char_u *  base,
const char_u *  p
)

Return offset from "p" to the first byte of the character it points into. If "p" points to the NUL at the end of the string return 0. Returns 0 when already at the first byte of a character.

bool utf_iscomposing ( int  c)
bool utf_printable ( int  c)
int utf_ptr2cells ( const char_u *  p)

Return the number of display cells character at "*p" occupies. This doesn't take care of unprintable characters, use ptr2cells() for that.

int utf_ptr2cells_len ( const char_u *  p,
int  size
)

Like utf_ptr2cells(), but limit string length to "size". For an empty string or truncated character returns 1.

int utf_ptr2char ( const char_u *const  p)

Convert a UTF-8 byte sequence to a wide character

If the sequence is illegal or truncated by a NUL then the first byte is returned. For an overlong sequence this may return zero. Does not include composing characters for obvious reasons.

Parameters
[in]  p  String to convert.
Returns
Unicode codepoint or byte value.
int utf_ptr2len ( const char_u *const  p)

Get the length of a UTF-8 byte sequence representing a single codepoint

Parameters
[in]  p  UTF-8 string.
Returns
Sequence length, 0 for empty string and 1 for non-UTF-8 byte sequence.
int utf_ptr2len_len ( const char_u *  p,
int  size
)
int utfc_ptr2char ( const char_u *  p,
int *  pcc
)

Convert a UTF-8 string to a wide character

Also gets up to MAX_MCO composing characters.

Parameters
[out]  pcc  Location where to store composing characters. Must have space at least for MAX_MCO + 1 elements.
Returns
leading character.
int utfc_ptr2char_len ( const char_u *  p,
int *  pcc,
int  maxlen
)
int utfc_ptr2len ( const char_u *const  p)

Return the number of bytes occupied by a UTF-8 character in a string

This includes following composing characters.

int utfc_ptr2len_len ( const char_u *  p,
int  size
)

Variable Documentation

int canon
int codepage
char_u e_loadfunc[] = "E448: Could not load library function %s"
char_u e_loadlib[] = "E370: Could not load library %s"
const char* name
int prop
const uint8_t utf8len_tab[]
Initial value:
= {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1,
}
const uint8_t utf8len_tab_zero[]
Initial value:
= {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0,
}