regexp_defs.h
Go to the documentation of this file.
1 /*
2  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
3  *
4  * This is NOT the original regular expression code as written by Henry
5  * Spencer. This code has been modified specifically for use with Vim, and
6  * should not be used apart from compiling Vim. If you want a good regular
7  * expression library, get the original code.
8  *
9  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
10  */
11 
12 #ifndef NVIM_REGEXP_DEFS_H
13 #define NVIM_REGEXP_DEFS_H
14 
15 #include <stdbool.h>
16 
17 #include "nvim/pos.h"
18 #include "nvim/profile.h"
19 #include "nvim/types.h"
20 
21 /*
22  * The number of sub-matches is limited to 10.
23  * The first one (index 0) is the whole match, referenced with "\0".
24  * The second one (index 1) is the first sub-match, referenced with "\1".
25  * This goes up to the tenth (index 9), referenced with "\9".
26  */
27 #define NSUBEXP 10 // also the length of the per-sub-match arrays declared below (startpos[], startp[], endp[])
28 
29 /*
30  * In the NFA engine: how many braces are allowed.
31  * TODO(RE): Use dynamic memory allocation instead of this static limit.
32  */
33 #define NFA_MAX_BRACES 20 // fixed compile-time limit, see the TODO above
34 
35 // In the NFA engine: how many states are allowed.
36 #define NFA_MAX_STATES 100000
37 #define NFA_TOO_EXPENSIVE -1 // NOTE(review): presumably a sentinel result meaning the NFA engine gave up on a costly pattern - confirm at the use sites
38 
39 // Which regexp engine to use? Needed for vim_regcomp().
40 // Must match with 'regexpengine'.
41 #define AUTOMATIC_ENGINE 0 // NOTE(review): presumably lets the implementation pick an engine - confirm in vim_regcomp()
42 #define BACKTRACKING_ENGINE 1 // the matcher whose program is a bt_regprog_T
43 #define NFA_ENGINE 2 // the matcher whose program is an nfa_regprog_T
44 
45 typedef struct regengine regengine_T; // forward declaration; struct regengine is defined near the end of this header
46 typedef struct regprog regprog_T; // forward declaration; struct regprog is defined below
48 
55 typedef struct { // match data passed to regengine::regexec_multi(); NOTE: this listing omits members, the Doxygen index also lists "regprog_T *regprog" (line 56) and "colnr_T rmm_maxcol" (line 60)
57  lpos_T startpos[NSUBEXP]; // one lpos_T per sub-match index (see NSUBEXP); presumably where each sub-match starts
59  int rmm_ic; // NOTE(review): presumably the ignore-case flag, cf. rm_ic in regmatch_T - confirm
61 } regmmatch_T;
62 
63 #include "nvim/buffer_defs.h"
64 
65 /*
66  * Structure returned by vim_regcomp() to pass on to vim_regexec().
67  * This is the general structure. For the actual matcher, two specific
68  * structures are used. See code below.
69  */
70 struct regprog { // NOTE: this listing omits the first member; the Doxygen index lists "regengine_T *engine" at line 71
72  unsigned regflags;
73  unsigned re_engine; // Automatic, backtracking or NFA engine.
74  unsigned re_flags; // Second argument for vim_regcomp().
75  bool re_in_use; // prog is being executed
76 };
77 
78 /*
79  * Structure used by the back track matcher.
80  * These fields are only to be used in regexp.c!
81  * See regexp.c for an explanation.
82  */
83 typedef struct { // NOTE: this listing omits members; the Doxygen index also lists engine (line 85), reganch (92), regmust (93) and reghasz (95)
84  // These four members implement regprog_T.
86  unsigned regflags; // same name and type as regprog::regflags
87  unsigned re_engine; // same name and type as regprog::re_engine
88  unsigned re_flags; // same name and type as regprog::re_flags
89  bool re_in_use; // same name and type as regprog::re_in_use
90 
91  int regstart; // NOTE(review): presumably as nfa_regprog_T::regstart ("char at start of pattern") - confirm in regexp.c
94  int regmlen; // NOTE(review): meaning not visible in this header - see regexp.c
96  char_u program[1]; // actually longer..
97 } bt_regprog_T;
98 
99 // Structure representing a NFA state.
100 // An NFA state may have no outgoing edge, when it is a NFA_MATCH state.
101 typedef struct nfa_state nfa_state_T;
102 struct nfa_state { // NOTE: this listing omits members; the Doxygen index also lists "nfa_state_T *out" (line 104) and "nfa_state_T *out1" (line 105), presumably the outgoing edges
103  int c; // NOTE(review): presumably the character or state kind (e.g. NFA_MATCH) - confirm in regexp_nfa.c
106  int id;
107  int lastlist[2]; // 0: normal, 1: recursive
108  int val;
109 };
110 
111 /*
112  * Structure used by the NFA matcher.
113  */
114 typedef struct { // NOTE: this listing omits members; the Doxygen index also lists "regengine_T *engine" (line 116) and "char_u *pattern" (line 131)
115  // These four members implement regprog_T.
117  unsigned regflags; // same name and type as regprog::regflags
118  unsigned re_engine; // same name and type as regprog::re_engine
119  unsigned re_flags; // same name and type as regprog::re_flags
120  bool re_in_use; // same name and type as regprog::re_in_use
121 
122  nfa_state_T *start; // points into state[]
123 
124  int reganch; // pattern starts with ^
125  int regstart; // char at start of pattern
126  char_u *match_text; // plain text to match with
127 
128  int has_zend; // pattern contains \ze
129  int has_backref; // pattern contains \1 .. \9
130  int reghasz; // NOTE(review): meaning not visible in this header - confirm in regexp_nfa.c
132  int nsubexp; // number of ()
133  int nstate; // NOTE(review): presumably the number of entries in state[] - confirm
134  nfa_state_T state[1]; // actually longer..
135 } nfa_regprog_T;
136 
137 /*
138  * Structure to be used for single-line matching.
139  * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
140  * When there is no match, the pointer is NULL.
141  */
142 typedef struct { // match data passed to regengine::regexec_nl(); NOTE: this listing omits the first member, the Doxygen index lists "regprog_T *regprog" at line 143
144  char_u *startp[NSUBEXP]; // startp[no]: where sub-match "no" starts, NULL when there is no match
145  char_u *endp[NSUBEXP]; // endp[no]: sub-match "no" ends just before this, NULL when there is no match
146  bool rm_ic; // NOTE(review): presumably the ignore-case flag - confirm
147 } regmatch_T;
148 
149 /*
150  * Structure used to store external references: "\z\(\)" to "\z\1".
151  * Use a reference count to avoid the need to copy this around. When it goes
152  * from 1 to zero the matches need to be freed.
153  */
154 struct reg_extmatch { // NOTE: this listing omits a member; the Doxygen index lists "char_u *matches[NSUBEXP]" at line 156
155  int16_t refcnt; // reference count; when it goes from 1 to zero the matches need to be freed
157 };
158 
159 struct regengine { // NOTE: this listing is incomplete; the Doxygen index also lists regfree (line 161), regexec_nl (162), regexec_multi (163) and "char_u *expr" (165)
160  regprog_T *(*regcomp)(char_u *, int); // function pointer: takes (char_u *, int), returns a regprog_T *
164  proftime_T *, int *); // tail of the regexec_multi declaration, whose first line (163) is missing here; the index gives it as long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, proftime_T *, int *)
166 };
167 
168 #endif // NVIM_REGEXP_DEFS_H
NSUBEXP
#define NSUBEXP
Definition: regexp_defs.h:27
profile.h
window_S
Definition: buffer_defs.h:1158
regengine::regexec_nl
int(* regexec_nl)(regmatch_T *, char_u *, colnr_T, bool)
Definition: regexp_defs.h:162
nfa_state::out1
nfa_state_T * out1
Definition: regexp_defs.h:105
lpos_T
Definition: pos.h:38
regprog::regflags
unsigned regflags
Definition: regexp_defs.h:72
bt_regprog_T::re_flags
unsigned re_flags
Definition: regexp_defs.h:88
regmmatch_T
Definition: regexp_defs.h:55
types.h
nfa_state::out
nfa_state_T * out
Definition: regexp_defs.h:104
regengine
Definition: regexp_defs.h:159
endpos
save_se_T endpos
Definition: regexp_nfa.c:4674
nfa_regprog_T::re_in_use
bool re_in_use
Definition: regexp_defs.h:120
nfa_regprog_T::nstate
int nstate
Definition: regexp_defs.h:133
regprog::re_in_use
bool re_in_use
prog is being executed
Definition: regexp_defs.h:75
regprog
Definition: regexp_defs.h:70
regprog::re_engine
unsigned re_engine
Automatic, backtracking or NFA engine.
Definition: regexp_defs.h:73
nfa_state::c
int c
Definition: regexp_defs.h:103
nfa_regprog_T::pattern
char_u * pattern
Definition: regexp_defs.h:131
nfa_regprog_T::reghasz
int reghasz
Definition: regexp_defs.h:130
nfa_regprog_T::engine
regengine_T * engine
Definition: regexp_defs.h:116
regprog::re_flags
unsigned re_flags
Second argument for vim_regcomp().
Definition: regexp_defs.h:74
nfa_regprog_T
Definition: regexp_defs.h:114
void
void(WINAPI *pClosePseudoConsole)(HPCON)
regengine::regexec_multi
long(* regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T, proftime_T *, int *)
Definition: regexp_defs.h:163
reg_extmatch
Definition: regexp_defs.h:154
regmmatch_T::rmm_maxcol
colnr_T rmm_maxcol
Definition: regexp_defs.h:60
regengine::expr
char_u * expr
Definition: regexp_defs.h:165
nfa_state::id
int id
Definition: regexp_defs.h:106
regmmatch_T::rmm_ic
int rmm_ic
Definition: regexp_defs.h:59
nfa_regprog_T::has_zend
int has_zend
Definition: regexp_defs.h:128
nfa_state::val
int val
Definition: regexp_defs.h:108
colnr_T
int colnr_T
Column number type.
Definition: pos.h:12
nfa_state::lastlist
int lastlist[2]
Definition: regexp_defs.h:107
nfa_regprog_T::re_engine
unsigned re_engine
Definition: regexp_defs.h:118
nfa_regprog_T::reganch
int reganch
Definition: regexp_defs.h:124
bt_regprog_T::regmlen
int regmlen
Definition: regexp_defs.h:94
regmatch_T::rm_ic
bool rm_ic
Definition: regexp_defs.h:146
nfa_regprog_T::match_text
char_u * match_text
Definition: regexp_defs.h:126
regprog::engine
regengine_T * engine
Definition: regexp_defs.h:71
nfa_regprog_T::nsubexp
int nsubexp
Definition: regexp_defs.h:132
nfa_regprog_T::start
nfa_state_T * start
Definition: regexp_defs.h:122
bt_regprog_T::re_in_use
bool re_in_use
Definition: regexp_defs.h:89
regmmatch_T::regprog
regprog_T * regprog
Definition: regexp_defs.h:56
bt_regprog_T::re_engine
unsigned re_engine
Definition: regexp_defs.h:87
bt_regprog_T::engine
regengine_T * engine
Definition: regexp_defs.h:85
proftime_T
uint64_t proftime_T
Definition: profile.h:7
nfa_regprog_T::regstart
int regstart
Definition: regexp_defs.h:125
nfa_state
Definition: regexp_defs.h:102
bt_regprog_T::regstart
int regstart
Definition: regexp_defs.h:91
reg_extmatch::matches
char_u * matches[NSUBEXP]
Definition: regexp_defs.h:156
char_u
unsigned char char_u
Definition: types.h:12
file_buffer
Definition: buffer_defs.h:516
bool
#define bool
Definition: conv.h:18
reg_extmatch::refcnt
int16_t refcnt
Definition: regexp_defs.h:155
regmatch_T::regprog
regprog_T * regprog
Definition: regexp_defs.h:143
buffer_defs.h
bt_regprog_T::regflags
unsigned regflags
Definition: regexp_defs.h:86
nfa_regprog_T::regflags
unsigned regflags
Definition: regexp_defs.h:117
regmatch_T
Definition: regexp_defs.h:142
bt_regprog_T::reganch
char_u reganch
Definition: regexp_defs.h:92
linenr_T
long linenr_T
Definition: pos.h:7
nfa_regprog_T::re_flags
unsigned re_flags
Definition: regexp_defs.h:119
regengine::regfree
void(* regfree)(regprog_T *)
Definition: regexp_defs.h:161
bt_regprog_T::reghasz
char_u reghasz
Definition: regexp_defs.h:95
nfa_regprog_T::has_backref
int has_backref
Definition: regexp_defs.h:129
pos.h
bt_regprog_T::regmust
char_u * regmust
Definition: regexp_defs.h:93
bt_regprog_T
Definition: regexp_defs.h:83