regexp_defs.h
Go to the documentation of this file.
1 /*
2  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
3  *
4  * This is NOT the original regular expression code as written by Henry
5  * Spencer. This code has been modified specifically for use with Vim, and
6  * should not be used apart from compiling Vim. If you want a good regular
7  * expression library, get the original code.
8  *
9  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
10  */
11 
12 #ifndef NVIM_REGEXP_DEFS_H
13 #define NVIM_REGEXP_DEFS_H
14 
15 #include <stdbool.h>
16 
17 #include "nvim/pos.h"
18 #include "nvim/types.h"
19 #include "nvim/profile.h"
20 
21 /*
22  * The number of sub-matches is limited to 10.
23  * The first one (index 0) is the whole match, referenced with "\0".
24  * The second one (index 1) is the first sub-match, referenced with "\1".
25  * This goes up to the tenth (index 9), referenced with "\9".
26  */
27 #define NSUBEXP 10  // also the dimension of the startpos/endpos and startp/endp arrays declared in this header
28 
29 /*
30  * In the NFA engine: how many braces are allowed.
31  * TODO(RE): Use dynamic memory allocation instead of static, like here
32  */
33 #define NFA_MAX_BRACES 20  // fixed compile-time limit (see the TODO in the comment above)
34 
35 // In the NFA engine: how many states are allowed.
36 #define NFA_MAX_STATES 100000
37 #define NFA_TOO_EXPENSIVE -1  // NOTE(review): undocumented here; presumably a special result code, its users are not visible in this header - confirm in the NFA engine
38 
39 // Which regexp engine to use? Needed for vim_regcomp().
40 // These values must stay in sync with 'regexpengine'.
41 #define AUTOMATIC_ENGINE 0  // presumably one of the values kept in the re_engine members below - confirm
42 #define BACKTRACKING_ENGINE 1
43 #define NFA_ENGINE 2
44 
45 typedef struct regengine regengine_T;  // forward declaration; struct regengine is defined at the end of this header
46 typedef struct regprog regprog_T;  // forward declaration; struct regprog is defined further down
48 
55 typedef struct {  // Match result expressed as lpos_T positions; passed to regengine.regexec_multi. NOTE(review): this listing omits source line 56 (`regprog_T *regprog` per the definition index on this page)
57  lpos_T startpos[NSUBEXP];  // start position, one slot per sub-match
58  lpos_T endpos[NSUBEXP];  // end position, one slot per sub-match
59  int rmm_ic;  // presumably the ignore-case flag (cf. rm_ic in regmatch_T) - confirm. NOTE(review): source line 60 (`colnr_T rmm_maxcol` per the index) is omitted from this listing
61 } regmmatch_T;
62 
63 #include "nvim/buffer_defs.h"
64 
65 /*
66  * Structure returned by vim_regcomp() to pass on to vim_regexec().
67  * This is the general structure. For the actual matcher, two specific
68  * structures are used. See code below.
69  */
70 struct regprog {  // NOTE(review): this listing omits source line 71 (`regengine_T *engine` per the definition index on this page)
72  unsigned regflags;
73  unsigned re_engine;  // Automatic, backtracking or NFA engine.
74  unsigned re_flags;  // Second argument for vim_regcomp().
75 };
76 
77 /*
78  * Structure used by the backtracking matcher.
79  * These fields are only to be used in regexp.c!
80  * See regexp.c for an explanation.
81  */
82 typedef struct {
83  // These four members implement regprog_T. Only three appear in this listing: the first one (`regengine_T *engine`, source line 84 per the definition index) is omitted.
85  unsigned regflags;
86  unsigned re_engine;
87  unsigned re_flags;  // Second argument for vim_regcomp().
88 
89  int regstart;  // NOTE(review): listing omits source lines 90-91 (`char_u reganch`, `char_u *regmust` per the index)
92  int regmlen;  // NOTE(review): listing omits source line 93 (`char_u reghasz` per the index)
94  char_u program[1]; /* actually longer: declared with one element, the real data extends past it */
95 } bt_regprog_T;
96 
97 // Structure representing a NFA state.
98 // An NFA state may have no outgoing edge, when it is a NFA_MATCH state.
99 typedef struct nfa_state nfa_state_T;  // declared before the struct body so the struct can refer to nfa_state_T
100 struct nfa_state {
101  int c;  // NOTE(review): listing omits source lines 102-103 (`nfa_state_T *out`, `nfa_state_T *out1` per the definition index on this page)
104  int id;
105  int lastlist[2]; /* 0: normal, 1: recursive */
106  int val;
107 };
108 
109 /*
110  * Structure used by the NFA matcher.
111  */
112 typedef struct {
113  // These four members implement regprog_T. Only three appear in this listing: the first one (`regengine_T *engine`, source line 114 per the definition index) is omitted.
115  unsigned regflags;
116  unsigned re_engine;
117  unsigned re_flags;  // Second argument for vim_regcomp().
118 
119  nfa_state_T *start; /* points into state[] */
120 
121  int reganch; /* pattern starts with ^ */
122  int regstart; /* char at start of pattern */
123  char_u *match_text; /* plain text to match with */
124 
125  int has_zend; /* pattern contains \ze */
126  int has_backref; /* pattern contains \1 .. \9 */
127  int reghasz;  // NOTE(review): listing omits source line 128 (`char_u *pattern` per the index)
129  int nsubexp; /* number of () */
130  int nstate;  // presumably the number of entries in state[] - confirm
131  nfa_state_T state[1]; /* actually longer: declared with one element, the real array extends past it */
132 } nfa_regprog_T;
133 
134 /*
135  * Structure to be used for single-line matching.
136  * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
137  * When there is no match, the pointer is NULL.
138  */
139 typedef struct {  // NOTE(review): this listing omits source line 140 (`regprog_T *regprog` per the definition index on this page)
141  char_u *startp[NSUBEXP];  // start of sub-match "no"; NULL when there is no match
142  char_u *endp[NSUBEXP];  // sub-match "no" ends just before this pointer; NULL when there is no match
143  bool rm_ic;  // presumably the ignore-case flag - confirm
144 } regmatch_T;
145 
146 /*
147  * Structure used to store external references: "\z\(\)" to "\z\1".
148  * Use a reference count to avoid the need to copy this around. When it goes
149  * from 1 to zero the matches need to be freed.
150  */
151 struct reg_extmatch {
152  int16_t refcnt;  // reference count; per the comment above, the matches are freed when it drops from 1 to zero. NOTE(review): listing omits source line 153 (`char_u *matches[NSUBEXP]` per the index)
154 };
155 
156 struct regengine {  // Function-pointer table; the `engine` members listed in this page's definition index have type regengine_T *
157  regprog_T *(*regcomp)(char_u *, int);  // returns a regprog_T *; presumably the compile hook - confirm argument meaning in regexp.c
158  void (*regfree)(regprog_T *);  // takes the regprog_T * to release
159  int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, bool);  // operates on a regmatch_T (single-line match structure)
160  long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T,  // operates on a regmmatch_T plus window/buffer/line arguments
161  proftime_T *, int *);
162  char_u *expr;
163 };
164 
165 #endif // NVIM_REGEXP_DEFS_H
char_u * match_text
Definition: regexp_defs.h:123
int nsubexp
Definition: regexp_defs.h:129
nfa_state_T * start
Definition: regexp_defs.h:119
unsigned re_engine
Automatic, backtracking or NFA engine.
Definition: regexp_defs.h:73
Definition: regexp_defs.h:82
char_u reganch
Definition: regexp_defs.h:90
int regmlen
Definition: regexp_defs.h:92
int c
Definition: regexp_defs.h:101
#define NSUBEXP
Definition: regexp_defs.h:27
unsigned regflags
Definition: regexp_defs.h:72
nfa_state_T * out1
Definition: regexp_defs.h:103
colnr_T rmm_maxcol
Definition: regexp_defs.h:60
int regstart
Definition: regexp_defs.h:122
regprog_T * regprog
Definition: regexp_defs.h:140
char_u * regmust
Definition: regexp_defs.h:91
nfa_state_T * out
Definition: regexp_defs.h:102
char_u * expr
Definition: regexp_defs.h:162
unsigned regflags
Definition: regexp_defs.h:85
int regstart
Definition: regexp_defs.h:89
unsigned re_flags
Second argument for vim_regcomp().
Definition: regexp_defs.h:117
int colnr_T
Column number type.
Definition: pos.h:9
Definition: regexp_defs.h:139
unsigned regflags
Definition: regexp_defs.h:115
int has_backref
Definition: regexp_defs.h:126
unsigned re_flags
Second argument for vim_regcomp().
Definition: regexp_defs.h:74
regprog_T * regprog
Definition: regexp_defs.h:56
Definition: buffer_defs.h:477
Definition: regexp_defs.h:100
regengine_T * engine
Definition: regexp_defs.h:71
int id
Definition: regexp_defs.h:104
int val
Definition: regexp_defs.h:106
Definition: buffer_defs.h:1023
int16_t refcnt
Definition: regexp_defs.h:152
unsigned re_flags
Second argument for vim_regcomp().
Definition: regexp_defs.h:87
int rmm_ic
Definition: regexp_defs.h:59
Definition: pos.h:31
long linenr_T
Definition: pos.h:4
Definition: regexp_defs.h:112
char_u reghasz
Definition: regexp_defs.h:93
unsigned char char_u
Definition: types.h:11
int nstate
Definition: regexp_defs.h:130
Definition: regexp_defs.h:55
char_u * pattern
Definition: regexp_defs.h:128
int reghasz
Definition: regexp_defs.h:127
regengine_T * engine
Definition: regexp_defs.h:114
char_u * matches[NSUBEXP]
Definition: regexp_defs.h:153
int has_zend
Definition: regexp_defs.h:125
Definition: regexp_defs.h:156
Definition: regexp_defs.h:70
int reganch
Definition: regexp_defs.h:121
unsigned re_engine
Definition: regexp_defs.h:116
bool rm_ic
Definition: regexp_defs.h:143
uint64_t proftime_T
Definition: profile.h:7
unsigned re_engine
Definition: regexp_defs.h:86
Definition: regexp_defs.h:151
regengine_T * engine
Definition: regexp_defs.h:84