regexp_defs.h
Go to the documentation of this file.
1 /*
2  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
3  *
4  * This is NOT the original regular expression code as written by Henry
5  * Spencer. This code has been modified specifically for use with Vim, and
6  * should not be used apart from compiling Vim. If you want a good regular
7  * expression library, get the original code.
8  *
9  * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
10  */
11 
12 #ifndef NVIM_REGEXP_DEFS_H
13 #define NVIM_REGEXP_DEFS_H
14 
15 #include <stdbool.h>
16 
17 #include "nvim/pos.h"
18 #include "nvim/types.h"
19 #include "nvim/profile.h"
20 
21 /*
22  * The number of sub-matches is limited to 10.
23  * The first one (index 0) is the whole match, referenced with "\0".
24  * The second one (index 1) is the first sub-match, referenced with "\1".
25  * This goes up to the tenth (index 9), referenced with "\9".
26  */
27 #define NSUBEXP 10  // also the length of the startpos/endpos and startp/endp arrays below
28 
29 /*
30  * In the NFA engine: how many braces are allowed.
31  * TODO(RE): Use dynamic memory allocation instead of static, like here
32  */
33 #define NFA_MAX_BRACES 20  // fixed cap; the TODO above proposes allocating this dynamically
34 
35 // In the NFA engine: how many states are allowed.
36 #define NFA_MAX_STATES 100000
// NOTE(review): presumably a sentinel result meaning the NFA engine gave up
// because matching was too costly -- confirm against the NFA matcher.
// NOTE(review): the expansion is not parenthesized; "(-1)" is the safer form.
37 #define NFA_TOO_EXPENSIVE -1
38 
39 // Which regexp engine to use? Needed for vim_regcomp().
40 // These values must stay in sync with the 'regexpengine' option.
// The member index at the end of this listing describes re_engine as
// "Automatic, backtracking or NFA engine", i.e. one of these three selectors.
41 #define AUTOMATIC_ENGINE 0
42 #define BACKTRACKING_ENGINE 1
43 #define NFA_ENGINE 2
44 
// Forward declarations; both struct bodies appear further down in this file.
45 typedef struct regengine regengine_T;  // table of per-engine function pointers
46 typedef struct regprog regprog_T;  // general structure returned by vim_regcomp()
48 
// Match structure taken by the regexec_multi hook of struct regengine.
// Positions are stored as lpos_T values rather than pointers.
// NOTE: this listing skips lines 56 and 60. The member index at the end of
// the listing records "regprog_T *regprog" at line 56 and
// "colnr_T rmm_maxcol" at line 60; both belong to this struct.
55 typedef struct {
57  lpos_T startpos[NSUBEXP];  // start position, one slot per sub-match index
58  lpos_T endpos[NSUBEXP];  // end position, one slot per sub-match index
59  int rmm_ic;  // NOTE(review): presumably "ignore case", cf. rm_ic in regmatch_T -- confirm
61 } regmmatch_T;
62 
63 #include "nvim/buffer_defs.h"
64 
65 /*
66  * Structure returned by vim_regcomp() to pass on to vim_regexec().
67  * This is the general structure. For the actual matcher, two specific
68  * structures are used. See code below.
69  */
// Common header shared by the engine-specific program structures below,
// which repeat these members at their start.
// NOTE: this listing skips line 71. The member index at the end of the
// listing records "regengine_T *engine" there, as the first member.
70 struct regprog {
72  unsigned regflags;
73  unsigned re_engine;  // automatic, backtracking or NFA engine
74  unsigned re_flags;  // second argument for vim_regcomp()
75 };
76 
77 /*
78  * Structure used by the backtracking matcher.
79  * These fields are only to be used in regexp.c!
80  * See regexp.c for an explanation.
81  */
// NOTE: this listing skips lines 84, 90, 91 and 93. The member index at the
// end of the listing records them as "regengine_T *engine" (84),
// "char_u reganch" (90), "char_u *regmust" (91) and "char_u reghasz" (93).
82 typedef struct {
83  // These four members implement regprog_T (the fourth, engine, is the skipped line 84).
85  unsigned regflags;
86  unsigned re_engine;
87  unsigned re_flags;  // second argument for vim_regcomp()
88 
89  int regstart;
92  int regmlen;
94  char_u program[1]; /* declared with one element, but actually longer */
95 } bt_regprog_T;
96 
97 /*
98  * Structure representing an NFA state.
99  * An NFA state may have no outgoing edge, when it is an NFA_MATCH state.
100  */
// NOTE: this listing skips lines 104 and 105. The member index at the end of
// the listing records "nfa_state_T *out" (104) and "nfa_state_T *out1" (105)
// there, which is why the typedef precedes the struct body.
101 typedef struct nfa_state nfa_state_T;
102 struct nfa_state {
103  int c;
106  int id;
107  int lastlist[2]; /* index 0: normal, index 1: recursive */
108  int val;
109 };
110 
111 /*
112  * Structure used by the NFA matcher.
113  */
// NOTE: this listing skips lines 116 and 130. The member index at the end of
// the listing records "regengine_T *engine" (116) and "char_u *pattern" (130).
114 typedef struct {
115  // These four members implement regprog_T (the fourth, engine, is the skipped line 116).
117  unsigned regflags;
118  unsigned re_engine;
119  unsigned re_flags;  // second argument for vim_regcomp()
120 
121  nfa_state_T *start; /* points into state[] */
122 
123  int reganch; /* pattern starts with ^ */
124  int regstart; /* char at start of pattern */
125  char_u *match_text; /* plain text to match with */
126 
127  int has_zend; /* pattern contains \ze */
128  int has_backref; /* pattern contains \1 .. \9 */
129  int reghasz;
131  int nsubexp; /* number of () */
132  int nstate;  // NOTE(review): presumably the number of entries in state[] -- confirm
133  nfa_state_T state[1]; /* declared with one element, but actually longer */
134 } nfa_regprog_T;
135 
136 /*
137  * Structure to be used for single-line matching.
138  * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
139  * When there is no match, the pointer is NULL.
140  */
// Match structure taken by the regexec_nl hook of struct regengine.
// NOTE: this listing skips line 142. The member index at the end of the
// listing records "regprog_T *regprog" there.
141 typedef struct {
143  char_u *startp[NSUBEXP];  // startp[no]: where sub-match "no" starts; NULL when there is no match
144  char_u *endp[NSUBEXP];  // endp[no]: sub-match "no" ends just before this; NULL when there is no match
145  bool rm_ic;  // NOTE(review): presumably "ignore case" -- confirm against callers
146 } regmatch_T;
147 
148 /*
149  * Structure used to store external references: "\z\(\)" to "\z\1".
150  * Use a reference count to avoid the need to copy this around. When it goes
151  * from 1 to zero the matches need to be freed.
152  */
// NOTE: this listing skips line 155. The member index at the end of the
// listing records "char_u *matches[NSUBEXP]" there.
153 struct reg_extmatch {
154  int16_t refcnt;  // reference count; the matches are freed when it drops from 1 to zero
156 };
157 
// Table of function pointers for one regexp engine, plus an "expr" string.
// What each hook does is not visible in this header; the notes below state
// only what the signatures show.
158 struct regengine {
159  regprog_T *(*regcomp)(char_u *, int);  // returns a regprog_T *
160  void (*regfree)(regprog_T *);  // takes a regprog_T *, returns nothing
161  int (*regexec_nl)(regmatch_T *, char_u *, colnr_T, bool);  // uses the pointer-based regmatch_T
162  long (*regexec_multi)(regmmatch_T *, win_T *, buf_T *, linenr_T, colnr_T,
163  proftime_T *, int *);  // uses the position-based regmmatch_T plus a window and a buffer
164  char_u *expr;  // NOTE(review): purpose not shown in this header -- confirm in regexp.c
165 };
166 
167 #endif // NVIM_REGEXP_DEFS_H
char_u * match_text
Definition: regexp_defs.h:125
int nsubexp
Definition: regexp_defs.h:131
nfa_state_T * start
Definition: regexp_defs.h:121
unsigned re_engine
Automatic, backtracking or NFA engine.
Definition: regexp_defs.h:73
Definition: regexp_defs.h:82
char_u reganch
Definition: regexp_defs.h:90
int regmlen
Definition: regexp_defs.h:92
int c
Definition: regexp_defs.h:103
#define NSUBEXP
Definition: regexp_defs.h:27
unsigned regflags
Definition: regexp_defs.h:72
nfa_state_T * out1
Definition: regexp_defs.h:105
colnr_T rmm_maxcol
Definition: regexp_defs.h:60
int regstart
Definition: regexp_defs.h:124
regprog_T * regprog
Definition: regexp_defs.h:142
char_u * regmust
Definition: regexp_defs.h:91
nfa_state_T * out
Definition: regexp_defs.h:104
char_u * expr
Definition: regexp_defs.h:164
unsigned regflags
Definition: regexp_defs.h:85
int regstart
Definition: regexp_defs.h:89
unsigned re_flags
Second argument for vim_regcomp().
Definition: regexp_defs.h:119
int colnr_T
Column number type.
Definition: pos.h:9
Definition: regexp_defs.h:141
unsigned regflags
Definition: regexp_defs.h:117
int has_backref
Definition: regexp_defs.h:128
unsigned re_flags
Second argument for vim_regcomp().
Definition: regexp_defs.h:74
regprog_T * regprog
Definition: regexp_defs.h:56
Definition: buffer_defs.h:469
Definition: regexp_defs.h:102
regengine_T * engine
Definition: regexp_defs.h:71
int id
Definition: regexp_defs.h:106
int val
Definition: regexp_defs.h:108
Definition: buffer_defs.h:1002
int16_t refcnt
Definition: regexp_defs.h:154
unsigned re_flags
Second argument for vim_regcomp().
Definition: regexp_defs.h:87
int rmm_ic
Definition: regexp_defs.h:59
Definition: pos.h:31
long linenr_T
Definition: pos.h:4
Definition: regexp_defs.h:114
char_u reghasz
Definition: regexp_defs.h:93
unsigned char char_u
Definition: types.h:11
int nstate
Definition: regexp_defs.h:132
Definition: regexp_defs.h:55
char_u * pattern
Definition: regexp_defs.h:130
int reghasz
Definition: regexp_defs.h:129
regengine_T * engine
Definition: regexp_defs.h:116
char_u * matches[NSUBEXP]
Definition: regexp_defs.h:155
int has_zend
Definition: regexp_defs.h:127
Definition: regexp_defs.h:158
Definition: regexp_defs.h:70
int reganch
Definition: regexp_defs.h:123
unsigned re_engine
Definition: regexp_defs.h:118
bool rm_ic
Definition: regexp_defs.h:145
uint64_t proftime_T
Definition: profile.h:7
unsigned re_engine
Definition: regexp_defs.h:86
Definition: regexp_defs.h:153
regengine_T * engine
Definition: regexp_defs.h:84