Coverage for hyperparser.py: 58%
143 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-11 13:22 -0700
« prev ^ index » next coverage.py v7.2.5, created at 2023-05-11 13:22 -0700
1"""Provide advanced parsing abilities for ParenMatch and other extensions.
3HyperParser uses PyParser. PyParser mostly gives information on the
4proper indentation of code. HyperParser gives additional information on
5the structure of code.
6"""
7from keyword import iskeyword
8import string
10from idlelib import pyparse
12# all ASCII chars that may be in an identifier
13_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
14# all ASCII chars that may be the first char of an identifier
15_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")
17# lookup table for whether 7-bit ASCII chars are valid in a Python identifier
18_IS_ASCII_ID_CHAR = [(chr(x) in _ASCII_ID_CHARS) for x in range(128)]
19# lookup table for whether 7-bit ASCII chars are valid as the first
20# char in a Python identifier
21_IS_ASCII_ID_FIRST_CHAR = \
22 [(chr(x) in _ASCII_ID_FIRST_CHARS) for x in range(128)]
25class HyperParser:
26 def __init__(self, editwin, index):
27 "To initialize, analyze the surroundings of the given index."
29 self.editwin = editwin 1abcd
30 self.text = text = editwin.text 1abcd
32 parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth) 1abcd
34 def index2line(index): 1abcd
35 return int(float(index)) 1abcd
36 lno = index2line(text.index(index)) 1abcd
38 if not editwin.prompt_last_line: 38 ↛ 39line 38 didn't jump to line 39, because the condition on line 38 was never true1abcd
39 for context in editwin.num_context_lines:
40 startat = max(lno - context, 1)
41 startatindex = repr(startat) + ".0"
42 stopatindex = "%d.end" % lno
43 # We add the newline because PyParse requires a newline
44 # at end. We add a space so that index won't be at end
45 # of line, so that its status will be the same as the
46 # char before it, if it should be.
47 parser.set_code(text.get(startatindex, stopatindex)+' \n')
48 bod = parser.find_good_parse_start(
49 editwin._build_char_in_string_func(startatindex))
50 if bod is not None or startat == 1:
51 break
52 parser.set_lo(bod or 0)
53 else:
54 r = text.tag_prevrange("console", index) 1abcd
55 if r: 55 ↛ 56line 55 didn't jump to line 56, because the condition on line 55 was never true1abcd
56 startatindex = r[1]
57 else:
58 startatindex = "1.0" 1abcd
59 stopatindex = "%d.end" % lno 1abcd
60 # We add the newline because PyParse requires it. We add a
61 # space so that index won't be at end of line, so that its
62 # status will be the same as the char before it, if it should be.
63 parser.set_code(text.get(startatindex, stopatindex)+' \n') 1abcd
64 parser.set_lo(0) 1abcd
66 # We want what the parser has, minus the last newline and space.
67 self.rawtext = parser.code[:-2] 1abcd
68 # Parser.code apparently preserves the statement we are in, so
69 # that stopatindex can be used to synchronize the string with
70 # the text box indices.
71 self.stopatindex = stopatindex 1abcd
72 self.bracketing = parser.get_last_stmt_bracketing() 1abcd
73 # find which pairs of bracketing are openers. These always
74 # correspond to a character of rawtext.
75 self.isopener = [i>0 and self.bracketing[i][1] > 1abcd
76 self.bracketing[i-1][1]
77 for i in range(len(self.bracketing))]
79 self.set_index(index) 1abcd
81 def set_index(self, index):
82 """Set the index to which the functions relate.
84 The index must be in the same statement.
85 """
86 indexinrawtext = (len(self.rawtext) - 1abcd
87 len(self.text.get(index, self.stopatindex)))
88 if indexinrawtext < 0: 88 ↛ 89line 88 didn't jump to line 89, because the condition on line 88 was never true1abcd
89 raise ValueError("Index %s precedes the analyzed statement"
90 % index)
91 self.indexinrawtext = indexinrawtext 1abcd
92 # find the rightmost bracket to which index belongs
93 self.indexbracket = 0 1abcd
94 while (self.indexbracket < len(self.bracketing)-1 and 1abcd
95 self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
96 self.indexbracket += 1 1abcd
97 if (self.indexbracket < len(self.bracketing)-1 and 1abcd
98 self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
99 not self.isopener[self.indexbracket+1]):
100 self.indexbracket += 1 1abcd
102 def is_in_string(self):
103 """Is the index given to the HyperParser in a string?"""
104 # The bracket to which we belong should be an opener.
105 # If it's an opener, it has to have a character.
106 return (self.isopener[self.indexbracket] and
107 self.rawtext[self.bracketing[self.indexbracket][0]]
108 in ('"', "'"))
110 def is_in_code(self):
111 """Is the index given to the HyperParser in normal code?"""
112 return (not self.isopener[self.indexbracket] or 1abcd
113 self.rawtext[self.bracketing[self.indexbracket][0]]
114 not in ('#', '"', "'"))
116 def get_surrounding_brackets(self, openers='([{', mustclose=False):
117 """Return bracket indexes or None.
119 If the index given to the HyperParser is surrounded by a
120 bracket defined in openers (or at least has one before it),
121 return the indices of the opening bracket and the closing
122 bracket (or the end of line, whichever comes first).
124 If it is not surrounded by brackets, or the end of line comes
125 before the closing bracket and mustclose is True, returns None.
126 """
128 bracketinglevel = self.bracketing[self.indexbracket][1] 1abcd
129 before = self.indexbracket 1abcd
130 while (not self.isopener[before] or 1abcd
131 self.rawtext[self.bracketing[before][0]] not in openers or
132 self.bracketing[before][1] > bracketinglevel):
133 before -= 1 1abcd
134 if before < 0: 1abcd
135 return None 1abcd
136 bracketinglevel = min(bracketinglevel, self.bracketing[before][1]) 1abcd
137 after = self.indexbracket + 1 1abcd
138 while (after < len(self.bracketing) and 138 ↛ 140line 138 didn't jump to line 140, because the condition on line 138 was never true1abcd
139 self.bracketing[after][1] >= bracketinglevel):
140 after += 1
142 beforeindex = self.text.index("%s-%dc" % 1abcd
143 (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
144 if (after >= len(self.bracketing) or 144 ↛ 152line 144 didn't jump to line 152, because the condition on line 144 was never false1abcd
145 self.bracketing[after][0] > len(self.rawtext)):
146 if mustclose: 146 ↛ 147line 146 didn't jump to line 147, because the condition on line 146 was never true1abcd
147 return None
148 afterindex = self.stopatindex 1abcd
149 else:
150 # We are after a real char, so it is a ')' and we give the
151 # index before it.
152 afterindex = self.text.index(
153 "%s-%dc" % (self.stopatindex,
154 len(self.rawtext)-(self.bracketing[after][0]-1)))
156 return beforeindex, afterindex 1abcd
158 # the set of built-in identifiers which are also keywords,
159 # i.e. keyword.iskeyword() returns True for them
160 _ID_KEYWORDS = frozenset({"True", "False", "None"})
162 @classmethod
163 def _eat_identifier(cls, str, limit, pos):
164 """Given a string and pos, return the number of chars in the
165 identifier which ends at pos, or 0 if there is no such one.
167 Keywords are not considered identifiers, except for True, False and None.
168 """
169 is_ascii_id_char = _IS_ASCII_ID_CHAR 1abcd
171 # Start at the end (pos) and work backwards.
172 i = pos 1abcd
174 # Go backwards as long as the characters are valid ASCII
175 # identifier characters. This is an optimization, since it
176 # is faster in the common case where most of the characters
177 # are ASCII.
178 while i > limit and ( 1abcd
179 ord(str[i - 1]) < 128 and
180 is_ascii_id_char[ord(str[i - 1])]
181 ):
182 i -= 1 1abcd
184 # If the above loop ended due to reaching a non-ASCII
185 # character, continue going backwards using the most generic
186 # test for whether a string contains only valid identifier
187 # characters.
188 if i > limit and ord(str[i - 1]) >= 128: 188 ↛ 189line 188 didn't jump to line 189, because the condition on line 188 was never true1abcd
189 while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
190 i -= 4
191 if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
192 i -= 2
193 if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
194 i -= 1
196 # The identifier candidate starts here. If it isn't a valid
197 # identifier, don't eat anything. At this point that is only
198 # possible if the first character isn't a valid first
199 # character for an identifier.
200 if not str[i:pos].isidentifier():
201 return 0
202 elif i < pos: 202 ↛ 211line 202 didn't jump to line 211, because the condition on line 202 was never false1abcd
203 # All characters in str[i:pos] are valid ASCII identifier
204 # characters, so it is enough to check that the first is
205 # valid as the first character of an identifier.
206 if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]: 206 ↛ 207line 206 didn't jump to line 207, because the condition on line 206 was never true1abcd
207 return 0
209 # All keywords are valid identifiers, but should not be
210 # considered identifiers here, except for True, False and None.
211 if i < pos and ( 211 ↛ 215line 211 didn't jump to line 215, because the condition on line 211 was never true1abcd
212 iskeyword(str[i:pos]) and
213 str[i:pos] not in cls._ID_KEYWORDS
214 ):
215 return 0
217 return pos - i 1abcd
219 # This string includes all chars that may be in whitespace
220 _whitespace_chars = " \t\n\\"
222 def get_expression(self):
223 """Return a string with the Python expression which ends at the
224 given index, which is empty if there is no real one.
225 """
226 if not self.is_in_code(): 226 ↛ 227line 226 didn't jump to line 227, because the condition on line 226 was never true1abcd
227 raise ValueError("get_expression should only be called "
228 "if index is inside a code.")
230 rawtext = self.rawtext 1abcd
231 bracketing = self.bracketing 1abcd
233 brck_index = self.indexbracket 1abcd
234 brck_limit = bracketing[brck_index][0] 1abcd
235 pos = self.indexinrawtext 1abcd
237 last_identifier_pos = pos 1abcd
238 postdot_phase = True 1abcd
240 while True: 1abcd
241 # Eat whitespace, comments, and (if postdot_phase is False) a dot
242 while True: 1abcd
243 if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars: 243 ↛ 245line 243 didn't jump to line 245, because the condition on line 243 was never true1abcd
244 # Eat a whitespace
245 pos -= 1
246 elif (not postdot_phase and 246 ↛ 249line 246 didn't jump to line 249, because the condition on line 246 was never true1abcd
247 pos > brck_limit and rawtext[pos-1] == '.'):
248 # Eat a dot
249 pos -= 1
250 postdot_phase = True
251 # The next line will fail if we are *inside* a comment,
252 # but we shouldn't be.
253 elif (pos == brck_limit and brck_index > 0 and 253 ↛ 256line 253 didn't jump to line 256, because the condition on line 253 was never true1abcd
254 rawtext[bracketing[brck_index-1][0]] == '#'):
255 # Eat a comment
256 brck_index -= 2
257 brck_limit = bracketing[brck_index][0]
258 pos = bracketing[brck_index+1][0]
259 else:
260 # If we didn't eat anything, quit.
261 break 1abcd
263 if not postdot_phase: 1abcd
264 # We didn't find a dot, so the expression ends at the
265 # last identifier pos.
266 break 1abcd
268 ret = self._eat_identifier(rawtext, brck_limit, pos) 1abcd
269 if ret: 269 ↛ 277line 269 didn't jump to line 277, because the condition on line 269 was never false1abcd
270 # There is an identifier to eat
271 pos = pos - ret 1abcd
272 last_identifier_pos = pos 1abcd
273 # Now, to continue the search, we must find a dot.
274 postdot_phase = False 1abcd
275 # (the loop continues now)
277 elif pos == brck_limit:
278 # We are at a bracketing limit. If it is a closing
279 # bracket, eat the bracket, otherwise, stop the search.
280 level = bracketing[brck_index][1]
281 while brck_index > 0 and bracketing[brck_index-1][1] > level:
282 brck_index -= 1
283 if bracketing[brck_index][0] == brck_limit:
284 # We were not at the end of a closing bracket
285 break
286 pos = bracketing[brck_index][0]
287 brck_index -= 1
288 brck_limit = bracketing[brck_index][0]
289 last_identifier_pos = pos
290 if rawtext[pos] in "([":
291 # [] and () may be used after an identifier, so we
292 # continue. postdot_phase is True, so we don't allow a dot.
293 pass
294 else:
295 # We can't continue after other types of brackets
296 if rawtext[pos] in "'\"":
297 # Scan a string prefix
298 while pos > 0 and rawtext[pos - 1] in "rRbBuU":
299 pos -= 1
300 last_identifier_pos = pos
301 break
303 else:
304 # We've found an operator or something.
305 break
307 return rawtext[last_identifier_pos:self.indexinrawtext] 1abcd
310if __name__ == '__main__': 310 ↛ 311line 310 didn't jump to line 311, because the condition on line 310 was never true
311 from unittest import main
312 main('idlelib.idle_test.test_hyperparser', verbosity=2)