# Coverage for hyperparser.py: 58%

"""Provide advanced parsing abilities for ParenMatch and other extensions.

HyperParser uses PyParser.  PyParser mostly gives information on the
proper indentation of code.  HyperParser gives additional information on
the structure of code.
"""

from keyword import iskeyword
import string

from idlelib import pyparse

# All ASCII chars that may appear in an identifier.
_ASCII_ID_CHARS = frozenset(string.ascii_letters + string.digits + "_")
# All ASCII chars that may be the first char of an identifier.
_ASCII_ID_FIRST_CHARS = frozenset(string.ascii_letters + "_")

# Lookup table, indexed by code point, for whether a 7-bit ASCII char is
# valid inside a Python identifier.
_IS_ASCII_ID_CHAR = [chr(x) in _ASCII_ID_CHARS for x in range(128)]
# Lookup table, indexed by code point, for whether a 7-bit ASCII char is
# valid as the first char of a Python identifier.
_IS_ASCII_ID_FIRST_CHAR = [
    chr(x) in _ASCII_ID_FIRST_CHARS for x in range(128)
]

class HyperParser:
    """Analyze the statement surrounding an index of an editor text widget.

    After construction the instance exposes:

    rawtext        -- the analyzed source text, ending at ``stopatindex``
    stopatindex    -- text-widget index matching the end of ``rawtext``
    bracketing     -- sequence of (offset in rawtext, nesting level) pairs
                      as returned by the parser for the last statement
    isopener       -- per ``bracketing`` entry, whether its level is
                      greater than the previous entry's level
    indexinrawtext -- offset in ``rawtext`` of the current index
    indexbracket   -- position in ``bracketing`` the current index
                      belongs to
    """

    def __init__(self, editwin, index):
        """Analyze the surroundings of the given index.

        editwin must provide ``text``, ``indentwidth``, ``tabwidth``,
        ``prompt_last_line``, ``num_context_lines`` and
        ``_build_char_in_string_func``; index is an index of
        ``editwin.text``.
        """
        self.editwin = editwin
        self.text = text = editwin.text

        parser = pyparse.Parser(editwin.indentwidth, editwin.tabwidth)

        # The line number is the integer part of a "line.column" index.
        lno = int(float(text.index(index)))
        stopatindex = "%d.end" % lno

        if not editwin.prompt_last_line:
            # Try increasingly large amounts of context until the parser
            # finds a good place to start parsing, or line 1 is reached.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                # We add the newline because PyParse requires a newline
                # at end.  We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_code(text.get(startatindex, stopatindex) + ' \n')
                bod = parser.find_good_parse_start(
                    editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start after the previous "console" tagged range, if there
            # is one, otherwise at the beginning of the text.
            r = text.tag_prevrange("console", index)
            startatindex = r[1] if r else "1.0"
            # We add the newline because PyParse requires it.  We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_code(text.get(startatindex, stopatindex) + ' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.code[:-2]
        # Parser.code apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = bracketing = parser.get_last_stmt_bracketing()
        # Find which entries of bracketing are openers.  These always
        # correspond to a character of rawtext.
        self.isopener = [i > 0 and entry[1] > bracketing[i - 1][1]
                         for i, entry in enumerate(bracketing)]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.  Raises ValueError if
        the index lies before the start of the analyzed text.
        """
        # rawtext ends at stopatindex, so the offset of index is the
        # length of rawtext minus the length of the text after index.
        offset = (len(self.rawtext) -
                  len(self.text.get(index, self.stopatindex)))
        if offset < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = offset

        # Find the rightmost bracket to which index belongs.
        bracketing = self.bracketing
        last = len(bracketing) - 1
        bracket = 0
        while bracket < last and bracketing[bracket + 1][0] < offset:
            bracket += 1
        # An entry starting exactly at the index counts as containing it
        # only when that entry is not an opener.
        if (bracket < last and
                bracketing[bracket + 1][0] == offset and
                not self.isopener[bracket + 1]):
            bracket += 1
        self.indexbracket = bracket

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """
        rawtext = self.rawtext
        bracketing = self.bracketing

        # Walk backwards to the nearest enclosing opener that is one of
        # the requested kinds.
        level = bracketing[self.indexbracket][1]
        before = self.indexbracket
        while (not self.isopener[before] or
               rawtext[bracketing[before][0]] not in openers or
               bracketing[before][1] > level):
            before -= 1
            if before < 0:
                return None
            level = min(level, bracketing[before][1])

        # Walk forwards to the first entry whose level drops below the
        # opener's level.
        after = self.indexbracket + 1
        while after < len(bracketing) and bracketing[after][1] >= level:
            after += 1

        # Text indices are expressed as a char count back from
        # stopatindex, which corresponds to the end of rawtext.
        beforeindex = self.text.index(
            "%s-%dc" % (self.stopatindex,
                        len(rawtext) - bracketing[before][0]))
        if after >= len(bracketing) or bracketing[after][0] > len(rawtext):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                            len(rawtext) - (bracketing[after][0] - 1)))

        return beforeindex, afterindex

    # The set of built-in identifiers which are also keywords,
    # i.e. keyword.iskeyword() returns True for them.
    _ID_KEYWORDS = frozenset({"True", "False", "None"})

    @classmethod
    def _eat_identifier(cls, str, limit, pos):
        """Return the length of the identifier which ends at pos.

        str is the text to scan; the scan goes backwards from pos and
        never looks at characters before limit.  Return 0 if no
        identifier ends at pos.

        Keywords are not considered identifiers, except for True, False
        and None.
        """
        is_ascii_id_char = _IS_ASCII_ID_CHAR

        # Start at the end (pos) and work backwards.
        i = pos

        # Go backwards as long as the characters are valid ASCII
        # identifier characters.  This is an optimization, since it
        # is faster in the common case where most of the characters
        # are ASCII.
        while i > limit and (
                ord(str[i - 1]) < 128 and
                is_ascii_id_char[ord(str[i - 1])]
        ):
            i -= 1

        # If the above loop ended due to reaching a non-ASCII
        # character, continue going backwards using the most generic
        # test for whether a string contains only valid identifier
        # characters.
        if i > limit and ord(str[i - 1]) >= 128:
            # Prefixing 'a' makes the check independent of whether the
            # candidate's first char is valid as a first char.
            while i - 4 >= limit and ('a' + str[i - 4:pos]).isidentifier():
                i -= 4
            if i - 2 >= limit and ('a' + str[i - 2:pos]).isidentifier():
                i -= 2
            if i - 1 >= limit and ('a' + str[i - 1:pos]).isidentifier():
                i -= 1

            # The identifier candidate starts here.  If it isn't a valid
            # identifier, don't eat anything.  At this point that is only
            # possible if the first character isn't a valid first
            # character for an identifier.
            if not str[i:pos].isidentifier():
                return 0
        elif i < pos:
            # All characters in str[i:pos] are valid ASCII identifier
            # characters, so it is enough to check that the first is
            # valid as the first character of an identifier.
            if not _IS_ASCII_ID_FIRST_CHAR[ord(str[i])]:
                return 0

        # All keywords are valid identifiers, but should not be
        # considered identifiers here, except for True, False and None.
        if i < pos and (
                iskeyword(str[i:pos]) and
                str[i:pos] not in cls._ID_KEYWORDS
        ):
            return 0

        return pos - i

    # This string includes all chars that may be in a white space.
    _whitespace_chars = " \t\n\\"

    # Letters that may appear in a string literal prefix (r, b, u, f and
    # their upper-case forms, in any combination).
    _string_prefix_chars = "rRbBuUfF"

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code.
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if (pos > brck_limit and
                        rawtext[pos - 1] in self._whitespace_chars):
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos - 1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index - 1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index + 1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression ends at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit.  If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while (brck_index > 0 and
                       bracketing[brck_index - 1][1] > level):
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue.  postdot_phase is True, so we don't
                    # allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix so that it is part of
                        # the returned expression.
                        while (pos > 0 and
                               rawtext[pos - 1] in self._string_prefix_chars):
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]

# When run as a script, execute this module's unit tests.
if __name__ == '__main__':
    from unittest import main
    main('idlelib.idle_test.test_hyperparser', verbosity=2)