@@ -89,21 +89,31 @@ def css(self, query):
8989 """
9090 return self .__class__ (flatten ([x .css (query ) for x in self ]))
9191
92- def re (self , regex ):
92+ def re (self , regex , replace_entities = True ):
9393 """
9494 Call the ``.re()`` method for each element in this list and return
9595 their results flattened, as a list of unicode strings.
96+
97+ By default, character entity references are replaced by their
98+ corresponding character (except for ``&amp;`` and ``&lt;``).
99+ Passing ``replace_entities`` as ``False`` switches off these
100+ replacements.
96101 """
97- return flatten ([x .re (regex ) for x in self ])
102+ return flatten ([x .re (regex , replace_entities = replace_entities ) for x in self ])
98103
99- def re_first (self , regex , default = None ):
104+ def re_first (self , regex , default = None , replace_entities = True ):
100105 """
101106 Call the ``.re()`` method for the first element in this list and
102107 return the result in an unicode string. If the list is empty or the
103108 regex doesn't match anything, return the default value (``None`` if
104109 the argument is not provided).
110+
111+ By default, character entity references are replaced by their
112+ corresponding character (except for ``&amp;`` and ``&lt;``).
113+ Passing ``replace_entities`` as ``False`` switches off these
114+ replacements.
105115 """
106- for el in iflatten (x .re (regex ) for x in self ):
116+ for el in iflatten (x .re (regex , replace_entities = replace_entities ) for x in self ):
107117 return el
108118 else :
109119 return default
@@ -238,23 +248,33 @@ def css(self, query):
238248 def _css2xpath (self , query ):
239249 return self ._csstranslator .css_to_xpath (query )
240250
241- def re (self , regex ):
251+ def re (self , regex , replace_entities = True ):
242252 """
243253 Apply the given regex and return a list of unicode strings with the
244254 matches.
245255
246256 ``regex`` can be either a compiled regular expression or a string which
247- will be compiled to a regular expression using ``re.compile(regex)``
257+ will be compiled to a regular expression using ``re.compile(regex)``.
258+
259+ By default, character entity references are replaced by their
260+ corresponding character (except for ``&amp;`` and ``&lt;``).
261+ Passing ``replace_entities`` as ``False`` switches off these
262+ replacements.
248263 """
249- return extract_regex (regex , self .extract ())
264+ return extract_regex (regex , self .extract (), replace_entities = replace_entities )
250265
251- def re_first (self , regex , default = None ):
266+ def re_first (self , regex , default = None , replace_entities = True ):
252267 """
253268 Apply the given regex and return the first unicode string which
254269 matches. If there is no match, return the default value (``None`` if
255270 the argument is not provided).
271+
272+ By default, character entity references are replaced by their
273+ corresponding character (except for ``&amp;`` and ``&lt;``).
274+ Passing ``replace_entities`` as ``False`` switches off these
275+ replacements.
256276 """
257- return next (iflatten (self .re (regex )), default )
277+ return next (iflatten (self .re (regex , replace_entities = replace_entities )), default )
258278
259279 def extract (self ):
260280 """
0 commit comments