44
55import typing
66import warnings
7- from typing import Any , Dict , List , Mapping , Optional , Pattern , Union
7+ from typing import (
8+ Any ,
9+ Dict ,
10+ List ,
11+ Mapping ,
12+ Optional ,
13+ Pattern ,
14+ Type ,
15+ TypeVar ,
16+ Union ,
17+ )
818from warnings import warn
919
20+ from cssselect import GenericTranslator as OriginalGenericTranslator
1021from lxml import etree , html
1122from pkg_resources import parse_version
1223
1324from .csstranslator import GenericTranslator , HTMLTranslator
1425from .utils import extract_regex , flatten , iflatten , shorten
1526
16- _SelectorType = typing .TypeVar ("_SelectorType" , bound = "Selector" )
27+
28+ if typing .TYPE_CHECKING :
29+ # both require Python 3.8
30+ from typing import Literal , SupportsIndex
31+
32+ # simplified _OutputMethodArg from types-lxml
33+ _TostringMethodType = Literal [
34+ "html" ,
35+ "xml" ,
36+ ]
37+
38+
39+ _SelectorType = TypeVar ("_SelectorType" , bound = "Selector" )
40+ _ParserType = Union [etree .XMLParser , etree .HTMLParser ]
1741
1842lxml_version = parse_version (etree .__version__ )
1943lxml_huge_tree_version = parse_version ("4.2" )
@@ -62,16 +86,20 @@ def _st(st: Optional[str]) -> str:
6286
6387
6488def create_root_node (
65- text , parser_cls , base_url = None , huge_tree = LXML_SUPPORTS_HUGE_TREE
66- ):
89+ text : str ,
90+ parser_cls : Type [_ParserType ],
91+ base_url : Optional [str ] = None ,
92+ huge_tree : bool = LXML_SUPPORTS_HUGE_TREE ,
93+ ) -> etree ._Element :
6794 """Create root node for text using given parser class."""
6895 body = text .strip ().replace ("\x00 " , "" ).encode ("utf8" ) or b"<html/>"
6996 if huge_tree and LXML_SUPPORTS_HUGE_TREE :
7097 parser = parser_cls (recover = True , encoding = "utf8" , huge_tree = True )
71- root = etree .fromstring (body , parser = parser , base_url = base_url )
98+ # the stub wrongly thinks base_url can't be None
99+ root = etree .fromstring (body , parser = parser , base_url = base_url ) # type: ignore[arg-type]
72100 else :
73101 parser = parser_cls (recover = True , encoding = "utf8" )
74- root = etree .fromstring (body , parser = parser , base_url = base_url )
102+ root = etree .fromstring (body , parser = parser , base_url = base_url ) # type: ignore[arg-type]
75103 for error in parser .error_log :
76104 if "use XML_PARSE_HUGE option" in error .message :
77105 warnings .warn (
@@ -90,18 +118,23 @@ class SelectorList(List[_SelectorType]):
90118 """
91119
92120 @typing .overload
93- def __getitem__ (self , pos : int ) -> _SelectorType :
121+ def __getitem__ (self , pos : "SupportsIndex" ) -> _SelectorType :
94122 pass
95123
96124 @typing .overload
97125 def __getitem__ (self , pos : slice ) -> "SelectorList[_SelectorType]" :
98126 pass
99127
100128 def __getitem__ (
101- self , pos : Union [int , slice ]
129+ self , pos : Union ["SupportsIndex" , slice ]
102130 ) -> Union [_SelectorType , "SelectorList[_SelectorType]" ]:
103131 o = super ().__getitem__ (pos )
104- return self .__class__ (o ) if isinstance (pos , slice ) else o
132+ if isinstance (pos , slice ):
133+ return self .__class__ (
134+ typing .cast ("SelectorList[_SelectorType]" , o )
135+ )
136+ else :
137+ return typing .cast (_SelectorType , o )
105138
106139 def __getstate__ (self ) -> None :
107140 raise TypeError ("can't pickle SelectorList objects" )
@@ -237,7 +270,7 @@ def attrib(self) -> Mapping[str, str]:
237270 return x .attrib
238271 return {}
239272
240- def remove (self ) -> None :
273+ def remove (self ) -> None : # type: ignore[override]
241274 """
242275 Remove matched nodes from the parent for each element in this list.
243276 """
@@ -308,9 +341,15 @@ def __init__(
308341 huge_tree : bool = LXML_SUPPORTS_HUGE_TREE ,
309342 ) -> None :
310343 self .type = st = _st (type or self ._default_type )
311- self ._parser = _ctgroup [st ]["_parser" ]
312- self ._csstranslator = _ctgroup [st ]["_csstranslator" ]
313- self ._tostring_method = _ctgroup [st ]["_tostring_method" ]
344+ self ._parser : Type [_ParserType ] = typing .cast (
345+ Type [_ParserType ], _ctgroup [st ]["_parser" ]
346+ )
347+ self ._csstranslator : OriginalGenericTranslator = typing .cast (
348+ OriginalGenericTranslator , _ctgroup [st ]["_csstranslator" ]
349+ )
350+ self ._tostring_method : "_TostringMethodType" = typing .cast (
351+ "_TostringMethodType" , _ctgroup [st ]["_tostring_method" ]
352+ )
314353
315354 if text is not None :
316355 if not isinstance (text , str ):
@@ -334,7 +373,7 @@ def _get_root(
334373 text : str ,
335374 base_url : Optional [str ] = None ,
336375 huge_tree : bool = LXML_SUPPORTS_HUGE_TREE ,
337- ) -> Any :
376+ ) -> etree . _Element :
338377 return create_root_node (
339378 text , self ._parser , base_url = base_url , huge_tree = huge_tree
340379 )
@@ -365,7 +404,9 @@ def xpath(
365404 try :
366405 xpathev = self .root .xpath
367406 except AttributeError :
368- return self .selectorlist_cls ([])
407+ return typing .cast (
408+ SelectorList [_SelectorType ], self .selectorlist_cls ([])
409+ )
369410
370411 nsp = dict (self .namespaces )
371412 if namespaces is not None :
@@ -389,7 +430,9 @@ def xpath(
389430 )
390431 for x in result
391432 ]
392- return self .selectorlist_cls (result )
433+ return typing .cast (
434+ SelectorList [_SelectorType ], self .selectorlist_cls (result )
435+ )
393436
394437 def css (self : _SelectorType , query : str ) -> SelectorList [_SelectorType ]:
395438 """
@@ -404,7 +447,7 @@ def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]:
404447 """
405448 return self .xpath (self ._css2xpath (query ))
406449
407- def _css2xpath (self , query : str ) -> Any :
450+ def _css2xpath (self , query : str ) -> str :
408451 return self ._csstranslator .css_to_xpath (query )
409452
410453 def re (
@@ -512,7 +555,10 @@ def remove_namespaces(self) -> None:
512555 # loop on element attributes also
513556 for an in el .attrib :
514557 if an .startswith ("{" ):
515- el .attrib [an .split ("}" , 1 )[1 ]] = el .attrib .pop (an )
558+ # this cast shouldn't be needed as pop never returns None
559+ el .attrib [an .split ("}" , 1 )[1 ]] = typing .cast (
560+ str , el .attrib .pop (an )
561+ )
516562 # remove namespace declarations
517563 etree .cleanup_namespaces (self .root )
518564
@@ -537,7 +583,7 @@ def remove(self) -> None:
537583 )
538584
539585 try :
540- parent .remove (self .root )
586+ parent .remove (self .root ) # type: ignore[union-attr]
541587 except AttributeError :
542588 # 'NoneType' object has no attribute 'remove'
543589 raise CannotRemoveElementWithoutParent (
0 commit comments