Skip to content

Commit 1942f6d

Browse files
authored
Merge pull request #261 from scrapy/improve-typing-1
Fix mypy issues in parsel.
2 parents deceb29 + d21a5bb commit 1942f6d

File tree

4 files changed

+74
-23
lines changed

4 files changed

+74
-23
lines changed

parsel/selector.py

Lines changed: 65 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -4,16 +4,40 @@
44

55
import typing
66
import warnings
7-
from typing import Any, Dict, List, Mapping, Optional, Pattern, Union
7+
from typing import (
8+
Any,
9+
Dict,
10+
List,
11+
Mapping,
12+
Optional,
13+
Pattern,
14+
Type,
15+
TypeVar,
16+
Union,
17+
)
818
from warnings import warn
919

20+
from cssselect import GenericTranslator as OriginalGenericTranslator
1021
from lxml import etree, html
1122
from pkg_resources import parse_version
1223

1324
from .csstranslator import GenericTranslator, HTMLTranslator
1425
from .utils import extract_regex, flatten, iflatten, shorten
1526

16-
_SelectorType = typing.TypeVar("_SelectorType", bound="Selector")
27+
28+
if typing.TYPE_CHECKING:
29+
# both require Python 3.8
30+
from typing import Literal, SupportsIndex
31+
32+
# simplified _OutputMethodArg from types-lxml
33+
_TostringMethodType = Literal[
34+
"html",
35+
"xml",
36+
]
37+
38+
39+
_SelectorType = TypeVar("_SelectorType", bound="Selector")
40+
_ParserType = Union[etree.XMLParser, etree.HTMLParser]
1741

1842
lxml_version = parse_version(etree.__version__)
1943
lxml_huge_tree_version = parse_version("4.2")
@@ -62,16 +86,20 @@ def _st(st: Optional[str]) -> str:
6286

6387

6488
def create_root_node(
65-
text, parser_cls, base_url=None, huge_tree=LXML_SUPPORTS_HUGE_TREE
66-
):
89+
text: str,
90+
parser_cls: Type[_ParserType],
91+
base_url: Optional[str] = None,
92+
huge_tree: bool = LXML_SUPPORTS_HUGE_TREE,
93+
) -> etree._Element:
6794
"""Create root node for text using given parser class."""
6895
body = text.strip().replace("\x00", "").encode("utf8") or b"<html/>"
6996
if huge_tree and LXML_SUPPORTS_HUGE_TREE:
7097
parser = parser_cls(recover=True, encoding="utf8", huge_tree=True)
71-
root = etree.fromstring(body, parser=parser, base_url=base_url)
98+
# the stub wrongly thinks base_url can't be None
99+
root = etree.fromstring(body, parser=parser, base_url=base_url) # type: ignore[arg-type]
72100
else:
73101
parser = parser_cls(recover=True, encoding="utf8")
74-
root = etree.fromstring(body, parser=parser, base_url=base_url)
102+
root = etree.fromstring(body, parser=parser, base_url=base_url) # type: ignore[arg-type]
75103
for error in parser.error_log:
76104
if "use XML_PARSE_HUGE option" in error.message:
77105
warnings.warn(
@@ -90,18 +118,23 @@ class SelectorList(List[_SelectorType]):
90118
"""
91119

92120
@typing.overload
93-
def __getitem__(self, pos: int) -> _SelectorType:
121+
def __getitem__(self, pos: "SupportsIndex") -> _SelectorType:
94122
pass
95123

96124
@typing.overload
97125
def __getitem__(self, pos: slice) -> "SelectorList[_SelectorType]":
98126
pass
99127

100128
def __getitem__(
101-
self, pos: Union[int, slice]
129+
self, pos: Union["SupportsIndex", slice]
102130
) -> Union[_SelectorType, "SelectorList[_SelectorType]"]:
103131
o = super().__getitem__(pos)
104-
return self.__class__(o) if isinstance(pos, slice) else o
132+
if isinstance(pos, slice):
133+
return self.__class__(
134+
typing.cast("SelectorList[_SelectorType]", o)
135+
)
136+
else:
137+
return typing.cast(_SelectorType, o)
105138

106139
def __getstate__(self) -> None:
107140
raise TypeError("can't pickle SelectorList objects")
@@ -237,7 +270,7 @@ def attrib(self) -> Mapping[str, str]:
237270
return x.attrib
238271
return {}
239272

240-
def remove(self) -> None:
273+
def remove(self) -> None: # type: ignore[override]
241274
"""
242275
Remove matched nodes from the parent for each element in this list.
243276
"""
@@ -308,9 +341,15 @@ def __init__(
308341
huge_tree: bool = LXML_SUPPORTS_HUGE_TREE,
309342
) -> None:
310343
self.type = st = _st(type or self._default_type)
311-
self._parser = _ctgroup[st]["_parser"]
312-
self._csstranslator = _ctgroup[st]["_csstranslator"]
313-
self._tostring_method = _ctgroup[st]["_tostring_method"]
344+
self._parser: Type[_ParserType] = typing.cast(
345+
Type[_ParserType], _ctgroup[st]["_parser"]
346+
)
347+
self._csstranslator: OriginalGenericTranslator = typing.cast(
348+
OriginalGenericTranslator, _ctgroup[st]["_csstranslator"]
349+
)
350+
self._tostring_method: "_TostringMethodType" = typing.cast(
351+
"_TostringMethodType", _ctgroup[st]["_tostring_method"]
352+
)
314353

315354
if text is not None:
316355
if not isinstance(text, str):
@@ -334,7 +373,7 @@ def _get_root(
334373
text: str,
335374
base_url: Optional[str] = None,
336375
huge_tree: bool = LXML_SUPPORTS_HUGE_TREE,
337-
) -> Any:
376+
) -> etree._Element:
338377
return create_root_node(
339378
text, self._parser, base_url=base_url, huge_tree=huge_tree
340379
)
@@ -365,7 +404,9 @@ def xpath(
365404
try:
366405
xpathev = self.root.xpath
367406
except AttributeError:
368-
return self.selectorlist_cls([])
407+
return typing.cast(
408+
SelectorList[_SelectorType], self.selectorlist_cls([])
409+
)
369410

370411
nsp = dict(self.namespaces)
371412
if namespaces is not None:
@@ -389,7 +430,9 @@ def xpath(
389430
)
390431
for x in result
391432
]
392-
return self.selectorlist_cls(result)
433+
return typing.cast(
434+
SelectorList[_SelectorType], self.selectorlist_cls(result)
435+
)
393436

394437
def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]:
395438
"""
@@ -404,7 +447,7 @@ def css(self: _SelectorType, query: str) -> SelectorList[_SelectorType]:
404447
"""
405448
return self.xpath(self._css2xpath(query))
406449

407-
def _css2xpath(self, query: str) -> Any:
450+
def _css2xpath(self, query: str) -> str:
408451
return self._csstranslator.css_to_xpath(query)
409452

410453
def re(
@@ -512,7 +555,10 @@ def remove_namespaces(self) -> None:
512555
# loop on element attributes also
513556
for an in el.attrib:
514557
if an.startswith("{"):
515-
el.attrib[an.split("}", 1)[1]] = el.attrib.pop(an)
558+
# this cast shouldn't be needed as pop never returns None
559+
el.attrib[an.split("}", 1)[1]] = typing.cast(
560+
str, el.attrib.pop(an)
561+
)
516562
# remove namespace declarations
517563
etree.cleanup_namespaces(self.root)
518564

@@ -537,7 +583,7 @@ def remove(self) -> None:
537583
)
538584

539585
try:
540-
parent.remove(self.root)
586+
parent.remove(self.root) # type: ignore[union-attr]
541587
except AttributeError:
542588
# 'NoneType' object has no attribute 'remove'
543589
raise CannotRemoveElementWithoutParent(

parsel/utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import re
2-
from typing import Any, List, Pattern, Union
2+
from typing import Any, List, Pattern, Union, cast, Match
33
from w3lib.html import replace_entities as w3lib_replace_entities
44

55

@@ -69,7 +69,7 @@ def extract_regex(
6969
if "extract" in regex.groupindex:
7070
# named group
7171
try:
72-
extracted = regex.search(text).group("extract")
72+
extracted = cast(Match[str], regex.search(text)).group("extract")
7373
except AttributeError:
7474
strings = []
7575
else:

tests/test_selector.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,12 @@
33
import weakref
44
import unittest
55
import pickle
6+
7+
import typing
68
from typing import Any
79

810
from lxml import etree
11+
from lxml.html import HtmlElement
912
from pkg_resources import parse_version
1013

1114
from parsel import Selector, SelectorList
@@ -718,7 +721,7 @@ def test_namespaces_multiple_adhoc(self) -> None:
718721
def test_make_links_absolute(self) -> None:
719722
text = '<a href="file.html">link to file</a>'
720723
sel = Selector(text=text, base_url="http://example.com")
721-
sel.root.make_links_absolute()
724+
typing.cast(HtmlElement, sel.root).make_links_absolute()
722725
self.assertEqual(
723726
"http://example.com/file.html",
724727
sel.xpath("//a/@href").extract_first(),

tox.ini

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,10 +23,12 @@ commands =
2323
[testenv:typing]
2424
deps =
2525
{[testenv]deps}
26+
types-lxml==2022.4.10
27+
types-psutil==5.9.5.4
2628
types-setuptools==65.5.0.1
2729
mypy==0.982
2830
commands =
29-
mypy {posargs:tests} --warn-unused-ignores --ignore-missing-imports
31+
mypy {posargs:parsel tests} --warn-unused-ignores
3032

3133
[testenv:pylint]
3234
deps =

0 commit comments

Comments
 (0)