44"""
55
66from __future__ import unicode_literals , print_function
7- from .dom_helpers import get_descendents
7+ from .dom_helpers import get_children
8+ from .mf_helpers import unordered_list
89from . import mf2_classes
910import bs4
1011import copy
1819else :
1920 from urllib .parse import unquote
2021
21- # Classic Root Classname map
22- CLASSIC_ROOT_MAP = {}
23-
24- # Classic Root properties map
25- CLASSIC_PROPERTY_MAP = {}
22+ # Classic map
23+ _CLASSIC_MAP = {}
2624
2725# populate backcompat rules from JSON files
2826
3432 with codecs .open (file_path , 'r' , 'utf-8' ) as f :
3533 rules = json .load (f )
3634
37- CLASSIC_ROOT_MAP [root ] = rules ['type' ][0 ]
38- CLASSIC_PROPERTY_MAP [root ] = rules ['properties' ]
39-
40-
41-
42- def root (classes ):
43- """get all backcompat root classnames
44- """
45- return [c for c in classes if c in CLASSIC_ROOT_MAP ]
35+ _CLASSIC_MAP [root ] = rules
4636
4737
48- def make_classes_rule ( old_class , new_classes ):
38+ def _make_classes_rule ( old_classes , new_classes ):
4939 """Builds a rule for augmenting an mf1 class with its mf2
5040 equivalent(s).
5141 """
5242 def f (child , ** kwargs ):
43+ child_original = child .original or copy .copy (child )
5344 child_classes = child .get ('class' , [])
54- if old_class in child_classes :
55- child_classes += [c for c in new_classes
56- if c not in child_classes ]
45+ if all (cl in child_classes for cl in old_classes ):
46+ child_classes .extend ([cl for cl in new_classes if cl not in child_classes ])
5747 child ['class' ] = child_classes
58- return f
59-
60-
61- # The RULES map has a list of rules for each root class type.
62- # We'll build the vast majority of it from the CLASSIC_PROPERTY_MAP
63- RULES = dict (
64- (old_root , [make_classes_rule (old_class , new_classes )
65- for old_class , new_classes in properties .items ()])
66- for old_root , properties in CLASSIC_PROPERTY_MAP .items ())
67-
68-
69- def rel_bookmark_to_url_rule (child , ** kwargs ):
70- """rel=bookmark gets augmented with class="u-url
71- """
72- child_classes = child .get ('class' , [])
73- if ('bookmark' in child .get ('rel' , [])
74- and 'u-url' not in child_classes ):
75- child_classes .append ('u-url' )
76- child ['class' ] = child_classes
7748
49+ # if any new class is e-* attach original to parse originally authored HTML
50+ if mf2_classes .embedded (child_classes ) and child .original is None :
51+ child .original = child_original
52+ return f
7853
79- def rel_tag_to_category_rule (child , ** kwargs ):
54+ def _rel_tag_to_category_rule (child , html_parser , ** kwargs ):
8055 """rel=tag converts to p-category using a special transformation (the
81- category becomes the tag href's last path segment). This rule adds a new
82- data tag so that
83- <a rel="tag" href="http://example.com/tags/cat"></a> gets augmented with
56+ category becomes the tag href's last path segment). This rule adds a new data tag so that
57+ <a rel="tag" href="http://example.com/tags/cat"></a> gets replaced with
8458 <data class="p-category" value="cat"></data>
8559 """
60+
61+ href = child .get ('href' , '' )
8662 rels = child .get ('rel' , [])
87- classes = child .get ('class' , [])
88- if ('tag' in rels and child .get ('href' )
89- and 'p-category' not in classes
90- and 'u-category' not in classes ):
91- segments = [seg for seg in child .get ('href' ).split ('/' ) if seg ]
63+ if 'tag' in rels and href :
64+ segments = [seg for seg in href .split ('/' ) if seg ]
9265 if segments :
93- data = bs4 .BeautifulSoup ('<data></data>' ).data
94- # use mf1 class here so it doesn't get removed later
95- data ['class' ] = ['category' ]
66+ if html_parser :
67+ soup = bs4 .BeautifulSoup ('' , features = html_parser )
68+ else :
69+ soup = bs4 .BeautifulSoup ('' )
70+
71+ data = soup .new_tag ('data' )
72+ # this does not use what's given in the JSON
73+ # but that is not a problem currently
74+ # use mf1 class so it doesn't get removed later
75+ data ['class' ] = ['p-category' ]
9676 data ['value' ] = unquote (segments [- 1 ])
97- child .parent .append (data )
77+ child .insert_before (data )
78+ # remove tag from rels to avoid repeat
79+ child ['rel' ] = [r for r in rels if r != 'tag' ]
9880
9981
100- # Augment with special rules
101- RULES ['hentry' ] += [
102- rel_bookmark_to_url_rule ,
103- rel_tag_to_category_rule ,
104- ]
82+ def _make_rels_rule (old_rels , new_classes , html_parser ):
83+ """Builds a rule for augmenting an mf1 rel with its mf2 class equivalent(s).
84+ """
85+
86+ # need to special case rel=tag as it operates differently
87+
88+ def f (child , ** kwargs ):
89+ child_rels = child .get ('rel' , [])
90+ child_classes = child .get ('class' , [])
91+ if all (r in child_rels for r in old_rels ):
92+ if 'tag' in old_rels :
93+ _rel_tag_to_category_rule (child , html_parser , ** kwargs )
94+ else :
95+ child_classes .extend ([cl for cl in new_classes if cl not in child_classes ])
96+ child ['class' ] = child_classes
97+ return f
10598
106- def apply_rules (el ):
107- """add modern classnames for older mf1 classnames
10899
109- returns a copy of el and does not modify the original
def _get_rules(old_root, html_parser):
    """For a given mf1 root, get the rules as a list of functions to act on children.

    Class rules are built from the root's 'properties' entry and rel rules
    from its 'rels' entry. Each key of those mappings is a whitespace-separated
    string of mf1 names and is split into a list; each value is passed on as
    the new classes for that rule.

    :param old_root: mf1 root class name used as the key into _CLASSIC_MAP.
        A root with no entry yields an empty list.
    :param html_parser: parser name handed to the rel rules.
    :returns: list of rule functions, class rules first, then rel rules.
    """
    # tolerate unknown roots instead of raising KeyError
    root_rules = _CLASSIC_MAP.get(old_root, {})

    class_rules = [_make_classes_rule(old_classes.split(), new_classes)
                   for old_classes, new_classes in root_rules.get('properties', {}).items()]
    rel_rules = [_make_rels_rule(old_rels.split(), new_classes, html_parser)
                 for old_rels, new_classes in root_rules.get('rels', {}).items()]

    return class_rules + rel_rules
109+
def root(classes):
    """Get all backcompat (mf1) root classnames found in ``classes``.

    A class name counts as a root when it is a key of _CLASSIC_MAP. The
    matches are passed through unordered_list (from mf_helpers) before being
    returned.
    NOTE(review): presumably unordered_list de-duplicates -- confirm there.
    """
    known_roots = [name for name in classes if name in _CLASSIC_MAP]
    return unordered_list(known_roots)
111114
112- el_copy = copy .copy (el )
115+ def apply_rules (el , html_parser ):
116+ """add modern classnames for older mf1 classnames
117+ """
113118
114119 def apply_prop_rules_to_children (parent , rules ):
115120
116- for child in ( c for c in parent . children if isinstance ( c , bs4 . Tag ) ):
121+ for child in get_children ( parent ):
117122 classes = child .get ('class' ,[])
118123 # find existing mf2 properties if any and delete them
119124 mf2_props = mf2_classes .property_classes (classes )
@@ -129,19 +134,19 @@ def apply_prop_rules_to_children(parent, rules):
129134
130135
131136 # add mf2 root equivalent
132- classes = el_copy .get ('class' , [])
137+ classes = el .get ('class' , [])
133138 old_roots = root (classes )
134139 for old_root in old_roots :
135- new_root = CLASSIC_ROOT_MAP [old_root ]
136- if new_root not in classes :
137- el_copy ['class' ]. append ( new_root )
140+ new_roots = _CLASSIC_MAP [old_root ][ 'type' ]
141+ classes . extend ( new_roots )
142+ el ['class' ] = classes
138143
139144
140145 # add mf2 prop equivalent to descendents and remove existing mf2 props
141146 rules = []
142147 for old_root in old_roots :
143- rules .extend (RULES . get (old_root ,[] ))
148+ rules .extend (_get_rules (old_root , html_parser ))
144149
145- apply_prop_rules_to_children (el_copy , rules )
150+ apply_prop_rules_to_children (el , rules )
146151
147- return el_copy
152+ return el
0 commit comments