Skip to content

Commit d375116

Browse files
committed
Merge branch 'develop'
2 parents eb0330b + 87b9f6c commit d375116

File tree

5 files changed

+67
-18
lines changed

5 files changed

+67
-18
lines changed

README.rst

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,10 +102,34 @@ code_language
102102
should be annotated with `````python`` or similar.
103103
Defaults to ``''`` (empty string) and can be any string.
104104

105+
code_language_callback
106+
When the HTML code contains ``pre`` tags that in some way provide the code
107+
language, for example as class, this callback can be used to extract the
108+
language from the tag and prefix it to the converted ``pre`` tag.
109+
The callback gets one single argument, a BeautifulSoup object, and returns
110+
a string containing the code language, or ``None``.
111+
An example to use the class name as code language could be::
112+
113+
def callback(el):
114+
return el['class'][0] if el.has_attr('class') else None
115+
116+
Defaults to ``None``.
117+
118+
escape_asterisks
119+
If set to ``False``, do not escape ``*`` to ``\*`` in text.
120+
Defaults to ``True``.
121+
105122
escape_underscores
106123
If set to ``False``, do not escape ``_`` to ``\_`` in text.
107124
Defaults to ``True``.
108125

126+
keep_inline_images_in
127+
Images are converted to their alt-text when the images are located inside
128+
headlines or table cells. If some inline images should be converted to
129+
markdown images instead, this option can be set to a list of parent tags
130+
that should be allowed to contain inline images, for example ``['td']``.
131+
Defaults to an empty list.
132+
109133
Options may be specified as kwargs to the ``markdownify`` function, or as a
110134
nested ``Options`` class in ``MarkdownConverter`` subclasses.
111135

@@ -119,7 +143,7 @@ Converting BeautifulSoup objects
119143
120144
# Create shorthand method for conversion
121145
def md(soup, **options):
122-
return ImageBlockConverter(**options).convert_soup(soup)
146+
return MarkdownConverter(**options).convert_soup(soup)
123147
124148
125149
Creating Custom Converters

markdownify/__init__.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,6 @@
2525
UNDERSCORE = '_'
2626

2727

28-
def escape(text, escape_underscores):
29-
if not text:
30-
return ''
31-
if escape_underscores:
32-
return text.replace('_', r'\_')
33-
return text
34-
35-
3628
def chomp(text):
3729
"""
3830
If the text in an inline tag like b, a, or em contains a leading or trailing
@@ -71,10 +63,13 @@ class DefaultOptions:
7163
autolinks = True
7264
bullets = '*+-' # An iterable of bullet types.
7365
code_language = ''
66+
code_language_callback = None
7467
convert = None
7568
default_title = False
69+
escape_asterisks = True
7670
escape_underscores = True
7771
heading_style = UNDERLINED
72+
keep_inline_images_in = []
7873
newline_style = SPACES
7974
strip = None
8075
strong_em_symbol = ASTERISK
@@ -161,7 +156,7 @@ def process_text(self, el):
161156
text = whitespace_re.sub(' ', text)
162157

163158
if el.parent.name != 'code':
164-
text = escape(text, self.options['escape_underscores'])
159+
text = self.escape(text)
165160

166161
# remove trailing whitespaces if any of the following condition is true:
167162
# - current text node is the last node in li
@@ -199,6 +194,15 @@ def should_convert_tag(self, tag):
199194
else:
200195
return True
201196

197+
def escape(self, text):
198+
if not text:
199+
return ''
200+
if self.options['escape_asterisks']:
201+
text = text.replace('*', r'\*')
202+
if self.options['escape_underscores']:
203+
text = text.replace('_', r'\_')
204+
return text
205+
202206
def indent(self, text, level):
203207
return line_beginning_re.sub('\t' * level, text) if text else ''
204208

@@ -278,7 +282,8 @@ def convert_img(self, el, text, convert_as_inline):
278282
src = el.attrs.get('src', None) or ''
279283
title = el.attrs.get('title', None) or ''
280284
title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
281-
if convert_as_inline:
285+
if (convert_as_inline
286+
and el.parent.name not in self.options['keep_inline_images_in']):
282287
return alt
283288

284289
return '![%s](%s%s)' % (alt, src, title_part)
@@ -331,7 +336,12 @@ def convert_p(self, el, text, convert_as_inline):
331336
def convert_pre(self, el, text, convert_as_inline):
332337
if not text:
333338
return ''
334-
return '\n```%s\n%s\n```\n' % (self.options['code_language'], text)
339+
code_language = self.options['code_language']
340+
341+
if self.options['code_language_callback']:
342+
code_language = self.options['code_language_callback'](el) or code_language
343+
344+
return '\n```%s\n%s\n```\n' % (code_language, text)
335345

336346
convert_s = convert_del
337347

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
pkgmeta = {
1111
'__title__': 'markdownify',
1212
'__author__': 'Matthew Tretter',
13-
'__version__': '0.10.3',
13+
'__version__': '0.11.0',
1414
}
1515

1616

tests/test_conversions.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,12 +133,13 @@ def test_hn_nested_simple_tag():
133133

134134
def test_hn_nested_img():
135135
image_attributes_to_markdown = [
136-
("", ""),
137-
("alt='Alt Text'", "Alt Text"),
138-
("alt='Alt Text' title='Optional title'", "Alt Text"),
136+
("", "", ""),
137+
("alt='Alt Text'", "Alt Text", ""),
138+
("alt='Alt Text' title='Optional title'", "Alt Text", " \"Optional title\""),
139139
]
140-
for image_attributes, markdown in image_attributes_to_markdown:
141-
assert md('<h3>A <img src="/path/to/img.jpg " ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
140+
for image_attributes, markdown, title in image_attributes_to_markdown:
141+
assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
142+
assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>', keep_inline_images_in=['h3']) == '### A ![' + markdown + '](/path/to/img.jpg' + title + ') B\n\n'
142143

143144

144145
def test_hn_atx_headings():
@@ -215,3 +216,12 @@ def test_sup():
215216
def test_lang():
216217
assert md('<pre>test\n foo\nbar</pre>', code_language='python') == '\n```python\ntest\n foo\nbar\n```\n'
217218
assert md('<pre><code>test\n foo\nbar</code></pre>', code_language='javascript') == '\n```javascript\ntest\n foo\nbar\n```\n'
219+
220+
221+
def test_lang_callback():
222+
def callback(el):
223+
return el['class'][0] if el.has_attr('class') else None
224+
225+
assert md('<pre class="python">test\n foo\nbar</pre>', code_language_callback=callback) == '\n```python\ntest\n foo\nbar\n```\n'
226+
assert md('<pre class="javascript"><code>test\n foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n'
227+
assert md('<pre class="javascript"><code class="javascript">test\n foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n'

tests/test_escaping.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
from markdownify import markdownify as md
22

33

4+
def test_asterisks():
5+
assert md('*hey*dude*') == r'\*hey\*dude\*'
6+
assert md('*hey*dude*', escape_asterisks=False) == r'*hey*dude*'
7+
8+
49
def test_underscore():
510
assert md('_hey_dude_') == r'\_hey\_dude\_'
611
assert md('_hey_dude_', escape_underscores=False) == r'_hey_dude_'

0 commit comments

Comments
 (0)