#! /usr/bin/env python # -*- coding: utf8 -*- # $Id: test_inline_markup.py 7243 2011-12-05 19:35:32Z milde $ # Author: David Goodger # Copyright: This module has been placed in the public domain. """ Tests for inline markup in docutils/parsers/rst/states.py. Interpreted text tests are in a separate module, test_interpreted.py. """ from .__init__ import DocutilsTestSupport def suite(): s = DocutilsTestSupport.ParserTestSuite() s.generateTests(totest) return s totest = {} totest['emphasis'] = [ ["""\ *emphasis* """, """\ emphasis """], ["""\ l'*emphasis* with the *emphasis*' apostrophe. l\u2019*emphasis* with the *emphasis*\u2019 apostrophe. """, """\ l\' emphasis with the \n\ emphasis \' apostrophe. l\u2019 emphasis with the \n\ emphasis \u2019 apostrophe. """], ["""\ *emphasized sentence across lines* """, """\ emphasized sentence across lines """], ["""\ *emphasis without closing asterisk """, """\ * emphasis without closing asterisk Inline emphasis start-string without end-string. """], [r"""some punctuation is allowed around inline markup, e.g. /*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters), (*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs) but not )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs) (*), [*], '*' or '"*"' ("quoted" start-string), x*2* or 2*x* (alphanumeric char before), \*args or * (escaped, whitespace behind start-string) or *the\* *stars\* *inside* (escaped, whitespace before end-string). However, '*args' will trigger a warning and may be problematic. what about *this**? """, """\ some punctuation is allowed around inline markup, e.g. / emphasis /, - emphasis -, and : emphasis : (delimiters), ( emphasis ), [ emphasis ], < emphasis >, { emphasis } (open/close pairs) but not )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs) (*), [*], '*' or '"*"' ("quoted" start-string), x*2* or 2*x* (alphanumeric char before), *args or * (escaped, whitespace behind start-string) or \n\ the* *stars* *inside (escaped, whitespace before end-string). However, ' * args' will trigger a warning and may be problematic. Inline emphasis start-string without end-string. what about \n\ this* ? """], ["""\ Quotes around inline markup: '*emphasis*' "*emphasis*" Straight, ‘*emphasis*’ “*emphasis*” English, ..., « *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* › French, „*emphasis*“ ‚*emphasis*‘ »*emphasis*« ›*emphasis*‹ German, Czech, ..., „*emphasis*” «*emphasis*» Romanian, “*emphasis*„ ‘*emphasis*‚ Greek, 「*emphasis*」『*emphasis*』traditional Chinese, ”*emphasis*” ’*emphasis*’ »*emphasis*» ›*emphasis*› Swedish, Finnish, „*emphasis*” ‚*emphasis*’ Polish, „*emphasis*” »*emphasis*« ’*emphasis*’ Hungarian, """, """\ Quotes around inline markup: \' emphasis \' " emphasis " Straight, \u2018 emphasis \u2019 \u201c emphasis \u201d English, ..., \xab\u202f emphasis \u202f\xbb \u2039\u202f emphasis \u202f\u203a \xab\xa0 emphasis \xa0\xbb \u2039\xa0 emphasis \xa0\u203a \xab\u2005 emphasis \u2005\xbb \u2039\u2005 emphasis \u2005\u203a French, \u201e emphasis \u201c \u201a emphasis \u2018 \xbb emphasis \xab \u203a emphasis \u2039 German, Czech, ..., \u201e emphasis \u201d \xab emphasis \xbb Romanian, \u201c emphasis \u201e \u2018 emphasis \u201a Greek, \u300c emphasis \u300d \u300e emphasis \u300ftraditional Chinese, \u201d emphasis \u201d \u2019 emphasis \u2019 \xbb emphasis \xbb \u203a emphasis \u203a Swedish, Finnish, \u201e emphasis \u201d \u201a emphasis \u2019 Polish, \u201e emphasis \u201d \xbb emphasis \xab \u2019 emphasis \u2019 Hungarian, """], [r""" Emphasized asterisk: *\** Emphasized double asterisk: *\*** """, """\ Emphasized asterisk: \n\ * Emphasized double asterisk: \n\ ** """], ] totest['strong'] = [ ["""\ **strong** """, """\ strong """], ["""\ l'**strong** and l\u2019**strong** with apostrophe """, """\ l' strong and l\u2019 strong with apostrophe """], ["""\ quoted '**strong**', quoted "**strong**", quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d, quoted \xab**strong**\xbb """, """\ quoted ' strong ', quoted " strong ", quoted \u2018 strong \u2019, quoted \u201c strong \u201d, quoted \xab strong \xbb """], [r""" (**strong**) but not (**) or '(** ' or x**2 or \**kwargs or ** (however, '**kwargs' will trigger a warning and may be problematic) """, """\ ( strong ) but not (**) or '(** ' or x**2 or **kwargs or ** (however, ' ** kwargs' will trigger a warning and may be problematic) Inline strong start-string without end-string. """], ["""\ Strong asterisk: ***** Strong double asterisk: ****** """, """\ Strong asterisk: \n\ * Strong double asterisk: \n\ ** """], ["""\ **strong without closing asterisks """, """\ ** strong without closing asterisks Inline strong start-string without end-string. """], ] totest['literal'] = [ ["""\ ``literal`` """, """\ literal """], [r""" ``\literal`` """, """\ \\literal """], [r""" ``lite\ral`` """, """\ lite\\ral """], [r""" ``literal\`` """, """\ literal\\ """], ["""\ l'``literal`` and l\u2019``literal`` with apostrophe """, """\ l' literal and l\u2019 literal with apostrophe """], ["""\ quoted '``literal``', quoted "``literal``", quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d, quoted \xab``literal``\xbb """, """\ quoted ' literal ', quoted " literal ", quoted \u2018 literal \u2019, quoted \u201c literal \u201d, quoted \xab literal \xbb """], ["""\ ``'literal'`` with quotes, ``"literal"`` with quotes, ``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes, ``\xabliteral\xbb`` with quotes """, """\ 'literal' with quotes, \n\ "literal" with quotes, \u2018literal\u2019 with quotes, \n\ \u201cliteral\u201d with quotes, \xabliteral\xbb with quotes """], [r""" ``literal ``TeX quotes'' & \backslash`` but not "``" or `` (however, ``standalone TeX quotes'' will trigger a warning and may be problematic) """, """\ literal ``TeX quotes'' & \\backslash but not "``" or `` (however, \n\ `` standalone TeX quotes'' will trigger a warning and may be problematic) Inline literal start-string without end-string. """], ["""\ Find the ```interpreted text``` in this paragraph! """, """\ Find the \n\ `interpreted text` in this paragraph! """], ["""\ ``literal without closing backquotes """, """\ `` literal without closing backquotes Inline literal start-string without end-string. """], [r""" Python ``list``\s use square bracket syntax. """, """\ Python \n\ list s use square bracket syntax. """], ] totest['references'] = [ ["""\ ref_ """, """\ ref """], ["""\ l'ref_ and l\u2019ref_ with apostrophe """, """\ l' ref and l\u2019 ref with apostrophe """], ["""\ quoted 'ref_', quoted "ref_", quoted \u2018ref_\u2019, quoted \u201cref_\u201d, quoted \xabref_\xbb, but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_, \u201cref ref\u201d_, or \xabref ref\xbb_ """, """\ quoted ' ref ', quoted " ref ", quoted \u2018 ref \u2019, quoted \u201c ref \u201d, quoted \xab ref \xbb, but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_, \u201cref ref\u201d_, or \xabref ref\xbb_ """], ["""\ ref__ """, """\ ref """], ["""\ l'ref__ and l\u2019ref__ with apostrophe """, """\ l' ref and l\u2019 ref with apostrophe """], ["""\ quoted 'ref__', quoted "ref__", quoted \u2018ref__\u2019, quoted \u201cref__\u201d, quoted \xabref__\xbb, but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__, \u201cref ref\u201d__, or \xabref ref\xbb__ """, """\ quoted ' ref ', quoted " ref ", quoted \u2018 ref \u2019, quoted \u201c ref \u201d, quoted \xab ref \xbb, but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__, \u201cref ref\u201d__, or \xabref ref\xbb__ """], ["""\ ref_, r_, r_e-f_, -ref_, and anonymousref__, but not _ref_ or __attr__ or object.__attr__ """, """\ ref , \n\ r , \n\ r_e-f , - ref , and \n\ anonymousref , but not _ref_ or __attr__ or object.__attr__ """], ] totest['phrase_references'] = [ ["""\ `phrase reference`_ """, """\ phrase reference """], ["""\ l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe """, """\ l' phrase reference and l\u2019 phrase reference with apostrophe """], ["""\ quoted '`phrase reference`_', quoted "`phrase reference`_", quoted \u2018`phrase reference`_\u2019, quoted \u201c`phrase reference`_\u201d, quoted \xab`phrase reference`_\xbb """, """\ quoted ' phrase reference ', quoted " phrase reference ", quoted \u2018 phrase reference \u2019, quoted \u201c phrase reference \u201d, quoted \xab phrase reference \xbb """], ["""\ `'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes, `\u2018phrase reference\u2019`_ with quotes, `\u201cphrase reference\u201d`_ with quotes, `\xabphrase reference\xbb`_ with quotes """, """\ 'phrase reference' with quotes, \n\ "phrase reference" with quotes, \u2018phrase reference\u2019 with quotes, \u201cphrase reference\u201d with quotes, \xabphrase reference\xbb with quotes """], ["""\ `anonymous reference`__ """, """\ anonymous reference """], ["""\ l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe """, """\ l' anonymous reference and l\u2019 anonymous reference with apostrophe """], ["""\ quoted '`anonymous reference`__', quoted "`anonymous reference`__", quoted \u2018`anonymous reference`__\u2019, quoted \u201c`anonymous reference`__\u201d, quoted \xab`anonymous reference`__\xbb """, """\ quoted ' anonymous reference ', quoted " anonymous reference ", quoted \u2018 anonymous reference \u2019, quoted \u201c anonymous reference \u201d, quoted \xab anonymous reference \xbb """], ["""\ `'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes, `\u2018anonymous reference\u2019`__ with quotes, `\u201canonymous reference\u201d`__ with quotes, `\xabanonymous reference\xbb`__ with quotes """, """\ 'anonymous reference' with quotes, \n\ "anonymous reference" with quotes, \u2018anonymous reference\u2019 with quotes, \u201canonymous reference\u201d with quotes, \xabanonymous reference\xbb with quotes """], ["""\ `phrase reference across lines`_ """, """\ phrase reference across lines """], ["""\ `phrase\`_ reference`_ """, """\ phrase`_ reference """], ["""\ Invalid phrase reference: :role:`phrase reference`_ """, """\ Invalid phrase reference: :role:`phrase reference`_ Mismatch: both interpreted text role prefix and reference suffix. """], ["""\ Invalid phrase reference: `phrase reference`:role:_ """, """\ Invalid phrase reference: `phrase reference`:role:_ Mismatch: both interpreted text role suffix and reference suffix. """], ["""\ `phrase reference_ without closing backquote """, """\ ` phrase \n\ reference without closing backquote Inline interpreted text or phrase reference start-string without end-string. """], ["""\ `anonymous phrase reference__ without closing backquote """, """\ ` anonymous phrase \n\ reference without closing backquote Inline interpreted text or phrase reference start-string without end-string. """], ] totest['embedded_URIs'] = [ ["""\ `phrase reference `_ """, """\ phrase reference """], ["""\ `anonymous reference `__ """, """\ anonymous reference """], ["""\ `embedded URI on next line `__ """, """\ embedded URI on next line """], ["""\ `embedded URI across lines `__ """, """\ embedded URI across lines """], ["""\ `embedded URI with whitespace `__ """, """\ embedded URI with whitespace """], ["""\ `embedded email address `__ `embedded email address broken across lines `__ """, """\ embedded email address embedded email address broken across lines """], [r""" `embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace >`__ `embedded URI with too much whitespace at end `__ `embedded URI with no preceding whitespace`__ `escaped URI \`__ See `HTML Anchors: \`_. """, """\ embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace > embedded URI with too much whitespace at end embedded URI with no preceding whitespace escaped URI See \n\ HTML Anchors: . """], ["""\ Relative URIs' reference text can be omitted: ``_ ``__ """, """\ Relative URIs' reference text can be omitted: reference anonymous """], ] totest['inline_targets'] = [ ["""\ _`target` Here is _`another target` in some text. And _`yet another target`, spanning lines. _`Here is a TaRgeT` with case and spacial difficulties. """, """\ target Here is \n\ another target in some text. And \n\ yet another target , spanning lines. Here is a TaRgeT with case and spacial difficulties. """], ["""\ l'_`target1` and l\u2019_`target2` with apostrophe """, """\ l' target1 and l\u2019 target2 with apostrophe """], ["""\ quoted '_`target1`', quoted "_`target2`", quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d, quoted \xab_`target5`\xbb """, """\ quoted ' target1 ', quoted " target2 ", quoted \u2018 target3 \u2019, quoted \u201c target4 \u201d, quoted \xab target5 \xbb """], ["""\ _`'target1'` with quotes, _`"target2"` with quotes, _`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes, _`\xabtarget5\xbb` with quotes """, """\ 'target1' with quotes, \n\ "target2" with quotes, \u2018target3\u2019 with quotes, \n\ \u201ctarget4\u201d with quotes, \xabtarget5\xbb with quotes """], ["""\ But this isn't a _target; targets require backquotes. And _`this`_ is just plain confusing. """, """\ But this isn't a _target; targets require backquotes. And \n\ _` this`_ is just plain confusing. Inline target start-string without end-string. """], ["""\ _`inline target without closing backquote """, """\ _` inline target without closing backquote Inline target start-string without end-string. """], ] totest['footnote_reference'] = [ ["""\ [1]_ """, """\ 1 """], ["""\ [#]_ """, """\ """], ["""\ [#label]_ """, """\ """], ["""\ [*]_ """, """\ """], ["""\ Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_ """, """\ Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_ """], ] totest['citation_reference'] = [ ["""\ [citation]_ """, """\ citation """], ["""\ [citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_ """, """\ citation and \n\ cit-ation and \n\ cit.ation and \n\ CIT1 but not [CIT 1]_ """], ["""\ Adjacent citation refs are not possible: [citation]_[CIT1]_ """, """\ Adjacent citation refs are not possible: [citation]_[CIT1]_ """], ] totest['substitution_references'] = [ ["""\ |subref| """, """\ subref """], ["""\ |subref|_ and |subref|__ """, """\ subref and \n\ subref """], ["""\ |substitution reference| """, """\ substitution reference """], ["""\ |substitution reference| """, """\ substitution reference """], ["""\ |substitution reference without closing verbar """, """\ | substitution reference without closing verbar Inline substitution_reference start-string without end-string. """], ["""\ first | then || and finally ||| """, """\ first | then || and finally ||| """], ] totest['standalone_hyperlink'] = [ ["""\ http://www.standalone.hyperlink.com http:/one-slash-only.absolute.path [http://example.com] (http://example.com) http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text) http://[3ffe:2a00:100:7031::1]/ mailto:someone@somewhere.com news:comp.lang.python An email address in a sentence: someone@somewhere.com. ftp://ends.with.a.period. (a.question.mark@end?) """, """\ http://www.standalone.hyperlink.com http:/one-slash-only.absolute.path [ http://example.com ] ( http://example.com ) < http://example.com > http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html http://[3ffe:2a00:100:7031::1 ] (the final "]" is ambiguous in text) http://[3ffe:2a00:100:7031::1]/ mailto:someone@somewhere.com news:comp.lang.python An email address in a sentence: \n\ someone@somewhere.com . ftp://ends.with.a.period . ( a.question.mark@end ?) """], [r""" Valid URLs with escaped markup characters: http://example.com/\*content\*/whatever http://example.com/\*content*/whatever """, """\ Valid URLs with escaped markup characters: http://example.com/*content*/whatever http://example.com/*content*/whatever """], ["""\ Valid URLs may end with punctuation inside "<>": """, """\ Valid URLs may end with punctuation inside "<>": < http://example.org/ends-with-dot. > """], ["""\ Valid URLs with interesting endings: http://example.org/ends-with-pluses++ """, """\ Valid URLs with interesting endings: http://example.org/ends-with-pluses++ """], ["""\ None of these are standalone hyperlinks (their "schemes" are not recognized): signal:noise, a:b. """, """\ None of these are standalone hyperlinks (their "schemes" are not recognized): signal:noise, a:b. """], ["""\ Escaped email addresses are not recognized: test\@example.org """, """\ Escaped email addresses are not recognized: test@example.org """], ] totest['markup recognition rules'] = [ ["""\ __This__ should be left alone. """, """\ __This__ should be left alone. """], [r""" Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p with backslash-escaped whitespace, including new\ lines. """, """\ Character-level m a r k u p with backslash-escaped whitespace, including newlines. """], ["""\ text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*. \u00bf*punctuation*? \u00a1*examples*!\u00a0*\u00a0no-break-space\u00a0*. """, """\ text- separated \u2010 by \u2011 various \u2012 dashes \u2013 and \u2014 hyphens . \xbf punctuation ? \xa1 examples !\xa0 \u00a0no-break-space\u00a0 . """], # Whitespace characters: # \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, fails in Python 2.4 ["""\ text separated by *newline* or *space* or one of \xa0*NO-BREAK SPACE*\xa0, \u1680*OGHAM SPACE MARK*\u1680, \u2000*EN QUAD*\u2000, \u2001*EM QUAD*\u2001, \u2002*EN SPACE*\u2002, \u2003*EM SPACE*\u2003, \u2004*THREE-PER-EM SPACE*\u2004, \u2005*FOUR-PER-EM SPACE*\u2005, \u2006*SIX-PER-EM SPACE*\u2006, \u2007*FIGURE SPACE*\u2007, \u2008*PUNCTUATION SPACE*\u2008, \u2009*THIN SPACE*\u2009, \u200a*HAIR SPACE*\u200a, \u202f*NARROW NO-BREAK SPACE*\u202f, \u205f*MEDIUM MATHEMATICAL SPACE*\u205f, \u3000*IDEOGRAPHIC SPACE*\u3000, \u2028*LINE SEPARATOR*\u2028 """, """\ text separated by newline \n\ or \n\ space or one of \xa0 NO-BREAK SPACE \xa0, \u1680 OGHAM SPACE MARK \u1680, \u2000 EN QUAD \u2000, \u2001 EM QUAD \u2001, \u2002 EN SPACE \u2002, \u2003 EM SPACE \u2003, \u2004 THREE-PER-EM SPACE \u2004, \u2005 FOUR-PER-EM SPACE \u2005, \u2006 SIX-PER-EM SPACE \u2006, \u2007 FIGURE SPACE \u2007, \u2008 PUNCTUATION SPACE \u2008, \u2009 THIN SPACE \u2009, \u200a HAIR SPACE \u200a, \u202f NARROW NO-BREAK SPACE \u202f, \u205f MEDIUM MATHEMATICAL SPACE \u205f, \u3000 IDEOGRAPHIC SPACE \u3000, LINE SEPARATOR """], # « * » ‹ * › « * » ‹ * › « * » ‹ * › French, ["""\ "Quoted" markup start-string (matched openers & closers) -> no markup: '*' "*" (*) <*> [*] {*} ⁅*⁆ Some international quoting styles: ‘*’ “*” English, ..., „*“ ‚*‘ »*« ›*‹ German, Czech, ..., „*” «*» Romanian, “*„ ‘*‚ Greek, 「*」『*』traditional Chinese, ”*” ’*’ »*» ›*› Swedish, Finnish, „*” ‚*’ Polish, „*” »*« ’*’ Hungarian, But this is „*’ emphasized »*‹. """, """\ "Quoted" markup start-string (matched openers & closers) -> no markup: '*' "*" (*) <*> [*] {*} ⁅*⁆ Some international quoting styles: ‘*’ “*” English, ..., „*“ ‚*‘ »*« ›*‹ German, Czech, ..., „*” «*» Romanian, “*„ ‘*‚ Greek, 「*」『*』traditional Chinese, ”*” ’*’ »*» ›*› Swedish, Finnish, „*” ‚*’ Polish, „*” »*« ’*’ Hungarian, But this is „ ’ emphasized » ‹. """], ] if __name__ == '__main__': import unittest unittest.main(defaultTest='suite')