4 # Markdown -- A text-to-HTML conversion tool for web writers
6 # Copyright (c) 2004 John Gruber
7 # <http://daringfireball.net/projects/markdown/>
16 use Digest
::MD5
qw(md5_hex);
17 use vars
qw($VERSION);
23 # Global default settings:
25 my $g_empty_element_suffix = " />"; # Change to ">" for HTML output
33 # Regex to match balanced [brackets]. See Friedl's
34 # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
35 my $g_nested_brackets;
36 $g_nested_brackets = qr{
38 [^\
[\
]]+ # Anything other than brackets
41 (??{ $g_nested_brackets }) # Recursive set of nested brackets
47 # Table of hash values for escaped characters:
49 foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
50 $g_escape_table{$char} = md5_hex
($char);
54 # Global hashes, used by various utility routines
59 # Used to track when we're inside an ordered or unordered list
60 # (see _ProcessListItems() for details):
64 #### Blosxom plug-in interface ##########################################
66 # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
67 # which posts Markdown should process, using a "meta-markup: markdown"
68 # header. If it's set to 0 (the default), Markdown will process all
70 my $g_blosxom_use_meta = 0;
74 my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
76 if ( (! $g_blosxom_use_meta) or
77 (defined($meta::markup
) and ($meta::markup
=~ /^\s*markdown\s*$/i))
79 $$body_ref = Markdown
($$body_ref);
85 #### Movable Type plug-in interface #####################################
86 eval {require MT
}; # Test to see if we're running in MT.
90 require MT
::Template
::Context
;
91 import MT
::Template
::Context
;
93 eval {require MT
::Plugin
}; # Test to see if we're running >= MT 3.0.
97 my $plugin = new MT
::Plugin
({
99 description
=> "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
100 doc_link
=> 'http://daringfireball.net/projects/markdown/'
102 MT-
>add_plugin( $plugin );
105 MT
::Template
::Context-
>add_container_tag(MarkdownOptions
=> sub {
108 my $builder = $ctx->stash('builder');
109 my $tokens = $ctx->stash('tokens');
111 if (defined ($args->{'output'}) ) {
112 $ctx->stash('markdown_output', lc $args->{'output'});
115 defined (my $str = $builder->build($ctx, $tokens) )
116 or return $ctx->error($builder->errstr);
120 MT-
>add_text_filter('markdown' => {
122 docs
=> 'http://daringfireball.net/projects/markdown/',
128 my $output = $ctx->stash('markdown_output');
129 if (defined $output && $output =~ m/^html/i) {
130 $g_empty_element_suffix = ">";
131 $ctx->stash('markdown_output', '');
133 elsif (defined $output && $output eq 'raw') {
135 $ctx->stash('markdown_output', '');
139 $g_empty_element_suffix = " />";
142 $text = $raw ? $text : Markdown
($text);
147 # If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
152 $smartypants = $MT::Template
::Context
::Global_filters
{'smarty_pants'};
156 MT-
>add_text_filter('markdown_with_smartypants' => {
157 label
=> 'Markdown With SmartyPants',
158 docs
=> 'http://daringfireball.net/projects/markdown/',
163 my $output = $ctx->stash('markdown_output');
164 if (defined $output && $output eq 'html') {
165 $g_empty_element_suffix = ">";
168 $g_empty_element_suffix = " />";
171 $text = Markdown
($text);
172 $text = $smartypants->($text, '1');
178 #### BBEdit/command-line text filter interface ##########################
179 # Needs to be hidden from MT (and Blosxom when running in static mode).
181 # We're only using $blosxom::version once; tell Perl not to warn us:
183 unless ( defined($blosxom::version
) ) {
186 #### Check for command-line switches: #################
189 Getopt
::Long
::Configure
('pass_through');
190 GetOptions
(\
%cli_opts,
195 if ($cli_opts{'version'}) { # Version info
196 print "\nThis is Markdown, version $VERSION.\n";
197 print "Copyright 2004 John Gruber\n";
198 print "http://daringfireball.net/projects/markdown/\n\n";
201 if ($cli_opts{'shortversion'}) { # Just the version number string.
205 if ($cli_opts{'html4tags'}) { # Use HTML tag style instead of XHTML
206 $g_empty_element_suffix = ">";
210 #### Process incoming text: ###########################
213 local $/; # Slurp the whole file
223 .markdown-body>*:first-child {
224 margin-top: 0 !important;
226 .markdown-body>*:last-child {
227 margin-bottom: 0 !important;
229 .markdown-body a.absent {
232 .markdown-body a.anchor {
242 .markdown-body h1, .markdown-body h2, .markdown-body h3, .markdown-body h4, .markdown-body h5, .markdown-body h6 {
246 -webkit-font-smoothing: antialiased;
250 .markdown-body h1 .mini-icon-link, .markdown-body h2 .mini-icon-link, .markdown-body h3 .mini-icon-link, .markdown-body h4 .mini-icon-link, .markdown-body h5 .mini-icon-link, .markdown-body h6 .mini-icon-link {
254 .markdown-body h1:hover a.anchor, .markdown-body h2:hover a.anchor, .markdown-body h3:hover a.anchor, .markdown-body h4:hover a.anchor, .markdown-body h5:hover a.anchor, .markdown-body h6:hover a.anchor {
255 text-decoration: none;
260 .markdown-body h1:hover a.anchor .mini-icon-link, .markdown-body h2:hover a.anchor .mini-icon-link, .markdown-body h3:hover a.anchor .mini-icon-link, .markdown-body h4:hover a.anchor .mini-icon-link, .markdown-body h5:hover a.anchor .mini-icon-link, .markdown-body h6:hover a.anchor .mini-icon-link {
261 display: inline-block;
263 .markdown-body h1 tt, .markdown-body h1 code, .markdown-body h2 tt, .markdown-body h2 code, .markdown-body h3 tt, .markdown-body h3 code, .markdown-body h4 tt, .markdown-body h4 code, .markdown-body h5 tt, .markdown-body h5 code, .markdown-body h6 tt, .markdown-body h6 code {
272 border-bottom: 1px solid #ccc;
288 .markdown-body p, .markdown-body blockquote, .markdown-body ul, .markdown-body ol, .markdown-body dl, .markdown-body table, .markdown-body pre {
292 background: transparent url("/dirty-shade.png") repeat-x 0 0;
298 .markdown-body>h2:first-child, .markdown-body>h1:first-child, .markdown-body>h1:first-child+h2, .markdown-body>h3:first-child, .markdown-body>h4:first-child, .markdown-body>h5:first-child, .markdown-body>h6:first-child {
302 .markdown-body a:first-child h1, .markdown-body a:first-child h2, .markdown-body a:first-child h3, .markdown-body a:first-child h4, .markdown-body a:first-child h5, .markdown-body a:first-child h6 {
306 .markdown-body h1+p, .markdown-body h2+p, .markdown-body h3+p, .markdown-body h4+p, .markdown-body h5+p, .markdown-body h6+p {
309 .markdown-body li p.first {
310 display: inline-block;
312 .markdown-body ul, .markdown-body ol {
315 .markdown-body ul.no-list, .markdown-body ol.no-list {
316 list-style-type: none;
319 .markdown-body ul li>:first-child, .markdown-body ul li ul:first-of-type, .markdown-body ul li ol:first-of-type, .markdown-body ol li>:first-child, .markdown-body ol li ul:first-of-type, .markdown-body ol li ol:first-of-type {
322 .markdown-body ul li p:last-of-type, .markdown-body ol li p:last-of-type {
325 .markdown-body ul ul, .markdown-body ul ol, .markdown-body ol ol, .markdown-body ol ul {
331 .markdown-body dl dt {
338 .markdown-body dl dt:first-child {
341 .markdown-body dl dt>:first-child {
344 .markdown-body dl dt>:last-child {
347 .markdown-body dl dd {
351 .markdown-body dl dd>:first-child {
354 .markdown-body dl dd>:last-child {
357 .markdown-body blockquote {
358 border-left: 4px solid #DDD;
362 .markdown-body blockquote>:first-child {
365 .markdown-body blockquote>:last-child {
368 .markdown-body table th {
371 .markdown-body table th, .markdown-body table td {
372 border: 1px solid #ccc;
375 .markdown-body table tr {
376 border-top: 1px solid #ccc;
377 background-color: #fff;
379 .markdown-body table tr:nth-child(2n) {
380 background-color: #f8f8f8;
384 -moz-box-sizing: border-box;
385 box-sizing: border-box;
387 .markdown-body span.frame {
391 .markdown-body span.frame>span {
392 border: 1px solid #ddd;
400 .markdown-body span.frame span img {
404 .markdown-body span.frame span span {
410 .markdown-body span.align-center {
415 .markdown-body span.align-center>span {
421 .markdown-body span.align-center span img {
425 .markdown-body span.align-right {
430 .markdown-body span.align-right>span {
436 .markdown-body span.align-right span img {
440 .markdown-body span.float-left {
446 .markdown-body span.float-left span {
449 .markdown-body span.float-right {
455 .markdown-body span.float-right>span {
461 .markdown-body code, .markdown-body tt {
464 border: 1px solid #eaeaea;
465 background-color: #f8f8f8;
468 .markdown-body code {
471 .markdown-body pre>code {
476 background: transparent;
478 .markdown-body .highlight pre, .markdown-body pre {
479 background-color: #f8f8f8;
480 border: 1px solid #ccc;
487 .markdown-body pre code, .markdown-body pre tt {
490 background-color: transparent;
495 print "<div class='markdown-body'>";
496 print Markdown
($text);
505 # Main function. The order in which other subs are called here is
506 # essential. Link and image substitutions need to happen before
507 # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
508 # and <img> tags get encoded.
512 # Clear the global hashes. If we don't clear these, you get conflicts
513 # from other articles when generating a page which contains more than
514 # one article (e.g. an index page that shows the N most recent
521 # Standardize line endings:
522 $text =~ s{\r\n}{\n}g; # DOS to Unix
523 $text =~ s{\r}{\n}g; # Mac to Unix
525 # Make sure $text ends with a couple of newlines:
528 # Convert all tabs to spaces.
529 $text = _Detab
($text);
531 # Strip any lines consisting only of spaces and tabs.
532 # This makes subsequent regexen easier to write, because we can
533 # match consecutive blank lines with /\n+/ instead of something
534 # contorted like /[ \t]*\n+/ .
535 $text =~ s/^[ \t]+$//mg;
537 # Turn block-level HTML blocks into hash entries
538 $text = _HashHTMLBlocks
($text);
540 # Strip link definitions, store in hashes.
541 $text = _StripLinkDefinitions
($text);
543 $text = _RunBlockGamut
($text);
545 $text = _UnescapeSpecialChars
($text);
551 sub _StripLinkDefinitions
{
553 # Strips link definitions from text, stores the URLs and titles in
557 my $less_than_tab = $g_tab_width - 1;
559 # Link defs are in the form: ^[id]: url "optional title"
561 ^[ ]{0,$less_than_tab}\
[(.+)\
]: # id = $1
563 \n? # maybe *one* newline
565 <?(\S
+?)>? # url = $2
567 \n? # maybe one newline
570 (?<=\s
) # lookbehind for whitespace
575 )? # title is optional
579 $g_urls{lc $1} = _EncodeAmpsAndAngles
( $2 ); # Link IDs are case-insensitive
581 $g_titles{lc $1} = $3;
582 $g_titles{lc $1} =~ s/"/"/g;
590 sub _HashHTMLBlocks
{
592 my $less_than_tab = $g_tab_width - 1;
594 # Hashify HTML blocks:
595 # We only want to do this for block-level HTML tags, such as headers,
596 # lists, and tables. That's because we still want to wrap <p>s around
597 # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
598 # phrase emphasis, and spans. The list of tags we're looking for is
600 my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
601 my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
603 # First, look for nested blocks, e.g.:
606 # tags for inner block must be indented.
610 # The outermost tags must start at the left margin for this to match, and
611 # the inner nested divs must be indented.
612 # We need to do this before the next, more liberal match, because the next
613 # match will start at the first `<div>` and stop at the first `</div>`.
616 ^ # start of line (with /m)
617 <($block_tags_a) # start tag = $2
619 (.*\n)*? # any number of lines, minimally matching
620 </\
2> # the matching end tag
621 [ \t]* # trailing spaces/tabs
622 (?=\n+|\Z
) # followed by a newline or end of document
625 my $key = md5_hex
($1);
626 $g_html_blocks{$key} = $1;
627 "\n\n" . $key . "\n\n";
632 # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
636 ^ # start of line (with /m)
637 <($block_tags_b) # start tag = $2
639 (.*\n)*? # any number of lines, minimally matching
640 .*</\
2> # the matching end tag
641 [ \t]* # trailing spaces/tabs
642 (?=\n+|\Z
) # followed by a newline or end of document
645 my $key = md5_hex
($1);
646 $g_html_blocks{$key} = $1;
647 "\n\n" . $key . "\n\n";
649 # Special case just for <hr />. It was easier to make a special case than
650 # to make the other regex more complicated.
653 (?<=\n\n) # Starting after a blank line
655 \A
\n? # the beginning of the doc
658 [ ]{0,$less_than_tab}
659 <(hr
) # start tag = $2
662 /?> # the matching end tag
664 (?=\n{2,}|\Z
) # followed by a blank line or end of document
667 my $key = md5_hex
($1);
668 $g_html_blocks{$key} = $1;
669 "\n\n" . $key . "\n\n";
672 # Special case for standalone HTML comments:
675 (?<=\n\n) # Starting after a blank line
677 \A
\n? # the beginning of the doc
680 [ ]{0,$less_than_tab}
687 (?=\n{2,}|\Z
) # followed by a blank line or end of document
690 my $key = md5_hex
($1);
691 $g_html_blocks{$key} = $1;
692 "\n\n" . $key . "\n\n";
702 # These are all the transformations that form block-level
703 # tags like paragraphs, headers, and list items.
707 $text = _DoHeaders
($text);
709 # Do Horizontal Rules:
710 $text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
711 $text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
712 $text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
714 $text = _DoLists
($text);
716 $text = _DoCodeBlocks
($text);
718 $text = _DoBlockQuotes
($text);
720 # We already ran _HashHTMLBlocks() before, in Markdown(), but that
721 # was to escape raw HTML in the original Markdown source. This time,
722 # we're escaping the markup we've just created, so that we don't wrap
723 # <p> tags around block-level tags.
724 $text = _HashHTMLBlocks
($text);
726 $text = _FormParagraphs
($text);
734 # These are all the transformations that occur *within* block-level
735 # tags like paragraphs, headers, and list items.
739 $text = _DoCodeSpans
($text);
741 $text = _EscapeSpecialChars
($text);
743 # Process anchor and image tags. Images must come first,
744 # because ![foo][f] looks like an anchor.
745 $text = _DoImages
($text);
746 $text = _DoAnchors
($text);
748 # Make links out of things like `<http://example.com/>`
749 # Must come after _DoAnchors(), because you can use < and >
750 # delimiters in inline links like [this](<url>).
751 $text = _DoAutoLinks
($text);
753 $text = _EncodeAmpsAndAngles
($text);
755 $text = _DoItalicsAndBold
($text);
758 $text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
764 sub _EscapeSpecialChars
{
766 my $tokens ||= _TokenizeHTML
($text);
768 $text = ''; # rebuild $text from the tokens
769 # my $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
770 # my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
772 foreach my $cur_token (@$tokens) {
773 if ($cur_token->[0] eq "tag") {
774 # Within tags, encode * and _ so they don't conflict
775 # with their use in Markdown for italics and strong.
776 # We're replacing each such character with its
777 # corresponding MD5 checksum value; this is likely
778 # overkill, but it should prevent us from colliding
779 # with the escape values by accident.
780 $cur_token->[1] =~ s! \* !$g_escape_table{'*'}!gx;
781 $cur_token->[1] =~ s! _ !$g_escape_table{'_'}!gx;
782 $text .= $cur_token->[1];
784 my $t = $cur_token->[1];
785 $t = _EncodeBackslashEscapes
($t);
795 # Turn Markdown link shortcuts into XHTML <a> tags.
800 # First, handle reference-style links: [link text] [id]
803 ( # wrap whole match in $1
805 ($g_nested_brackets) # link text = $2
808 [ ]? # one optional space
809 (?:\n[ ]*)? # one optional newline followed by spaces
817 my $whole_match = $1;
821 if ($link_id eq "") {
822 $link_id = lc $link_text; # for shortcut links like [this][].
825 if (defined $g_urls{$link_id}) {
826 my $url = $g_urls{$link_id};
827 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
828 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
829 $result = "<a href=\"$url\"";
830 if ( defined $g_titles{$link_id} ) {
831 my $title = $g_titles{$link_id};
832 $title =~ s! \* !$g_escape_table{'*'}!gx;
833 $title =~ s! _ !$g_escape_table{'_'}!gx;
834 $result .= " title=\"$title\"";
836 $result .= ">$link_text</a>";
839 $result = $whole_match;
845 # Next, inline-style links: [link text](url "optional title")
848 ( # wrap whole match in $1
850 ($g_nested_brackets) # link text = $2
854 <?(.*?)>? # href = $3
857 (['"]) # quote char = $5
860 )? # title is optional
865 my $whole_match = $1;
870 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
871 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
872 $result = "<a href=\"$url\"";
874 if (defined $title) {
875 $title =~ s/"/"/g;
876 $title =~ s! \* !$g_escape_table{'*'}!gx;
877 $title =~ s! _ !$g_escape_table{'_'}!gx;
878 $result .= " title=\"$title\"";
881 $result .= ">$link_text</a>";
892 # Turn Markdown image shortcuts into <img> tags.
897 # First, handle reference-style labeled images: ![alt text][id]
900 ( # wrap whole match in $1
902 (.*?) # alt text = $2
905 [ ]? # one optional space
906 (?:\n[ ]*)? # one optional newline followed by spaces
915 my $whole_match = $1;
919 if ($link_id eq "") {
920 $link_id = lc $alt_text; # for shortcut links like ![this][].
923 $alt_text =~ s/"/"/g;
924 if (defined $g_urls{$link_id}) {
925 my $url = $g_urls{$link_id};
926 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
927 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
928 $result = "<img src=\"$url\" alt=\"$alt_text\"";
929 if (defined $g_titles{$link_id}) {
930 my $title = $g_titles{$link_id};
931 $title =~ s! \* !$g_escape_table{'*'}!gx;
932 $title =~ s! _ !$g_escape_table{'_'}!gx;
933 $result .= " title=\"$title\"";
935 $result .= $g_empty_element_suffix;
938 # If there's no such link ID, leave intact:
939 $result = $whole_match;
946 # Next, handle inline images: ![alt text](url "optional title")
947 # Don't forget: encode * and _
950 ( # wrap whole match in $1
952 (.*?) # alt text = $2
956 <?(\S
+?)>? # src url = $3
959 (['"]) # quote char = $5
963 )? # title is optional
968 my $whole_match = $1;
976 $alt_text =~ s/"/"/g;
977 $title =~ s/"/"/g;
978 $url =~ s! \* !$g_escape_table{'*'}!gx; # We've got to encode these to avoid
979 $url =~ s! _ !$g_escape_table{'_'}!gx; # conflicting with italics/bold.
980 $result = "<img src=\"$url\" alt=\"$alt_text\"";
981 if (defined $title) {
982 $title =~ s! \* !$g_escape_table{'*'}!gx;
983 $title =~ s! _ !$g_escape_table{'_'}!gx;
984 $result .= " title=\"$title\"";
986 $result .= $g_empty_element_suffix;
998 # Setext-style headers:
1005 $text =~ s
{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
1006 "<h1>" . _RunSpanGamut
($1) . "</h1>\n\n";
1009 $text =~ s
{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
1010 "<h2>" . _RunSpanGamut
($1) . "</h2>\n\n";
1014 # atx-style headers:
1017 # ## Header 2 with closing hashes ##
1022 ^(\#
{1,6}) # $1 = string of #'s
1024 (.+?) # $2 = Header text
1026 \#
* # optional closing #'s (not counted)
1029 my $h_level = length($1);
1030 "<h$h_level>" . _RunSpanGamut
($2) . "</h$h_level>\n\n";
1039 # Form HTML ordered (numbered) and unordered (bulleted) lists.
1042 my $less_than_tab = $g_tab_width - 1;
1044 # Re-usable patterns to match list item bullets and number markers:
1045 my $marker_ul = qr/[*+-]/;
1046 my $marker_ol = qr/\d+[.]/;
1047 my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
1049 # Re-usable pattern to match any entire ul or ol list:
1050 my $whole_list = qr{
1053 [ ]{0,$less_than_tab}
1054 (${marker_any
}) # $3 = first list item marker
1063 (?! # Negative lookahead for another list item marker
1071 # We use a different prefix before nested lists than top-level lists.
1072 # See extended comment in _ProcessListItems().
1074 # Note: There's a bit of duplication here. My original implementation
1075 # created a scalar regex pattern as the conditional result of the test on
1076 # $g_list_level, and then only ran the $text =~ s{...}{...}egmx
1077 # substitution once, using the scalar as the pattern. This worked,
1078 # everywhere except when running under MT on my hosting account at Pair
1079 # Networks. There, this caused all rebuilds to be killed by the reaper (or
1080 # perhaps they crashed, but that seems incredibly unlikely given that the
1081 # same script on the same server ran fine *except* under MT). I've spent
1082 # more time trying to figure out why this is happening than I'd like to
1083 # admit. My only guess, backed up by the fact that this workaround works,
1084 # is that Perl optimizes the substitution when it can figure out that the
1085 # pattern will never change, and when this optimization isn't on, we run
1086 # afoul of the reaper. Thus, the slightly redundant code that uses two
1087 # static s/// patterns rather than one conditional pattern.
1089 if ($g_list_level) {
1095 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
1096 # Turn double returns into triple returns, so that we can make a
1097 # paragraph for the last item in a list, if necessary:
1098 $list =~ s/\n{2,}/\n\n\n/g;
1099 my $result = _ProcessListItems
($list, $marker_any);
1100 $result = "<$list_type>\n" . $result . "</$list_type>\n";
1110 my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
1111 # Turn double returns into triple returns, so that we can make a
1112 # paragraph for the last item in a list, if necessary:
1113 $list =~ s/\n{2,}/\n\n\n/g;
1114 my $result = _ProcessListItems
($list, $marker_any);
1115 $result = "<$list_type>\n" . $result . "</$list_type>\n";
1125 sub _ProcessListItems
{
1127 # Process the contents of a single ordered or unordered list, splitting it
1128 # into individual list items.
1131 my $list_str = shift;
1132 my $marker_any = shift;
1135 # The $g_list_level global keeps track of when we're inside a list.
1136 # Each time we enter a list, we increment it; when we leave a list,
1137 # we decrement. If it's zero, we're not in a list anymore.
1139 # We do this because when we're not inside a list, we want to treat
1140 # something like this:
1142 # I recommend upgrading to version
1143 # 8. Oops, now this line is treated
1146 # As a single paragraph, despite the fact that the second line starts
1147 # with a digit-period-space sequence.
1149 # Whereas when we're inside a list (or sub-list), that line will be
1150 # treated as the start of a sub-list. What a kludge, huh? This is
1151 # an aspect of Markdown's syntax that's hard to parse perfectly
1152 # without resorting to mind-reading. Perhaps the solution is to
1153 # change the syntax rules such that sub-lists must start with a
1154 # starting cardinal number; e.g. "1." or "a.".
1158 # trim trailing blank lines:
1159 $list_str =~ s/\n{2,}\z/\n/;
1163 (\n)? # leading line = $1
1164 (^[ \t]*) # leading whitespace = $2
1165 ($marker_any) [ \t]+ # list marker = $3
1166 ((?s
:.+?) # list item text = $4
1168 (?= \n* (\z
| \
2 ($marker_any) [ \t]+))
1171 my $leading_line = $1;
1172 my $leading_space = $2;
1174 if ($leading_line or ($item =~ m/\n{2,}/)) {
1175 $item = _RunBlockGamut
(_Outdent
($item));
1178 # Recursion for sub-lists:
1179 $item = _DoLists
(_Outdent
($item));
1181 $item = _RunSpanGamut
($item);
1184 "<li>" . $item . "</li>\n";
1195 # Process Markdown `<pre><code>` blocks.
1202 ( # $1 = the code block -- one or more lines, starting with a space/tab
1204 (?:[ ]{$g_tab_width} | \t) # Lines must start with a tab or a tab-width of spaces
1208 ((?=^[ ]{0,$g_tab_width}\S
)|\Z
) # Lookahead for non-space at line-start, or end of doc
1211 my $result; # return value
1213 $codeblock = _EncodeCode
(_Outdent
($codeblock));
1214 $codeblock = _Detab
($codeblock);
1215 $codeblock =~ s/\A\n+//; # trim leading newlines
1216 $codeblock =~ s/\s+\z//; # trim trailing whitespace
1218 $result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
1229 # * Backtick quotes are used for <code></code> spans.
1231 # * You can use multiple backticks as the delimiters if you want to
1232 # include literal backticks in the code span. So, this input:
1234 # Just type ``foo `bar` baz`` at the prompt.
1236 # Will translate to:
1238 # <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
1240 # There's no arbitrary limit to the number of backticks you
1241 # can use as delimiters. If you need three consecutive backticks
1242 # in your code, use four for delimiters, etc.
1244 # * You can use spaces to get literal backticks at the edges:
1246 # ... type `` `bar` `` ...
1250 # ... type <code>`bar`</code> ...
1256 (`+) # $1 = Opening run of `
1257 (.+?) # $2 = The code block
1259 \1 # Matching closer
1263 $c =~ s/^[ \t]*//g; # leading whitespace
1264 $c =~ s/[ \t]*$//g; # trailing whitespace
1265 $c = _EncodeCode
($c);
1275 # Encode/escape certain characters inside Markdown code runs.
1276 # The point is that in code, these characters are literals,
1277 # and lose their special Markdown meanings.
1281 # Encode all ampersands; HTML entities are not
1282 # entities within a Markdown code span.
1285 # Encode $'s, but only if we're running under Blosxom.
1286 # (Blosxom interpolates Perl variables in article bodies.)
1289 if (defined($blosxom::version
)) {
1295 # Do the angle bracket song and dance:
1299 # Now, escape characters that are magic in Markdown:
1300 s! \* !$g_escape_table{'*'}!gx;
1301 s! _ !$g_escape_table{'_'}!gx;
1302 s! { !$g_escape_table{'{'}!gx;
1303 s! } !$g_escape_table{'}'}!gx;
1304 s! \[ !$g_escape_table{'['}!gx;
1305 s! \] !$g_escape_table{']'}!gx;
1306 s! \\ !$g_escape_table{'\\'}!gx;
1312 sub _DoItalicsAndBold
{
1315 # <strong> must go first:
1316 $text =~ s
{ (\
*\
*|__
) (?=\S
) (.+?[*_
]*) (?<=\S
) \
1 }
1317 {<strong
>$2</strong
>}gsx
;
1319 $text =~ s
{ (\
*|_
) (?=\S
) (.+?) (?<=\S
) \
1 }
1326 sub _DoBlockQuotes
{
1330 ( # Wrap whole match in $1
1332 ^[ \t]*>[ \t]? # '>' at the start of a line
1333 .+\n # rest of the first line
1334 (.+\n)* # subsequent consecutive lines
1340 $bq =~ s/^[ \t]*>[ \t]?//gm; # trim one level of quoting
1341 $bq =~ s/^[ \t]+$//mg; # trim whitespace-only lines
1342 $bq = _RunBlockGamut
($bq); # recurse
1345 # These leading spaces screw with <pre> content, so we need to fix that:
1354 "<blockquote>\n$bq\n</blockquote>\n\n";
1362 sub _FormParagraphs
{
1365 # $text - string to process with html <p> tags
1369 # Strip leading and trailing lines:
1373 my @grafs = split(/\n{2,}/, $text);
1379 unless (defined( $g_html_blocks{$_} )) {
1380 $_ = _RunSpanGamut
($_);
1387 # Unhashify HTML blocks
1390 if (defined( $g_html_blocks{$_} )) {
1391 $_ = $g_html_blocks{$_};
1395 return join "\n\n", @grafs;
1399 sub _EncodeAmpsAndAngles
{
1400 # Smart processing for ampersands and angle brackets that need to be encoded.
1404 # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
1405 # http://bumppo.net/projects/amputator/
1406 $text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g;
1409 $text =~ s{<(?![a-z/?\$!])}{<}gi;
1415 sub _EncodeBackslashEscapes
{
1417 # Parameter: String.
1418 # Returns: The string, with after processing the following backslash
1423 s! \\\\ !$g_escape_table{'\\'}!gx; # Must process escaped backslashes first.
1424 s! \\` !$g_escape_table{'`'}!gx;
1425 s! \\\* !$g_escape_table{'*'}!gx;
1426 s! \\_ !$g_escape_table{'_'}!gx;
1427 s! \\\{ !$g_escape_table{'{'}!gx;
1428 s! \\\} !$g_escape_table{'}'}!gx;
1429 s! \\\[ !$g_escape_table{'['}!gx;
1430 s! \\\] !$g_escape_table{']'}!gx;
1431 s! \\\( !$g_escape_table{'('}!gx;
1432 s! \\\) !$g_escape_table{')'}!gx;
1433 s! \\> !$g_escape_table{'>'}!gx;
1434 s! \\\# !$g_escape_table{'#'}!gx;
1435 s! \\\+ !$g_escape_table{'+'}!gx;
1436 s! \\\- !$g_escape_table{'-'}!gx;
1437 s! \\\. !$g_escape_table{'.'}!gx;
1438 s{ \\! }{$g_escape_table{'!'}}gx
;
1447 $text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
1449 # Email addresses: <address@domain.foo>
1456 [-a-z0-9
]+(\
.[-a-z0-9
]+)*\
.[a-z
]+
1460 _EncodeEmailAddress
( _UnescapeSpecialChars
($1) );
1467 sub _EncodeEmailAddress
{
1469 # Input: an email address, e.g. "foo@example.com"
1471 # Output: the email address as a mailto link, with each character
1472 # of the address encoded as either a decimal or hex entity, in
1473 # the hopes of foiling most address harvesting spam bots. E.g.:
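1475 # <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
1476 # x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
1477 # &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>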
1479 # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
1480 # mailing list: <http://tinyurl.com/yu7ue>
1487 sub { '&#' . ord(shift) . ';' },
1488 sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
1492 $addr = "mailto:" . $addr;
1496 if ( $char eq '@' ) {
1497 # this *must* be encoded. I insist.
1498 $char = $encode[int rand 1]->($char);
1499 } elsif ( $char ne ':' ) {
1500 # leave ':' alone (to spot mailto: later)
1502 # roughly 10% raw, 45% hex, 45% dec
1504 $r > .9 ? $encode[2]->($char) :
1505 $r < .45 ? $encode[1]->($char) :
1512 $addr = qq{<a href="$addr">$addr</a>};
1513 $addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
1519 sub _UnescapeSpecialChars
{
1521 # Swap back in all the special characters we've hidden.
1525 while( my($char, $hash) = each(%g_escape_table) ) {
1526 $text =~ s/$hash/$char/g;
1534 # Parameter: String containing HTML markup.
1535 # Returns: Reference to an array of the tokens comprising the input
1536 # string. Each token is either a tag (possibly with nested
1537 # tags contained therein, such as <a href="<MTFoo>">), or a
1538 # run of text between tags. Each element of the array is a
1539 # two-element array; the first is either 'tag' or 'text';
1540 # the second is the actual value.
1543 # Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
1544 # <http://www.bradchoate.com/past/mtregex.php>
1549 my $len = length $str;
1553 my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x
$depth) . (')*>)' x
$depth);
1554 my $match = qr
/(?s
: <! ( -- .*? -- \s
* )+ > ) | # comment
1555 (?s
: <\? .*? \?> ) | # processing instruction
1556 $nested_tags/ix; # nested tags
1558 while ($str =~ m/($match)/g) {
1560 my $sec_start = pos $str;
1561 my $tag_start = $sec_start - length $whole_tag;
1562 if ($pos < $tag_start) {
1563 push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
1565 push @tokens, ['tag', $whole_tag];
1568 push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
1575 # Remove one level of line-leading tabs or spaces
1579 $text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
1586 # Cribbed from a post by Bart Lateur:
1587 # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
1591 $text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
1610 B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<--shortversion> ]
1616 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1617 easy-to-write structured text format into HTML. Markdown's text format
1618 is most similar to that of plain text email, and supports features such
1619 as headers, *emphasis*, code blocks, blockquotes, and links.
1621 Markdown's syntax is designed not as a generic markup language, but
1622 specifically to serve as a front-end to (X)HTML. You can use span-level
1623 HTML tags anywhere in a Markdown document, and you can use block level
1624 HTML tags (like <div> and <table>) as well.
1626 For more information about Markdown's syntax, see:
1628 http://daringfireball.net/projects/markdown/
1633 Use "--" to end switch parsing. For example, to open a file named "-z", use:
1640 =item B<--html4tags>
1642 Use HTML 4 style for empty element tags, e.g.:
1646 instead of Markdown's default XHTML style tags, e.g.:
1651 =item B<-v>, B<--version>
1653 Display Markdown's version number and copyright information.
1656 =item B<-s>, B<--shortversion>
1658 Display the short-form version number.
1667 To file bug reports or feature requests (other than topics listed in the
1668 Caveats section above) please send email to:
1670 support@daringfireball.net
1672 Please include with your report: (1) the example input; (2) the output
1673 you expected; (3) the output Markdown actually produced.
1676 =head1 VERSION HISTORY
1678 See the readme file for detailed release notes for this version.
1688 http://daringfireball.net
1690 PHP port and other contributions by Michel Fortin
1694 =head1 COPYRIGHT AND LICENSE
1696 Copyright (c) 2003-2004 John Gruber
1697 <http://daringfireball.net/>
1698 All rights reserved.
1700 Redistribution and use in source and binary forms, with or without
1701 modification, are permitted provided that the following conditions are
1704 * Redistributions of source code must retain the above copyright notice,
1705 this list of conditions and the following disclaimer.
1707 * Redistributions in binary form must reproduce the above copyright
1708 notice, this list of conditions and the following disclaimer in the
1709 documentation and/or other materials provided with the distribution.
1711 * Neither the name "Markdown" nor the names of its contributors may
1712 be used to endorse or promote products derived from this software
1713 without specific prior written permission.
1715 This software is provided by the copyright holders and contributors "as
1716 is" and any express or implied warranties, including, but not limited
1717 to, the implied warranties of merchantability and fitness for a
1718 particular purpose are disclaimed. In no event shall the copyright owner
1719 or contributors be liable for any direct, indirect, incidental, special,
1720 exemplary, or consequential damages (including, but not limited to,
1721 procurement of substitute goods or services; loss of use, data, or
1722 profits; or business interruption) however caused and on any theory of
1723 liability, whether in contract, strict liability, or tort (including
1724 negligence or otherwise) arising in any way out of the use of this
1725 software, even if advised of the possibility of such damage.