###############################################################################
#
# Package: NaturalDocs::Parser::Native
#
###############################################################################
#
# A package that converts comments from Natural Docs' native format into objects.
# Unlike most second-level packages, these are packages and not object classes.
#
###############################################################################
# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL
use strict;
use integer;
package NaturalDocs::Parser::Native;
###############################################################################
# Group: Variables
# Return values of TagType(). Not documented here.
use constant POSSIBLE_OPENING_TAG => 1;
use constant POSSIBLE_CLOSING_TAG => 2;
use constant NOT_A_TAG => 3;
#
# var: package
#
# A representing the package normal topics will be a part of at the current point in the file. This is a package variable
# because it needs to be reserved between function calls.
#
my $package;
#
# hash: functionListIgnoredHeadings
#
# An existence hash of all the headings that prevent the parser from creating function list symbols. Whenever one of
# these headings are used in a function list topic, symbols are not created from definition lists until the next heading. The keys
# are in all lowercase.
#
my %functionListIgnoredHeadings = ( 'parameters' => 1,
'parameter' => 1,
'params' => 1,
'param' => 1,
'arguments' => 1,
'argument' => 1,
'args' => 1,
'arg' => 1 );
###############################################################################
# Group: Interface Functions
#
# Function: Start
#
# This will be called whenever a file is about to be parsed. It allows the package to reset its internal state.
#
sub Start
{
my ($self) = @_;
$package = undef;
};
#
# Function: IsMine
#
# Examines the comment and returns whether it is *definitely* Natural Docs content, i.e. it is owned by this package. Note
# that a comment can fail this function and still be interpreted as a Natural Docs content, for example a JavaDoc-styled comment
# that doesn't have header lines but no JavaDoc tags either.
#
# Parameters:
#
# commentLines - An arrayref of the comment lines. Must have been run through CleanComment()>.
# isJavaDoc - Whether the comment was JavaDoc-styled.
#
# Returns:
#
# Whether the comment is *definitely* Natural Docs content.
#
sub IsMine #(string[] commentLines, bool isJavaDoc)
{
my ($self, $commentLines, $isJavaDoc) = @_;
# Skip to the first line with content.
my $line = 0;
while ($line < scalar @$commentLines && !length $commentLines->[$line])
{ $line++; };
return $self->ParseHeaderLine($commentLines->[$line]);
};
#
# Function: ParseComment
#
# This will be called whenever a comment capable of containing Natural Docs content is found.
#
# Parameters:
#
# commentLines - An arrayref of the comment lines. Must have been run through CleanComment()>.
# *The original memory will be changed.*
# isJavaDoc - Whether the comment is JavaDoc styled.
# lineNumber - The line number of the first of the comment lines.
# parsedTopics - A reference to the array where any new should be placed.
#
# Returns:
#
# The number of parsed topics added to the array, or zero if none.
#
sub ParseComment #(commentLines, isJavaDoc, lineNumber, parsedTopics)
{
my ($self, $commentLines, $isJavaDoc, $lineNumber, $parsedTopics) = @_;
my $topicCount = 0;
my $prevLineBlank = 1;
my $inCodeSection = 0;
my ($type, $scope, $isPlural, $title, $symbol);
#my $package; # package variable.
my ($newKeyword, $newTitle);
my $index = 0;
my $bodyStart = 0;
my $bodyEnd = 0; # Not inclusive.
while ($index < scalar @$commentLines)
{
# Everything but leading whitespace was removed beforehand.
# If we're in a code section...
if ($inCodeSection)
{
if ($commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
{ $inCodeSection = undef; };
$prevLineBlank = 0;
$bodyEnd++;
}
# If the line is empty...
elsif (!length($commentLines->[$index]))
{
$prevLineBlank = 1;
if ($topicCount)
{ $bodyEnd++; };
}
# If the line has a recognized header and the previous line is blank...
elsif ($prevLineBlank && (($newKeyword, $newTitle) = $self->ParseHeaderLine($commentLines->[$index])) )
{
# Process the previous one, if any.
if ($topicCount)
{
if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
{ $package = undef; };
my $body = $self->FormatBody($commentLines, $bodyStart, $bodyEnd, $type, $isPlural);
my $newTopic = $self->MakeParsedTopic($type, $title, $package, $body, $lineNumber + $bodyStart - 1, $isPlural);
push @$parsedTopics, $newTopic;
$package = $newTopic->Package();
};
$title = $newTitle;
my $typeInfo;
($type, $typeInfo, $isPlural) = NaturalDocs::Topics->KeywordInfo($newKeyword);
$scope = $typeInfo->Scope();
$bodyStart = $index + 1;
$bodyEnd = $index + 1;
$topicCount++;
$prevLineBlank = 0;
}
# If we're on a non-empty, non-header line of a JavaDoc-styled comment and we haven't started a topic yet...
elsif ($isJavaDoc && !$topicCount)
{
$type = undef;
$scope = ::SCOPE_NORMAL(); # The scope repair and topic merging processes will handle if this is a class topic.
$isPlural = undef;
$title = undef;
$symbol = undef;
$bodyStart = $index;
$bodyEnd = $index + 1;
$topicCount++;
$prevLineBlank = undef;
}
# If we're on a normal content line within a topic
elsif ($topicCount)
{
$prevLineBlank = 0;
$bodyEnd++;
if ($commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i)
{ $inCodeSection = 1; };
};
$index++;
};
# Last one, if any. This is the only one that gets the prototypes.
if ($bodyStart)
{
if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
{ $package = undef; };
my $body = $self->FormatBody($commentLines, $bodyStart, $bodyEnd, $type, $isPlural);
my $newTopic = $self->MakeParsedTopic($type, $title, $package, $body, $lineNumber + $bodyStart - 1, $isPlural);
push @$parsedTopics, $newTopic;
$topicCount++;
$package = $newTopic->Package();
};
return $topicCount;
};
#
# Function: ParseHeaderLine
#
# If the passed line is a topic header, returns the array ( keyword, title ). Otherwise returns an empty array.
#
sub ParseHeaderLine #(line)
{
my ($self, $line) = @_;
if ($line =~ /^ *([a-z0-9 ]*[a-z0-9]): +(.*)$/i)
{
my ($keyword, $title) = ($1, $2);
# We need to do it this way because if you do "if (ND:T->KeywordInfo($keyword)" and the last element of the array it
# returns is false, the statement is false. That is really retarded, but there it is.
my ($type, undef, undef) = NaturalDocs::Topics->KeywordInfo($keyword);
if ($type)
{ return ($keyword, $title); }
else
{ return ( ); };
}
else
{ return ( ); };
};
###############################################################################
# Group: Support Functions
#
# Function: MakeParsedTopic
#
# Creates a object for the passed parameters. Scope is gotten from
# the package variable instead of from the parameters. The summary is generated from the body.
#
# Parameters:
#
# type - The . May be undef for headerless topics.
# title - The title of the topic. May be undef for headerless topics.
# package - The package the topic appears in.
# body - The topic's body in .
# lineNumber - The topic's line number.
# isList - Whether the topic is a list.
#
# Returns:
#
# The object.
#
sub MakeParsedTopic #(type, title, package, body, lineNumber, isList)
{
my ($self, $type, $title, $package, $body, $lineNumber, $isList) = @_;
my $summary;
if (defined $body)
{ $summary = NaturalDocs::Parser->GetSummaryFromBody($body); };
return NaturalDocs::Parser::ParsedTopic->New($type, $title, $package, undef, undef, $summary,
$body, $lineNumber, $isList);
};
#
# Function: FormatBody
#
# Converts the section body to .
#
# Parameters:
#
# commentLines - The arrayref of comment lines.
# startingIndex - The starting index of the body to format.
# endingIndex - The ending index of the body to format, *not* inclusive.
# type - The type of the section. May be undef for headerless comments.
# isList - Whether it's a list topic.
#
# Returns:
#
# The body formatted in .
#
sub FormatBody #(commentLines, startingIndex, endingIndex, type, isList)
{
my ($self, $commentLines, $startingIndex, $endingIndex, $type, $isList) = @_;
use constant TAG_NONE => 1;
use constant TAG_PARAGRAPH => 2;
use constant TAG_BULLETLIST => 3;
use constant TAG_DESCRIPTIONLIST => 4;
use constant TAG_HEADING => 5;
use constant TAG_PREFIXCODE => 6;
use constant TAG_TAGCODE => 7;
my %tagEnders = ( TAG_NONE() => '',
TAG_PARAGRAPH() => '
',
TAG_BULLETLIST() => '',
TAG_DESCRIPTIONLIST() => '',
TAG_HEADING() => '',
TAG_PREFIXCODE() => '',
TAG_TAGCODE() => '' );
my $topLevelTag = TAG_NONE;
my $output;
my $textBlock;
my $prevLineBlank = 1;
my $codeBlock;
my $removedCodeSpaces;
my $ignoreListSymbols;
my $index = $startingIndex;
while ($index < $endingIndex)
{
# If we're in a tagged code section...
if ($topLevelTag == TAG_TAGCODE)
{
if ($commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
{
$codeBlock =~ s/\n+$//;
$output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '';
$codeBlock = undef;
$topLevelTag = TAG_NONE;
$prevLineBlank = undef;
}
else
{
$self->AddToCodeBlock($commentLines->[$index], \$codeBlock, \$removedCodeSpaces);
};
}
# If the line starts with a code designator...
elsif ($commentLines->[$index] =~ /^ *[>:|](.*)$/)
{
my $code = $1;
if ($topLevelTag == TAG_PREFIXCODE)
{
$self->AddToCodeBlock($code, \$codeBlock, \$removedCodeSpaces);
}
else # $topLevelTag != TAG_PREFIXCODE
{
if (defined $textBlock)
{
$output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag};
$textBlock = undef;
};
$topLevelTag = TAG_PREFIXCODE;
$output .= '';
$self->AddToCodeBlock($code, \$codeBlock, \$removedCodeSpaces);
};
}
# If we're not in either code style...
else
{
# Strip any leading whitespace.
$commentLines->[$index] =~ s/^ +//;
# If we were in a prefixed code section...
if ($topLevelTag == TAG_PREFIXCODE)
{
$codeBlock =~ s/\n+$//;
$output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '';
$codeBlock = undef;
$topLevelTag = TAG_NONE;
$prevLineBlank = undef;
};
# If the line is blank...
if (!length($commentLines->[$index]))
{
# End a paragraph. Everything else ignores it for now.
if ($topLevelTag == TAG_PARAGRAPH)
{
$output .= $self->RichFormatTextBlock($textBlock) . '';
$textBlock = undef;
$topLevelTag = TAG_NONE;
};
$prevLineBlank = 1;
}
# If the line starts with a bullet...
elsif ($commentLines->[$index] =~ /^[-\*o+] +([^ ].*)$/ &&
substr($1, 0, 2) ne '- ') # Make sure "o - Something" is a definition, not a bullet.
{
my $bulletedText = $1;
if (defined $textBlock)
{ $output .= $self->RichFormatTextBlock($textBlock); };
if ($topLevelTag == TAG_BULLETLIST)
{
$output .= '
';
};
$textBlock = $description;
$prevLineBlank = undef;
}
# If the line could be a header...
elsif ($prevLineBlank && $commentLines->[$index] =~ /^(.*)([^ ]):$/)
{
my $headerText = $1 . $2;
if (defined $textBlock)
{
$output .= $self->RichFormatTextBlock($textBlock);
$textBlock = undef;
}
$output .= $tagEnders{$topLevelTag};
$topLevelTag = TAG_NONE;
$output .= '' . $self->RichFormatTextBlock($headerText) . '';
if ($type eq ::TOPIC_FUNCTION() && $isList)
{
$ignoreListSymbols = exists $functionListIgnoredHeadings{lc($headerText)};
};
$prevLineBlank = undef;
}
# If the line looks like a code tag...
elsif ($commentLines->[$index] =~ /^\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i)
{
if (defined $textBlock)
{
$output .= $self->RichFormatTextBlock($textBlock);
$textBlock = undef;
};
$output .= $tagEnders{$topLevelTag} . '';
$topLevelTag = TAG_TAGCODE;
}
# If the line looks like an inline image...
elsif ($commentLines->[$index] =~ /^(\( *see +)([^\)]+?)( *\))$/i)
{
if (defined $textBlock)
{
$output .= $self->RichFormatTextBlock($textBlock);
$textBlock = undef;
};
$output .= $tagEnders{$topLevelTag};
$topLevelTag = TAG_NONE;
$output .= 'ConvertAmpChars($1 . $2 . $3) . '">';
$prevLineBlank = undef;
}
# If the line isn't any of those, we consider it normal text.
else
{
# A blank line followed by normal text ends lists. We don't handle this when we detect if the line's blank because
# we don't want blank lines between list items to break the list.
if ($prevLineBlank && ($topLevelTag == TAG_BULLETLIST || $topLevelTag == TAG_DESCRIPTIONLIST))
{
$output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag} . '
';
$topLevelTag = TAG_PARAGRAPH;
# textBlock will already be undef.
};
if (defined $textBlock)
{ $textBlock .= ' '; };
$textBlock .= $commentLines->[$index];
$prevLineBlank = undef;
};
};
$index++;
};
# Clean up anything left dangling.
if (defined $textBlock)
{
$output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag};
}
elsif (defined $codeBlock)
{
$codeBlock =~ s/\n+$//;
$output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '
';
};
return $output;
};
#
# Function: AddToCodeBlock
#
# Adds a line of text to a code block, handling all the indentation processing required.
#
# Parameters:
#
# line - The line of text to add.
# codeBlockRef - A reference to the code block to add it to.
# removedSpacesRef - A reference to a variable to hold the number of spaces removed. It needs to be stored between calls.
# It will reset itself automatically when the code block codeBlockRef points to is undef.
#
sub AddToCodeBlock #(line, codeBlockRef, removedSpacesRef)
{
my ($self, $line, $codeBlockRef, $removedSpacesRef) = @_;
$line =~ /^( *)(.*)$/;
my ($spaces, $code) = ($1, $2);
if (!defined $$codeBlockRef)
{
if (length($code))
{
$$codeBlockRef = $code . "\n";
$$removedSpacesRef = length($spaces);
};
# else ignore leading line breaks.
}
elsif (length $code)
{
# Make sure we have the minimum amount of spaces to the left possible.
if (length($spaces) != $$removedSpacesRef)
{
my $spaceDifference = abs( length($spaces) - $$removedSpacesRef );
my $spacesToAdd = ' ' x $spaceDifference;
if (length($spaces) > $$removedSpacesRef)
{
$$codeBlockRef .= $spacesToAdd;
}
else
{
$$codeBlockRef =~ s/^(.)/$spacesToAdd . $1/gme;
$$removedSpacesRef = length($spaces);
};
};
$$codeBlockRef .= $code . "\n";
}
else # (!length $code)
{
$$codeBlockRef .= "\n";
};
};
#
# Function: RichFormatTextBlock
#
# Applies rich formatting to a chunk of text. This includes both amp chars, formatting tags, and link tags.
#
# Parameters:
#
# text - The block of text to format.
#
# Returns:
#
# The formatted text block.
#
sub RichFormatTextBlock #(text)
{
my ($self, $text) = @_;
my $output;
# First find bare urls, e-mail addresses, and images. We have to do this before the split because they may contain underscores
# or asterisks. We have to mark the tags with \x1E and \x1F so they don't get confused with angle brackets from the comment.
# We can't convert the amp chars beforehand because we need lookbehinds in the regexps below and they need to be
# constant length. Sucks, huh?
$text =~ s{
# The previous character can't be an alphanumeric or an opening angle bracket.
(?] )
}
{"\x1E" . 'email target="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '" '
. 'name="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '"' . "\x1F"}igxe;
$text =~ s{
# The previous character can't be an alphanumeric or an opening angle bracket.
(?] )
}
{"\x1E" . 'url target="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '" '
. 'name="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '"' . "\x1F"}igxe;
# Find image links. Inline images should already be pulled out by now.
$text =~ s{(\( *see +)([^\)]+?)( *\))}
{"\x1E" . 'img mode="link" target="' . NaturalDocs::NDMarkup->ConvertAmpChars($2) . '" '
. 'original="' . NaturalDocs::NDMarkup->ConvertAmpChars($1 . $2 . $3) . '"' . "\x1F"}gie;
# Split the text from the potential tags.
my @tempTextBlocks = split(/([\*_<>\x1E\x1F])/, $text);
# Since the symbols are considered dividers, empty strings could appear between two in a row or at the beginning/end of the
# array. This could seriously screw up TagType(), so we need to get rid of them.
my @textBlocks;
while (scalar @tempTextBlocks)
{
my $tempTextBlock = shift @tempTextBlocks;
if (length $tempTextBlock)
{ push @textBlocks, $tempTextBlock; };
};
my $bold;
my $underline;
my $underlineHasWhitespace;
my $index = 0;
while ($index < scalar @textBlocks)
{
if ($textBlocks[$index] eq "\x1E")
{
$output .= '<';
$index++;
while ($textBlocks[$index] ne "\x1F")
{
$output .= $textBlocks[$index];
$index++;
};
$output .= '>';
}
elsif ($textBlocks[$index] eq '<' && $self->TagType(\@textBlocks, $index) == POSSIBLE_OPENING_TAG)
{
my $endingIndex = $self->ClosingTag(\@textBlocks, $index, undef);
if ($endingIndex != -1)
{
my $linkText;
$index++;
while ($index < $endingIndex)
{
$linkText .= $textBlocks[$index];
$index++;
};
# Index will be incremented again at the end of the loop.
$linkText = NaturalDocs::NDMarkup->ConvertAmpChars($linkText);
if ($linkText =~ /^(?:mailto\:)?((?:[a-z0-9\-_]+\.)*[a-z0-9\-_]+@(?:[a-z0-9\-]+\.)+[a-z]{2,4})$/i)
{ $output .= ''; }
elsif ($linkText =~ /^(?:http|https|ftp|news|file)\:/i)
{ $output .= ''; }
else
{ $output .= ''; };
}
else # it's not a link.
{
$output .= '<';
};
}
elsif ($textBlocks[$index] eq '*')
{
my $tagType = $self->TagType(\@textBlocks, $index);
if ($tagType == POSSIBLE_OPENING_TAG && $self->ClosingTag(\@textBlocks, $index, undef) != -1)
{
# ClosingTag() makes sure tags aren't opened multiple times in a row.
$bold = 1;
$output .= '';
}
elsif ($bold && $tagType == POSSIBLE_CLOSING_TAG)
{
$bold = undef;
$output .= '';
}
else
{
$output .= '*';
};
}
elsif ($textBlocks[$index] eq '_')
{
my $tagType = $self->TagType(\@textBlocks, $index);
if ($tagType == POSSIBLE_OPENING_TAG && $self->ClosingTag(\@textBlocks, $index, \$underlineHasWhitespace) != -1)
{
# ClosingTag() makes sure tags aren't opened multiple times in a row.
$underline = 1;
#underlineHasWhitespace is set by ClosingTag().
$output .= '';
}
elsif ($underline && $tagType == POSSIBLE_CLOSING_TAG)
{
$underline = undef;
#underlineHasWhitespace will be reset by the next opening underline.
$output .= '';
}
elsif ($underline && !$underlineHasWhitespace)
{
# If there's no whitespace between underline tags, all underscores are replaced by spaces so
# _some_underlined_text_ becomes some underlined text. The standard _some underlined text_
# will work too.
$output .= ' ';
}
else
{
$output .= '_';
};
}
else # plain text or a > that isn't part of a link
{
$output .= NaturalDocs::NDMarkup->ConvertAmpChars($textBlocks[$index]);
};
$index++;
};
return $output;
};
#
# Function: TagType
#
# Returns whether the tag is a possible opening or closing tag, or neither. "Possible" because it doesn't check if an opening tag is
# closed or a closing tag is opened, just whether the surrounding characters allow it to be a candidate for a tag. For example, in
# "A _B" the underscore is a possible opening underline tag, but in "A_B" it is not. Support function for .
#
# Parameters:
#
# textBlocks - A reference to an array of text blocks.
# index - The index of the tag.
#
# Returns:
#
# POSSIBLE_OPENING_TAG, POSSIBLE_CLOSING_TAG, or NOT_A_TAG.
#
sub TagType #(textBlocks, index)
{
my ($self, $textBlocks, $index) = @_;
# Possible opening tags
if ( ( $textBlocks->[$index] =~ /^[\*_<]$/ ) &&
# Before it must be whitespace, the beginning of the text, or ({["'-/*_.
( $index == 0 || $textBlocks->[$index-1] =~ /[\ \t\n\(\{\[\"\'\-\/\*\_]$/ ) &&
# Notes for 2.0: Include Spanish upside down ! and ? as well as opening quotes (66) and apostrophes (6). Look into
# Unicode character classes as well.
# After it must be non-whitespace.
( $index + 1 < scalar @$textBlocks && $textBlocks->[$index+1] !~ /^[\ \t\n]/) &&
# Make sure we don't accept <<, <=, <-, or *= as opening tags.
( $textBlocks->[$index] ne '<' || $textBlocks->[$index+1] !~ /^[<=-]/ ) &&
( $textBlocks->[$index] ne '*' || $textBlocks->[$index+1] !~ /^[\=\*]/ ) &&
# Make sure we don't accept * or _ before it unless it's <.
( $textBlocks->[$index] eq '<' || $index == 0 || $textBlocks->[$index-1] !~ /[\*\_]$/) )
{
return POSSIBLE_OPENING_TAG;
}
# Possible closing tags
elsif ( ( $textBlocks->[$index] =~ /^[\*_>]$/) &&
# After it must be whitespace, the end of the text, or )}].,!?"';:-/*_.
( $index + 1 == scalar @$textBlocks || $textBlocks->[$index+1] =~ /^[ \t\n\)\]\}\.\,\!\?\"\'\;\:\-\/\*\_]/ ||
# Links also get plurals, like s, es, 's, and '.
( $textBlocks->[$index] eq '>' && $textBlocks->[$index+1] =~ /^(?:es|s|\')/ ) ) &&
# Notes for 2.0: Include closing quotes (99) and apostrophes (9). Look into Unicode character classes as well.
# Before it must be non-whitespace.
( $index != 0 && $textBlocks->[$index-1] !~ /[ \t\n]$/ ) &&
# Make sure we don't accept >>, ->, or => as closing tags. >= is already taken care of.
( $textBlocks->[$index] ne '>' || $textBlocks->[$index-1] !~ /[>=-]$/ ) &&
# Make sure we don't accept * or _ after it unless it's >.
( $textBlocks->[$index] eq '>' || $textBlocks->[$index+1] !~ /[\*\_]$/) )
{
return POSSIBLE_CLOSING_TAG;
}
else
{
return NOT_A_TAG;
};
};
#
# Function: ClosingTag
#
# Returns whether a tag is closed or not, where it's closed if it is, and optionally whether there is any whitespace between the
# tags. Support function for .
#
# The results of this function are in full context, meaning that if it says a tag is closed, it can be interpreted as that tag in the
# final output. It takes into account any spoiling factors, like there being two opening tags in a row.
#
# Parameters:
#
# textBlocks - A reference to an array of text blocks.
# index - The index of the opening tag.
# hasWhitespaceRef - A reference to the variable that will hold whether there is whitespace between the tags or not. If
# undef, the function will not check. If the tag is not closed, the variable will not be changed.
#
# Returns:
#
# If the tag is closed, it returns the index of the closing tag and puts whether there was whitespace between the tags in
# hasWhitespaceRef if it was specified. If the tag is not closed, it returns -1 and doesn't touch the variable pointed to by
# hasWhitespaceRef.
#
sub ClosingTag #(textBlocks, index, hasWhitespace)
{
my ($self, $textBlocks, $index, $hasWhitespaceRef) = @_;
my $hasWhitespace;
my $closingTag;
if ($textBlocks->[$index] eq '*' || $textBlocks->[$index] eq '_')
{ $closingTag = $textBlocks->[$index]; }
elsif ($textBlocks->[$index] eq '<')
{ $closingTag = '>'; }
else
{ return -1; };
my $beginningIndex = $index;
$index++;
while ($index < scalar @$textBlocks)
{
if ($textBlocks->[$index] eq '<' && $self->TagType($textBlocks, $index) == POSSIBLE_OPENING_TAG)
{
# If we hit a < and we're checking whether a link is closed, it's not. The first < becomes literal and the second one
# becomes the new link opening.
if ($closingTag eq '>')
{
return -1;
}
# If we're not searching for the end of a link, we have to skip the link because formatting tags cannot appear within
# them. That's of course provided it's closed.
else
{
my $linkHasWhitespace;
my $endIndex = $self->ClosingTag($textBlocks, $index,
($hasWhitespaceRef && !$hasWhitespace ? \$linkHasWhitespace : undef) );
if ($endIndex != -1)
{
if ($linkHasWhitespace)
{ $hasWhitespace = 1; };
# index will be incremented again at the end of the loop, which will bring us past the link's >.
$index = $endIndex;
};
};
}
elsif ($textBlocks->[$index] eq $closingTag)
{
my $tagType = $self->TagType($textBlocks, $index);
if ($tagType == POSSIBLE_CLOSING_TAG)
{
# There needs to be something between the tags for them to count.
if ($index == $beginningIndex + 1)
{ return -1; }
else
{
# Success!
if ($hasWhitespaceRef)
{ $$hasWhitespaceRef = $hasWhitespace; };
return $index;
};
}
# If there are two opening tags of the same type, the first becomes literal and the next becomes part of a tag.
elsif ($tagType == POSSIBLE_OPENING_TAG)
{ return -1; }
}
elsif ($hasWhitespaceRef && !$hasWhitespace)
{
if ($textBlocks->[$index] =~ /[ \t\n]/)
{ $hasWhitespace = 1; };
};
$index++;
};
# Hit the end of the text blocks if we're here.
return -1;
};
1;