mirror of
https://github.com/ddnet/ddnet.git
synced 2024-11-15 04:28:20 +00:00
1061 lines
36 KiB
Perl
1061 lines
36 KiB
Perl
###############################################################################
|
|
#
|
|
# Package: NaturalDocs::Parser::Native
|
|
#
|
|
###############################################################################
|
|
#
|
|
# A package that converts comments from Natural Docs' native format into <NaturalDocs::Parser::ParsedTopic> objects.
|
|
# Unlike most second-level packages, these are packages and not object classes.
|
|
#
|
|
###############################################################################
|
|
|
|
# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
|
|
# Natural Docs is licensed under the GPL
|
|
|
|
|
|
use strict;
|
|
use integer;
|
|
|
|
package NaturalDocs::Parser::Native;
|
|
|
|
|
|
###############################################################################
|
|
# Group: Variables
|
|
|
|
|
|
# Return values of TagType(). Not documented here.
|
|
use constant POSSIBLE_OPENING_TAG => 1;
|
|
use constant POSSIBLE_CLOSING_TAG => 2;
|
|
use constant NOT_A_TAG => 3;
|
|
|
|
|
|
#
|
|
# var: package
|
|
#
|
|
# A <SymbolString> representing the package normal topics will be a part of at the current point in the file. This is a package variable
|
|
# because it needs to be reserved between function calls.
|
|
#
|
|
my $package;
|
|
|
|
#
|
|
# hash: functionListIgnoredHeadings
|
|
#
|
|
# An existence hash of all the headings that prevent the parser from creating function list symbols. Whenever one of
|
|
# these headings are used in a function list topic, symbols are not created from definition lists until the next heading. The keys
|
|
# are in all lowercase.
|
|
#
|
|
my %functionListIgnoredHeadings = ( 'parameters' => 1,
|
|
'parameter' => 1,
|
|
'params' => 1,
|
|
'param' => 1,
|
|
'arguments' => 1,
|
|
'argument' => 1,
|
|
'args' => 1,
|
|
'arg' => 1 );
|
|
|
|
|
|
###############################################################################
|
|
# Group: Interface Functions
|
|
|
|
|
|
#
|
|
# Function: Start
|
|
#
|
|
# This will be called whenever a file is about to be parsed. It allows the package to reset its internal state.
|
|
#
|
|
sub Start
|
|
{
|
|
my ($self) = @_;
|
|
$package = undef;
|
|
};
|
|
|
|
|
|
#
|
|
# Function: IsMine
|
|
#
|
|
# Examines the comment and returns whether it is *definitely* Natural Docs content, i.e. it is owned by this package. Note
|
|
# that a comment can fail this function and still be interpreted as a Natural Docs content, for example a JavaDoc-styled comment
|
|
# that doesn't have header lines but no JavaDoc tags either.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# commentLines - An arrayref of the comment lines. Must have been run through <NaturalDocs::Parser->CleanComment()>.
|
|
# isJavaDoc - Whether the comment was JavaDoc-styled.
|
|
#
|
|
# Returns:
|
|
#
|
|
# Whether the comment is *definitely* Natural Docs content.
|
|
#
|
|
sub IsMine #(string[] commentLines, bool isJavaDoc)
|
|
{
|
|
my ($self, $commentLines, $isJavaDoc) = @_;
|
|
|
|
# Skip to the first line with content.
|
|
my $line = 0;
|
|
|
|
while ($line < scalar @$commentLines && !length $commentLines->[$line])
|
|
{ $line++; };
|
|
|
|
return $self->ParseHeaderLine($commentLines->[$line]);
|
|
};
|
|
|
|
|
|
|
|
#
|
|
# Function: ParseComment
|
|
#
|
|
# This will be called whenever a comment capable of containing Natural Docs content is found.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# commentLines - An arrayref of the comment lines. Must have been run through <NaturalDocs::Parser->CleanComment()>.
|
|
# *The original memory will be changed.*
|
|
# isJavaDoc - Whether the comment is JavaDoc styled.
|
|
# lineNumber - The line number of the first of the comment lines.
|
|
# parsedTopics - A reference to the array where any new <NaturalDocs::Parser::ParsedTopics> should be placed.
|
|
#
|
|
# Returns:
|
|
#
|
|
# The number of parsed topics added to the array, or zero if none.
|
|
#
|
|
sub ParseComment #(commentLines, isJavaDoc, lineNumber, parsedTopics)
|
|
{
|
|
my ($self, $commentLines, $isJavaDoc, $lineNumber, $parsedTopics) = @_;
|
|
|
|
my $topicCount = 0;
|
|
my $prevLineBlank = 1;
|
|
my $inCodeSection = 0;
|
|
|
|
my ($type, $scope, $isPlural, $title, $symbol);
|
|
#my $package; # package variable.
|
|
my ($newKeyword, $newTitle);
|
|
|
|
my $index = 0;
|
|
|
|
my $bodyStart = 0;
|
|
my $bodyEnd = 0; # Not inclusive.
|
|
|
|
while ($index < scalar @$commentLines)
|
|
{
|
|
# Everything but leading whitespace was removed beforehand.
|
|
|
|
# If we're in a code section...
|
|
if ($inCodeSection)
|
|
{
|
|
if ($commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
|
|
{ $inCodeSection = undef; };
|
|
|
|
$prevLineBlank = 0;
|
|
$bodyEnd++;
|
|
}
|
|
|
|
# If the line is empty...
|
|
elsif (!length($commentLines->[$index]))
|
|
{
|
|
$prevLineBlank = 1;
|
|
|
|
if ($topicCount)
|
|
{ $bodyEnd++; };
|
|
}
|
|
|
|
# If the line has a recognized header and the previous line is blank...
|
|
elsif ($prevLineBlank && (($newKeyword, $newTitle) = $self->ParseHeaderLine($commentLines->[$index])) )
|
|
{
|
|
# Process the previous one, if any.
|
|
|
|
if ($topicCount)
|
|
{
|
|
if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
|
|
{ $package = undef; };
|
|
|
|
my $body = $self->FormatBody($commentLines, $bodyStart, $bodyEnd, $type, $isPlural);
|
|
my $newTopic = $self->MakeParsedTopic($type, $title, $package, $body, $lineNumber + $bodyStart - 1, $isPlural);
|
|
push @$parsedTopics, $newTopic;
|
|
|
|
$package = $newTopic->Package();
|
|
};
|
|
|
|
$title = $newTitle;
|
|
|
|
my $typeInfo;
|
|
($type, $typeInfo, $isPlural) = NaturalDocs::Topics->KeywordInfo($newKeyword);
|
|
$scope = $typeInfo->Scope();
|
|
|
|
$bodyStart = $index + 1;
|
|
$bodyEnd = $index + 1;
|
|
|
|
$topicCount++;
|
|
|
|
$prevLineBlank = 0;
|
|
}
|
|
|
|
# If we're on a non-empty, non-header line of a JavaDoc-styled comment and we haven't started a topic yet...
|
|
elsif ($isJavaDoc && !$topicCount)
|
|
{
|
|
$type = undef;
|
|
$scope = ::SCOPE_NORMAL(); # The scope repair and topic merging processes will handle if this is a class topic.
|
|
$isPlural = undef;
|
|
$title = undef;
|
|
$symbol = undef;
|
|
|
|
$bodyStart = $index;
|
|
$bodyEnd = $index + 1;
|
|
|
|
$topicCount++;
|
|
|
|
$prevLineBlank = undef;
|
|
}
|
|
|
|
# If we're on a normal content line within a topic
|
|
elsif ($topicCount)
|
|
{
|
|
$prevLineBlank = 0;
|
|
$bodyEnd++;
|
|
|
|
if ($commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i)
|
|
{ $inCodeSection = 1; };
|
|
};
|
|
|
|
|
|
$index++;
|
|
};
|
|
|
|
|
|
# Last one, if any. This is the only one that gets the prototypes.
|
|
if ($bodyStart)
|
|
{
|
|
if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END())
|
|
{ $package = undef; };
|
|
|
|
my $body = $self->FormatBody($commentLines, $bodyStart, $bodyEnd, $type, $isPlural);
|
|
my $newTopic = $self->MakeParsedTopic($type, $title, $package, $body, $lineNumber + $bodyStart - 1, $isPlural);
|
|
push @$parsedTopics, $newTopic;
|
|
$topicCount++;
|
|
|
|
$package = $newTopic->Package();
|
|
};
|
|
|
|
return $topicCount;
|
|
};
|
|
|
|
|
|
#
|
|
# Function: ParseHeaderLine
|
|
#
|
|
# If the passed line is a topic header, returns the array ( keyword, title ). Otherwise returns an empty array.
|
|
#
|
|
sub ParseHeaderLine #(line)
|
|
{
|
|
my ($self, $line) = @_;
|
|
|
|
if ($line =~ /^ *([a-z0-9 ]*[a-z0-9]): +(.*)$/i)
|
|
{
|
|
my ($keyword, $title) = ($1, $2);
|
|
|
|
# We need to do it this way because if you do "if (ND:T->KeywordInfo($keyword)" and the last element of the array it
|
|
# returns is false, the statement is false. That is really retarded, but there it is.
|
|
my ($type, undef, undef) = NaturalDocs::Topics->KeywordInfo($keyword);
|
|
|
|
if ($type)
|
|
{ return ($keyword, $title); }
|
|
else
|
|
{ return ( ); };
|
|
}
|
|
else
|
|
{ return ( ); };
|
|
};
|
|
|
|
|
|
|
|
###############################################################################
|
|
# Group: Support Functions
|
|
|
|
|
|
#
|
|
# Function: MakeParsedTopic
|
|
#
|
|
# Creates a <NaturalDocs::Parser::ParsedTopic> object for the passed parameters. Scope is gotten from
|
|
# the package variable <package> instead of from the parameters. The summary is generated from the body.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# type - The <TopicType>. May be undef for headerless topics.
|
|
# title - The title of the topic. May be undef for headerless topics.
|
|
# package - The package <SymbolString> the topic appears in.
|
|
# body - The topic's body in <NDMarkup>.
|
|
# lineNumber - The topic's line number.
|
|
# isList - Whether the topic is a list.
|
|
#
|
|
# Returns:
|
|
#
|
|
# The <NaturalDocs::Parser::ParsedTopic> object.
|
|
#
|
|
sub MakeParsedTopic #(type, title, package, body, lineNumber, isList)
|
|
{
|
|
my ($self, $type, $title, $package, $body, $lineNumber, $isList) = @_;
|
|
|
|
my $summary;
|
|
|
|
if (defined $body)
|
|
{ $summary = NaturalDocs::Parser->GetSummaryFromBody($body); };
|
|
|
|
return NaturalDocs::Parser::ParsedTopic->New($type, $title, $package, undef, undef, $summary,
|
|
$body, $lineNumber, $isList);
|
|
};
|
|
|
|
|
|
#
|
|
# Function: FormatBody
|
|
#
|
|
# Converts the section body to <NDMarkup>.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# commentLines - The arrayref of comment lines.
|
|
# startingIndex - The starting index of the body to format.
|
|
# endingIndex - The ending index of the body to format, *not* inclusive.
|
|
# type - The type of the section. May be undef for headerless comments.
|
|
# isList - Whether it's a list topic.
|
|
#
|
|
# Returns:
|
|
#
|
|
# The body formatted in <NDMarkup>.
|
|
#
|
|
sub FormatBody #(commentLines, startingIndex, endingIndex, type, isList)
|
|
{
|
|
my ($self, $commentLines, $startingIndex, $endingIndex, $type, $isList) = @_;
|
|
|
|
use constant TAG_NONE => 1;
|
|
use constant TAG_PARAGRAPH => 2;
|
|
use constant TAG_BULLETLIST => 3;
|
|
use constant TAG_DESCRIPTIONLIST => 4;
|
|
use constant TAG_HEADING => 5;
|
|
use constant TAG_PREFIXCODE => 6;
|
|
use constant TAG_TAGCODE => 7;
|
|
|
|
my %tagEnders = ( TAG_NONE() => '',
|
|
TAG_PARAGRAPH() => '</p>',
|
|
TAG_BULLETLIST() => '</li></ul>',
|
|
TAG_DESCRIPTIONLIST() => '</dd></dl>',
|
|
TAG_HEADING() => '</h>',
|
|
TAG_PREFIXCODE() => '</code>',
|
|
TAG_TAGCODE() => '</code>' );
|
|
|
|
my $topLevelTag = TAG_NONE;
|
|
|
|
my $output;
|
|
my $textBlock;
|
|
my $prevLineBlank = 1;
|
|
|
|
my $codeBlock;
|
|
my $removedCodeSpaces;
|
|
|
|
my $ignoreListSymbols;
|
|
|
|
my $index = $startingIndex;
|
|
|
|
while ($index < $endingIndex)
|
|
{
|
|
# If we're in a tagged code section...
|
|
if ($topLevelTag == TAG_TAGCODE)
|
|
{
|
|
if ($commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i)
|
|
{
|
|
$codeBlock =~ s/\n+$//;
|
|
$output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '</code>';
|
|
$codeBlock = undef;
|
|
$topLevelTag = TAG_NONE;
|
|
$prevLineBlank = undef;
|
|
}
|
|
else
|
|
{
|
|
$self->AddToCodeBlock($commentLines->[$index], \$codeBlock, \$removedCodeSpaces);
|
|
};
|
|
}
|
|
|
|
# If the line starts with a code designator...
|
|
elsif ($commentLines->[$index] =~ /^ *[>:|](.*)$/)
|
|
{
|
|
my $code = $1;
|
|
|
|
if ($topLevelTag == TAG_PREFIXCODE)
|
|
{
|
|
$self->AddToCodeBlock($code, \$codeBlock, \$removedCodeSpaces);
|
|
}
|
|
else # $topLevelTag != TAG_PREFIXCODE
|
|
{
|
|
if (defined $textBlock)
|
|
{
|
|
$output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag};
|
|
$textBlock = undef;
|
|
};
|
|
|
|
$topLevelTag = TAG_PREFIXCODE;
|
|
$output .= '<code>';
|
|
$self->AddToCodeBlock($code, \$codeBlock, \$removedCodeSpaces);
|
|
};
|
|
}
|
|
|
|
# If we're not in either code style...
|
|
else
|
|
{
|
|
# Strip any leading whitespace.
|
|
$commentLines->[$index] =~ s/^ +//;
|
|
|
|
# If we were in a prefixed code section...
|
|
if ($topLevelTag == TAG_PREFIXCODE)
|
|
{
|
|
$codeBlock =~ s/\n+$//;
|
|
$output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '</code>';
|
|
$codeBlock = undef;
|
|
$topLevelTag = TAG_NONE;
|
|
$prevLineBlank = undef;
|
|
};
|
|
|
|
|
|
# If the line is blank...
|
|
if (!length($commentLines->[$index]))
|
|
{
|
|
# End a paragraph. Everything else ignores it for now.
|
|
if ($topLevelTag == TAG_PARAGRAPH)
|
|
{
|
|
$output .= $self->RichFormatTextBlock($textBlock) . '</p>';
|
|
$textBlock = undef;
|
|
$topLevelTag = TAG_NONE;
|
|
};
|
|
|
|
$prevLineBlank = 1;
|
|
}
|
|
|
|
# If the line starts with a bullet...
|
|
elsif ($commentLines->[$index] =~ /^[-\*o+] +([^ ].*)$/ &&
|
|
substr($1, 0, 2) ne '- ') # Make sure "o - Something" is a definition, not a bullet.
|
|
{
|
|
my $bulletedText = $1;
|
|
|
|
if (defined $textBlock)
|
|
{ $output .= $self->RichFormatTextBlock($textBlock); };
|
|
|
|
if ($topLevelTag == TAG_BULLETLIST)
|
|
{
|
|
$output .= '</li><li>';
|
|
}
|
|
else #($topLevelTag != TAG_BULLETLIST)
|
|
{
|
|
$output .= $tagEnders{$topLevelTag} . '<ul><li>';
|
|
$topLevelTag = TAG_BULLETLIST;
|
|
};
|
|
|
|
$textBlock = $bulletedText;
|
|
|
|
$prevLineBlank = undef;
|
|
}
|
|
|
|
# If the line looks like a description list entry...
|
|
elsif ($commentLines->[$index] =~ /^(.+?) +- +([^ ].*)$/ && $topLevelTag != TAG_PARAGRAPH)
|
|
{
|
|
my $entry = $1;
|
|
my $description = $2;
|
|
|
|
if (defined $textBlock)
|
|
{ $output .= $self->RichFormatTextBlock($textBlock); };
|
|
|
|
if ($topLevelTag == TAG_DESCRIPTIONLIST)
|
|
{
|
|
$output .= '</dd>';
|
|
}
|
|
else #($topLevelTag != TAG_DESCRIPTIONLIST)
|
|
{
|
|
$output .= $tagEnders{$topLevelTag} . '<dl>';
|
|
$topLevelTag = TAG_DESCRIPTIONLIST;
|
|
};
|
|
|
|
if (($isList && !$ignoreListSymbols) || $type eq ::TOPIC_ENUMERATION())
|
|
{
|
|
$output .= '<ds>' . NaturalDocs::NDMarkup->ConvertAmpChars($entry) . '</ds><dd>';
|
|
}
|
|
else
|
|
{
|
|
$output .= '<de>' . NaturalDocs::NDMarkup->ConvertAmpChars($entry) . '</de><dd>';
|
|
};
|
|
|
|
$textBlock = $description;
|
|
|
|
$prevLineBlank = undef;
|
|
}
|
|
|
|
# If the line could be a header...
|
|
elsif ($prevLineBlank && $commentLines->[$index] =~ /^(.*)([^ ]):$/)
|
|
{
|
|
my $headerText = $1 . $2;
|
|
|
|
if (defined $textBlock)
|
|
{
|
|
$output .= $self->RichFormatTextBlock($textBlock);
|
|
$textBlock = undef;
|
|
}
|
|
|
|
$output .= $tagEnders{$topLevelTag};
|
|
$topLevelTag = TAG_NONE;
|
|
|
|
$output .= '<h>' . $self->RichFormatTextBlock($headerText) . '</h>';
|
|
|
|
if ($type eq ::TOPIC_FUNCTION() && $isList)
|
|
{
|
|
$ignoreListSymbols = exists $functionListIgnoredHeadings{lc($headerText)};
|
|
};
|
|
|
|
$prevLineBlank = undef;
|
|
}
|
|
|
|
# If the line looks like a code tag...
|
|
elsif ($commentLines->[$index] =~ /^\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i)
|
|
{
|
|
if (defined $textBlock)
|
|
{
|
|
$output .= $self->RichFormatTextBlock($textBlock);
|
|
$textBlock = undef;
|
|
};
|
|
|
|
$output .= $tagEnders{$topLevelTag} . '<code>';
|
|
$topLevelTag = TAG_TAGCODE;
|
|
}
|
|
|
|
# If the line looks like an inline image...
|
|
elsif ($commentLines->[$index] =~ /^(\( *see +)([^\)]+?)( *\))$/i)
|
|
{
|
|
if (defined $textBlock)
|
|
{
|
|
$output .= $self->RichFormatTextBlock($textBlock);
|
|
$textBlock = undef;
|
|
};
|
|
|
|
$output .= $tagEnders{$topLevelTag};
|
|
$topLevelTag = TAG_NONE;
|
|
|
|
$output .= '<img mode="inline" target="' . NaturalDocs::NDMarkup->ConvertAmpChars($2) . '" '
|
|
. 'original="' . NaturalDocs::NDMarkup->ConvertAmpChars($1 . $2 . $3) . '">';
|
|
|
|
$prevLineBlank = undef;
|
|
}
|
|
|
|
# If the line isn't any of those, we consider it normal text.
|
|
else
|
|
{
|
|
# A blank line followed by normal text ends lists. We don't handle this when we detect if the line's blank because
|
|
# we don't want blank lines between list items to break the list.
|
|
if ($prevLineBlank && ($topLevelTag == TAG_BULLETLIST || $topLevelTag == TAG_DESCRIPTIONLIST))
|
|
{
|
|
$output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag} . '<p>';
|
|
|
|
$topLevelTag = TAG_PARAGRAPH;
|
|
$textBlock = undef;
|
|
}
|
|
|
|
elsif ($topLevelTag == TAG_NONE)
|
|
{
|
|
$output .= '<p>';
|
|
$topLevelTag = TAG_PARAGRAPH;
|
|
# textBlock will already be undef.
|
|
};
|
|
|
|
if (defined $textBlock)
|
|
{ $textBlock .= ' '; };
|
|
|
|
$textBlock .= $commentLines->[$index];
|
|
|
|
$prevLineBlank = undef;
|
|
};
|
|
};
|
|
|
|
$index++;
|
|
};
|
|
|
|
# Clean up anything left dangling.
|
|
if (defined $textBlock)
|
|
{
|
|
$output .= $self->RichFormatTextBlock($textBlock) . $tagEnders{$topLevelTag};
|
|
}
|
|
elsif (defined $codeBlock)
|
|
{
|
|
$codeBlock =~ s/\n+$//;
|
|
$output .= NaturalDocs::NDMarkup->ConvertAmpChars($codeBlock) . '</code>';
|
|
};
|
|
|
|
return $output;
|
|
};
|
|
|
|
|
|
#
|
|
# Function: AddToCodeBlock
|
|
#
|
|
# Adds a line of text to a code block, handling all the indentation processing required.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# line - The line of text to add.
|
|
# codeBlockRef - A reference to the code block to add it to.
|
|
# removedSpacesRef - A reference to a variable to hold the number of spaces removed. It needs to be stored between calls.
|
|
# It will reset itself automatically when the code block codeBlockRef points to is undef.
|
|
#
|
|
sub AddToCodeBlock #(line, codeBlockRef, removedSpacesRef)
|
|
{
|
|
my ($self, $line, $codeBlockRef, $removedSpacesRef) = @_;
|
|
|
|
$line =~ /^( *)(.*)$/;
|
|
my ($spaces, $code) = ($1, $2);
|
|
|
|
if (!defined $$codeBlockRef)
|
|
{
|
|
if (length($code))
|
|
{
|
|
$$codeBlockRef = $code . "\n";
|
|
$$removedSpacesRef = length($spaces);
|
|
};
|
|
# else ignore leading line breaks.
|
|
}
|
|
|
|
elsif (length $code)
|
|
{
|
|
# Make sure we have the minimum amount of spaces to the left possible.
|
|
if (length($spaces) != $$removedSpacesRef)
|
|
{
|
|
my $spaceDifference = abs( length($spaces) - $$removedSpacesRef );
|
|
my $spacesToAdd = ' ' x $spaceDifference;
|
|
|
|
if (length($spaces) > $$removedSpacesRef)
|
|
{
|
|
$$codeBlockRef .= $spacesToAdd;
|
|
}
|
|
else
|
|
{
|
|
$$codeBlockRef =~ s/^(.)/$spacesToAdd . $1/gme;
|
|
$$removedSpacesRef = length($spaces);
|
|
};
|
|
};
|
|
|
|
$$codeBlockRef .= $code . "\n";
|
|
}
|
|
|
|
else # (!length $code)
|
|
{
|
|
$$codeBlockRef .= "\n";
|
|
};
|
|
};
|
|
|
|
|
|
#
|
|
# Function: RichFormatTextBlock
|
|
#
|
|
# Applies rich <NDMarkup> formatting to a chunk of text. This includes both amp chars, formatting tags, and link tags.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# text - The block of text to format.
|
|
#
|
|
# Returns:
|
|
#
|
|
# The formatted text block.
|
|
#
|
|
sub RichFormatTextBlock #(text)
|
|
{
|
|
my ($self, $text) = @_;
|
|
my $output;
|
|
|
|
|
|
# First find bare urls, e-mail addresses, and images. We have to do this before the split because they may contain underscores
|
|
# or asterisks. We have to mark the tags with \x1E and \x1F so they don't get confused with angle brackets from the comment.
|
|
# We can't convert the amp chars beforehand because we need lookbehinds in the regexps below and they need to be
|
|
# constant length. Sucks, huh?
|
|
|
|
$text =~ s{
|
|
# The previous character can't be an alphanumeric or an opening angle bracket.
|
|
(?<! [a-z0-9<] )
|
|
|
|
# Optional mailto:. Ignored in output.
|
|
(?:mailto\:)?
|
|
|
|
# Begin capture
|
|
(
|
|
|
|
# The user portion. Alphanumeric and - _. Dots can appear between, but not at the edges or more than
|
|
# one in a row.
|
|
(?: [a-z0-9\-_]+ \. )* [a-z0-9\-_]+
|
|
|
|
@
|
|
|
|
# The domain. Alphanumeric and -. Dots same as above, however, there must be at least two sections
|
|
# and the last one must be two to four alphanumeric characters (.com, .uk, .info, .203 for IP addresses)
|
|
(?: [a-z0-9\-]+ \. )+ [a-z]{2,4}
|
|
|
|
# End capture.
|
|
)
|
|
|
|
# The next character can't be an alphanumeric, which should prevent .abcde from matching the two to
|
|
# four character requirement, or a closing angle bracket.
|
|
(?! [a-z0-9>] )
|
|
|
|
}
|
|
|
|
{"\x1E" . 'email target="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '" '
|
|
. 'name="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '"' . "\x1F"}igxe;
|
|
|
|
$text =~ s{
|
|
# The previous character can't be an alphanumeric or an opening angle bracket.
|
|
(?<! [a-z0-9<] )
|
|
|
|
# Begin capture.
|
|
(
|
|
|
|
# URL must start with one of the acceptable protocols.
|
|
(?:http|https|ftp|news|file)\:
|
|
|
|
# The acceptable URL characters as far as I know.
|
|
[a-z0-9\-\=\~\@\#\%\&\_\+\/\;\:\?\*\.\,]*
|
|
|
|
# The URL characters minus period and comma. If it ends on them, they're probably intended as
|
|
# punctuation.
|
|
[a-z0-9\-\=\~\@\#\%\&\_\+\/\;\:\?\*]
|
|
|
|
# End capture.
|
|
)
|
|
|
|
# The next character must not be an acceptable character or a closing angle bracket. This will prevent the URL
|
|
# from ending early just to get a match.
|
|
(?! [a-z0-9\-\=\~\@\#\%\&\_\+\/\;\:\?\*\>] )
|
|
|
|
}
|
|
|
|
{"\x1E" . 'url target="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '" '
|
|
. 'name="' . NaturalDocs::NDMarkup->ConvertAmpChars($1) . '"' . "\x1F"}igxe;
|
|
|
|
|
|
# Find image links. Inline images should already be pulled out by now.
|
|
|
|
$text =~ s{(\( *see +)([^\)]+?)( *\))}
|
|
{"\x1E" . 'img mode="link" target="' . NaturalDocs::NDMarkup->ConvertAmpChars($2) . '" '
|
|
. 'original="' . NaturalDocs::NDMarkup->ConvertAmpChars($1 . $2 . $3) . '"' . "\x1F"}gie;
|
|
|
|
|
|
|
|
# Split the text from the potential tags.
|
|
|
|
my @tempTextBlocks = split(/([\*_<>\x1E\x1F])/, $text);
|
|
|
|
# Since the symbols are considered dividers, empty strings could appear between two in a row or at the beginning/end of the
|
|
# array. This could seriously screw up TagType(), so we need to get rid of them.
|
|
my @textBlocks;
|
|
|
|
while (scalar @tempTextBlocks)
|
|
{
|
|
my $tempTextBlock = shift @tempTextBlocks;
|
|
|
|
if (length $tempTextBlock)
|
|
{ push @textBlocks, $tempTextBlock; };
|
|
};
|
|
|
|
|
|
my $bold;
|
|
my $underline;
|
|
my $underlineHasWhitespace;
|
|
|
|
my $index = 0;
|
|
|
|
while ($index < scalar @textBlocks)
|
|
{
|
|
if ($textBlocks[$index] eq "\x1E")
|
|
{
|
|
$output .= '<';
|
|
$index++;
|
|
|
|
while ($textBlocks[$index] ne "\x1F")
|
|
{
|
|
$output .= $textBlocks[$index];
|
|
$index++;
|
|
};
|
|
|
|
$output .= '>';
|
|
}
|
|
|
|
elsif ($textBlocks[$index] eq '<' && $self->TagType(\@textBlocks, $index) == POSSIBLE_OPENING_TAG)
|
|
{
|
|
my $endingIndex = $self->ClosingTag(\@textBlocks, $index, undef);
|
|
|
|
if ($endingIndex != -1)
|
|
{
|
|
my $linkText;
|
|
$index++;
|
|
|
|
while ($index < $endingIndex)
|
|
{
|
|
$linkText .= $textBlocks[$index];
|
|
$index++;
|
|
};
|
|
# Index will be incremented again at the end of the loop.
|
|
|
|
$linkText = NaturalDocs::NDMarkup->ConvertAmpChars($linkText);
|
|
|
|
if ($linkText =~ /^(?:mailto\:)?((?:[a-z0-9\-_]+\.)*[a-z0-9\-_]+@(?:[a-z0-9\-]+\.)+[a-z]{2,4})$/i)
|
|
{ $output .= '<email target="' . $1 . '" name="' . $1 . '">'; }
|
|
elsif ($linkText =~ /^(?:http|https|ftp|news|file)\:/i)
|
|
{ $output .= '<url target="' . $linkText . '" name="' . $linkText . '">'; }
|
|
else
|
|
{ $output .= '<link target="' . $linkText . '" name="' . $linkText . '" original="<' . $linkText . '>">'; };
|
|
}
|
|
|
|
else # it's not a link.
|
|
{
|
|
$output .= '<';
|
|
};
|
|
}
|
|
|
|
elsif ($textBlocks[$index] eq '*')
|
|
{
|
|
my $tagType = $self->TagType(\@textBlocks, $index);
|
|
|
|
if ($tagType == POSSIBLE_OPENING_TAG && $self->ClosingTag(\@textBlocks, $index, undef) != -1)
|
|
{
|
|
# ClosingTag() makes sure tags aren't opened multiple times in a row.
|
|
$bold = 1;
|
|
$output .= '<b>';
|
|
}
|
|
elsif ($bold && $tagType == POSSIBLE_CLOSING_TAG)
|
|
{
|
|
$bold = undef;
|
|
$output .= '</b>';
|
|
}
|
|
else
|
|
{
|
|
$output .= '*';
|
|
};
|
|
}
|
|
|
|
elsif ($textBlocks[$index] eq '_')
|
|
{
|
|
my $tagType = $self->TagType(\@textBlocks, $index);
|
|
|
|
if ($tagType == POSSIBLE_OPENING_TAG && $self->ClosingTag(\@textBlocks, $index, \$underlineHasWhitespace) != -1)
|
|
{
|
|
# ClosingTag() makes sure tags aren't opened multiple times in a row.
|
|
$underline = 1;
|
|
#underlineHasWhitespace is set by ClosingTag().
|
|
$output .= '<u>';
|
|
}
|
|
elsif ($underline && $tagType == POSSIBLE_CLOSING_TAG)
|
|
{
|
|
$underline = undef;
|
|
#underlineHasWhitespace will be reset by the next opening underline.
|
|
$output .= '</u>';
|
|
}
|
|
elsif ($underline && !$underlineHasWhitespace)
|
|
{
|
|
# If there's no whitespace between underline tags, all underscores are replaced by spaces so
|
|
# _some_underlined_text_ becomes <u>some underlined text</u>. The standard _some underlined text_
|
|
# will work too.
|
|
$output .= ' ';
|
|
}
|
|
else
|
|
{
|
|
$output .= '_';
|
|
};
|
|
}
|
|
|
|
else # plain text or a > that isn't part of a link
|
|
{
|
|
$output .= NaturalDocs::NDMarkup->ConvertAmpChars($textBlocks[$index]);
|
|
};
|
|
|
|
$index++;
|
|
};
|
|
|
|
return $output;
|
|
};
|
|
|
|
|
|
#
|
|
# Function: TagType
|
|
#
|
|
# Returns whether the tag is a possible opening or closing tag, or neither. "Possible" because it doesn't check if an opening tag is
|
|
# closed or a closing tag is opened, just whether the surrounding characters allow it to be a candidate for a tag. For example, in
|
|
# "A _B" the underscore is a possible opening underline tag, but in "A_B" it is not. Support function for <RichFormatTextBlock()>.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# textBlocks - A reference to an array of text blocks.
|
|
# index - The index of the tag.
|
|
#
|
|
# Returns:
|
|
#
|
|
# POSSIBLE_OPENING_TAG, POSSIBLE_CLOSING_TAG, or NOT_A_TAG.
|
|
#
|
|
sub TagType #(textBlocks, index)
|
|
{
|
|
my ($self, $textBlocks, $index) = @_;
|
|
|
|
|
|
# Possible opening tags
|
|
|
|
if ( ( $textBlocks->[$index] =~ /^[\*_<]$/ ) &&
|
|
|
|
# Before it must be whitespace, the beginning of the text, or ({["'-/*_.
|
|
( $index == 0 || $textBlocks->[$index-1] =~ /[\ \t\n\(\{\[\"\'\-\/\*\_]$/ ) &&
|
|
|
|
# Notes for 2.0: Include Spanish upside down ! and ? as well as opening quotes (66) and apostrophes (6). Look into
|
|
# Unicode character classes as well.
|
|
|
|
# After it must be non-whitespace.
|
|
( $index + 1 < scalar @$textBlocks && $textBlocks->[$index+1] !~ /^[\ \t\n]/) &&
|
|
|
|
# Make sure we don't accept <<, <=, <-, or *= as opening tags.
|
|
( $textBlocks->[$index] ne '<' || $textBlocks->[$index+1] !~ /^[<=-]/ ) &&
|
|
( $textBlocks->[$index] ne '*' || $textBlocks->[$index+1] !~ /^[\=\*]/ ) &&
|
|
|
|
# Make sure we don't accept * or _ before it unless it's <.
|
|
( $textBlocks->[$index] eq '<' || $index == 0 || $textBlocks->[$index-1] !~ /[\*\_]$/) )
|
|
{
|
|
return POSSIBLE_OPENING_TAG;
|
|
}
|
|
|
|
|
|
# Possible closing tags
|
|
|
|
elsif ( ( $textBlocks->[$index] =~ /^[\*_>]$/) &&
|
|
|
|
# After it must be whitespace, the end of the text, or )}].,!?"';:-/*_.
|
|
( $index + 1 == scalar @$textBlocks || $textBlocks->[$index+1] =~ /^[ \t\n\)\]\}\.\,\!\?\"\'\;\:\-\/\*\_]/ ||
|
|
# Links also get plurals, like <link>s, <linx>es, <link>'s, and <links>'.
|
|
( $textBlocks->[$index] eq '>' && $textBlocks->[$index+1] =~ /^(?:es|s|\')/ ) ) &&
|
|
|
|
# Notes for 2.0: Include closing quotes (99) and apostrophes (9). Look into Unicode character classes as well.
|
|
|
|
# Before it must be non-whitespace.
|
|
( $index != 0 && $textBlocks->[$index-1] !~ /[ \t\n]$/ ) &&
|
|
|
|
# Make sure we don't accept >>, ->, or => as closing tags. >= is already taken care of.
|
|
( $textBlocks->[$index] ne '>' || $textBlocks->[$index-1] !~ /[>=-]$/ ) &&
|
|
|
|
# Make sure we don't accept * or _ after it unless it's >.
|
|
( $textBlocks->[$index] eq '>' || $textBlocks->[$index+1] !~ /[\*\_]$/) )
|
|
{
|
|
return POSSIBLE_CLOSING_TAG;
|
|
}
|
|
|
|
else
|
|
{
|
|
return NOT_A_TAG;
|
|
};
|
|
|
|
};
|
|
|
|
|
|
#
|
|
# Function: ClosingTag
|
|
#
|
|
# Returns whether a tag is closed or not, where it's closed if it is, and optionally whether there is any whitespace between the
|
|
# tags. Support function for <RichFormatTextBlock()>.
|
|
#
|
|
# The results of this function are in full context, meaning that if it says a tag is closed, it can be interpreted as that tag in the
|
|
# final output. It takes into account any spoiling factors, like there being two opening tags in a row.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# textBlocks - A reference to an array of text blocks.
|
|
# index - The index of the opening tag.
|
|
# hasWhitespaceRef - A reference to the variable that will hold whether there is whitespace between the tags or not. If
|
|
# undef, the function will not check. If the tag is not closed, the variable will not be changed.
|
|
#
|
|
# Returns:
|
|
#
|
|
# If the tag is closed, it returns the index of the closing tag and puts whether there was whitespace between the tags in
|
|
# hasWhitespaceRef if it was specified. If the tag is not closed, it returns -1 and doesn't touch the variable pointed to by
|
|
# hasWhitespaceRef.
|
|
#
|
|
sub ClosingTag #(textBlocks, index, hasWhitespace)
|
|
{
|
|
my ($self, $textBlocks, $index, $hasWhitespaceRef) = @_;
|
|
|
|
my $hasWhitespace;
|
|
my $closingTag;
|
|
|
|
if ($textBlocks->[$index] eq '*' || $textBlocks->[$index] eq '_')
|
|
{ $closingTag = $textBlocks->[$index]; }
|
|
elsif ($textBlocks->[$index] eq '<')
|
|
{ $closingTag = '>'; }
|
|
else
|
|
{ return -1; };
|
|
|
|
my $beginningIndex = $index;
|
|
$index++;
|
|
|
|
while ($index < scalar @$textBlocks)
|
|
{
|
|
if ($textBlocks->[$index] eq '<' && $self->TagType($textBlocks, $index) == POSSIBLE_OPENING_TAG)
|
|
{
|
|
# If we hit a < and we're checking whether a link is closed, it's not. The first < becomes literal and the second one
|
|
# becomes the new link opening.
|
|
if ($closingTag eq '>')
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
# If we're not searching for the end of a link, we have to skip the link because formatting tags cannot appear within
|
|
# them. That's of course provided it's closed.
|
|
else
|
|
{
|
|
my $linkHasWhitespace;
|
|
|
|
my $endIndex = $self->ClosingTag($textBlocks, $index,
|
|
($hasWhitespaceRef && !$hasWhitespace ? \$linkHasWhitespace : undef) );
|
|
|
|
if ($endIndex != -1)
|
|
{
|
|
if ($linkHasWhitespace)
|
|
{ $hasWhitespace = 1; };
|
|
|
|
# index will be incremented again at the end of the loop, which will bring us past the link's >.
|
|
$index = $endIndex;
|
|
};
|
|
};
|
|
}
|
|
|
|
elsif ($textBlocks->[$index] eq $closingTag)
|
|
{
|
|
my $tagType = $self->TagType($textBlocks, $index);
|
|
|
|
if ($tagType == POSSIBLE_CLOSING_TAG)
|
|
{
|
|
# There needs to be something between the tags for them to count.
|
|
if ($index == $beginningIndex + 1)
|
|
{ return -1; }
|
|
else
|
|
{
|
|
# Success!
|
|
|
|
if ($hasWhitespaceRef)
|
|
{ $$hasWhitespaceRef = $hasWhitespace; };
|
|
|
|
return $index;
|
|
};
|
|
}
|
|
|
|
# If there are two opening tags of the same type, the first becomes literal and the next becomes part of a tag.
|
|
elsif ($tagType == POSSIBLE_OPENING_TAG)
|
|
{ return -1; }
|
|
}
|
|
|
|
elsif ($hasWhitespaceRef && !$hasWhitespace)
|
|
{
|
|
if ($textBlocks->[$index] =~ /[ \t\n]/)
|
|
{ $hasWhitespace = 1; };
|
|
};
|
|
|
|
$index++;
|
|
};
|
|
|
|
# Hit the end of the text blocks if we're here.
|
|
return -1;
|
|
};
|
|
|
|
|
|
1;
|