ddnet/docs/tool/Modules/NaturalDocs/Parser/JavaDoc.pm
2008-08-02 08:21:29 +00:00

465 lines
14 KiB
Perl

###############################################################################
#
# Package: NaturalDocs::Parser::JavaDoc
#
###############################################################################
#
# A package for translating JavaDoc topics into Natural Docs.
#
# Supported tags:
#
# - @param
# - @author
# - @deprecated
# - @code, @literal (doesn't change font)
# - @exception, @throws (doesn't link to class)
# - @link, @linkplain (doesn't change font)
# - @return, @returns
# - @see
# - @since
# - @value (shown as link instead of replacement)
# - @version
#
# Stripped tags:
#
# - @inheritDoc
# - @serial, @serialField, @serialData
# - All other block level tags.
#
# Unsupported tags:
#
# These will appear literally in the output because I cannot handle them easily.
#
# - @docRoot
# - Any other tags not mentioned
#
# Supported HTML:
#
# - p
# - b, i, u
# - pre
# - a href
# - ol, ul, li (ol gets converted to ul)
# - gt, lt, amp, quot, nbsp entities
#
# Stripped HTML:
#
# - code
# - HTML comments
#
# Unsupported HTML:
#
# These will appear literally in the output because I cannot handle them easily.
#
# - Any tags with additional properties other than a href. (ex. <p class=Something>)
# - Any other tags not mentioned
#
# Reference:
#
# http://java.sun.com/j2se/1.5.0/docs/tooldocs/windows/javadoc.html
#
###############################################################################
# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL
use strict;
use integer;
package NaturalDocs::Parser::JavaDoc;
#
#   hash: blockTags
#   An existence hash of the JavaDoc block tags this package recognizes.  The keys are all-lowercase and don't include
#   the @ symbol, so lowercase a tag name before looking it up.
#
my %blockTags = map { $_ => 1 }
    qw( param author deprecated exception return see
        serial serialfield serialdata since throws version
        returns );
#
#   hash: inlineTags
#   An existence hash of the JavaDoc inline tags this package recognizes.  The keys are all-lowercase and don't include
#   the @ symbol, so lowercase a tag name before looking it up.
#
my %inlineTags = map { $_ => 1 }
    qw( inheritdoc docroot code literal link linkplain value );
#
#   Function: IsMine
#
#   Examines the comment and returns whether it is *definitely* JavaDoc content, i.e. is owned by this package.
#
#   A comment counts as JavaDoc content when at least one line starts with a known block tag (see <blockTags>) or
#   contains a known inline tag (see <inlineTags>).  Tag names are compared case-insensitively.  A block tag must be
#   followed by a space or the end of the line, and an inline tag by a space or its closing brace, so tags without
#   arguments such as {@inheritDoc} are recognized too.
#
#   Parameters:
#
#       commentLines - An arrayref of the comment lines.  Must have been run through <NaturalDocs::Parser->CleanComment()>.
#       isJavaDoc - Whether the comment is JavaDoc styled.  This doesn't necessarily mean it has JavaDoc content.
#
#   Returns:
#
#       undef if the comment isn't JavaDoc styled, 1 if it is and contains a recognized tag, and 0 otherwise.
#
sub IsMine #(string[] commentLines, bool isJavaDoc)
    {
    my ($self, $commentLines, $isJavaDoc) = @_;

    if (!$isJavaDoc)
        {  return undef;  };

    foreach my $line (@$commentLines)
        {
        # The tag hashes only have lowercase keys, so the captured name has to be lowercased before the lookup or the /i
        # on the patterns would be pointless.
        if ( ($line =~ /^ *@([a-z]+)(?: |$)/i && exists $blockTags{lc($1)}) ||
             ($line =~ /\{@([a-z]+)[ \}]/i && exists $inlineTags{lc($1)}) )
            {
            return 1;
            };
        };

    return 0;
    };
#
#   Function: ParseComment
#
#   Parses the JavaDoc-syntax comment and adds it to the parsed topic list.
#
#   The comment is handled in three stages.  Stage one turns everything before the first recognized block tag into
#   paragraphs and <pre> code sections.  Stage two collects the block tags into headed sections; a tag's text may start
#   on the tag's own line or on the lines that follow it.  Stage three builds the topic from the result.
#
#   Parameters:
#
#       commentLines - An arrayref of the comment lines.  Must have been run through <NaturalDocs::Parser->CleanComment()>.
#                               *The original memory will be changed.*
#       isJavaDoc - Whether the comment is JavaDoc styled.  This doesn't necessarily mean it has JavaDoc content.
#       lineNumber - The line number of the first of the comment lines.
#       parsedTopics - A reference to the array where any new <NaturalDocs::Parser::ParsedTopics> should be placed.
#
#   Returns:
#
#       The number of parsed topics added to the array, which in this case will always be one.
#
sub ParseComment #(string[] commentLines, bool isJavaDoc, int lineNumber, ParsedTopics[]* parsedTopics)
    {
    my ($self, $commentLines, $isJavaDoc, $lineNumber, $parsedTopics) = @_;

    # Whether the value holds at least one character.  A plain truth test would also reject text that is exactly "0".
    my $hasText = sub
        {  return (defined $_[0] && length $_[0]) ? 1 : 0;  };


    # Stage one: Before block level tags.

    my $i = 0;
    my $output;
    my $unformattedText;
    my $inCode;
    my $sharedCodeIndent;

    # Appends the collected <pre> content to the output as a code section, minus the indent all its lines share.
    my $flushCode = sub
        {
        my $indent = (defined $sharedCodeIndent ? $sharedCodeIndent : 0);

        $unformattedText =~ s/^ {$indent}//mg;
        $unformattedText =~ s/\n{3,}/\n\n/g;
        $unformattedText =~ s/\n+$//;

        $output .= '<code>' . $unformattedText . '</code>';
        };

    # The tag name is lowercased because <blockTags> only has lowercase keys.  A tag alone on its line ends stage one as
    # well, which matches what the stage two pattern accepts.
    while ($i < scalar @$commentLines &&
             !($commentLines->[$i] =~ /^ *@([a-z]+)(?: |$)/i && exists $blockTags{lc($1)}) )
        {
        my $line = $self->ConvertAmpChars($commentLines->[$i]);

        # Case-insensitive so it agrees with the token tests below.
        my @tokens = split(/(&lt;\/?pre&gt;)/i, $line);

        foreach my $token (@tokens)
            {
            if ($token =~ /^&lt;pre&gt;$/i)
                {
                if (!$inCode && $hasText->($unformattedText))
                    {
                    $output .= '<p>' . $self->FormatText($unformattedText, 1) . '</p>';
                    };

                $inCode = 1;
                $unformattedText = undef;
                $sharedCodeIndent = undef;
                }
            elsif ($token =~ /^&lt;\/pre&gt;$/i)
                {
                if ($inCode && $hasText->($unformattedText))
                    {  $flushCode->();  };

                $inCode = 0;
                $unformattedText = undef;
                $sharedCodeIndent = undef;
                }
            elsif (length($token))
                {
                if (!$inCode)
                    {
                    # Paragraph lines are joined with single spaces.
                    $token =~ s/^ +//;
                    if ($hasText->($unformattedText))
                        {  $unformattedText .= ' ';  };
                    }
                else
                    {
                    # Track the smallest indent so it can be removed from every code line later.
                    $token =~ /^( *)/;
                    my $indent = length($1);

                    if (!defined $sharedCodeIndent || $indent < $sharedCodeIndent)
                        {  $sharedCodeIndent = $indent;  };
                    };

                $unformattedText .= $token;
                };
            };

        # Code keeps its line breaks.  Blank lines before the first code line are ignored.
        if ($inCode && $hasText->($unformattedText))
            {  $unformattedText .= "\n";  };

        $i++;
        };

    if ($hasText->($unformattedText))
        {
        if ($inCode)
            {  $flushCode->();  }
        else
            {  $output .= '<p>' . $self->FormatText($unformattedText, 1) . '</p>';  };
        };

    $unformattedText = undef;


    # Stage two: Block level tags.

    my ($params, $authors, $deprecation, $throws, $returns, $seeAlso, $since, $version);

    # The tags that simply accumulate text, mapped to where the text goes and what separates multiple occurrences.
    my %simpleTags = ( 'return' => [ \$returns, ' ' ], 'returns' => [ \$returns, ' ' ],
                                 'author' => [ \$authors, ', ' ],
                                 'deprecated' => [ \$deprecation, ' ' ],
                                 'since' => [ \$since, ', ' ],
                                 'version' => [ \$version, ', ' ] );

    # The tag currently collecting text.  $pendingText is undef when no tag is collecting, and may be an empty string
    # when the tag's text hasn't started yet.  $pendingCloser is only defined for definition list entries.
    my ($pendingText, $pendingTarget, $pendingSeparator, $pendingCloser);

    # Formats the pending text, adds it to its section, and stops collecting.
    my $flushTag = sub
        {
        if (defined $pendingText)
            {
            my $formatted = $self->FormatText($pendingText);

            if (defined $pendingCloser)
                {
                # The entry was already opened when the tag was found, so it has to be closed even if it has no text.
                $$pendingTarget .= $formatted . $pendingCloser;
                }
            elsif (length $formatted)
                {
                if ($hasText->($$pendingTarget))
                    {  $$pendingTarget .= $pendingSeparator;  };

                $$pendingTarget .= $formatted;
                };

            $pendingText = undef;
            };
        };

    while ($i < scalar @$commentLines)
        {
        my $line = $self->ConvertAmpChars($commentLines->[$i]);
        $line =~ s/^ +//;

        if ($line =~ /^@([a-z]+) ?(.*)$/i)
            {
            my ($keyword, $value) = (lc($1), $2);

            # Process the previous one, if any.
            $flushTag->();

            if ($keyword eq 'param' || $keyword eq 'exception' || $keyword eq 'throws')
                {
                my $isParam = ($keyword eq 'param');

                # Exception names may be package qualified.
                my $namePattern = ($isParam ? qr/[a-z0-9_]+/i : qr/[a-z0-9_\.]+/i);

                my ($name, $description) = ($value =~ /^($namePattern) *(.*)$/);

                # If the value doesn't start with a plain name, fall back to its first word instead of reusing the
                # captures of an earlier match.
                if (!defined $name)
                    {  ($name, $description) = ($value =~ /^(\S*) *(.*)$/);  };

                my $target = ($isParam ? \$params : \$throws);
                $$target .= '<de>' . $name . '</de><dd>';

                ($pendingText, $pendingTarget, $pendingSeparator, $pendingCloser) = ($description, $target, undef, '</dd>');
                }
            elsif (exists $simpleTags{$keyword})
                {
                my ($target, $separator) = @{$simpleTags{$keyword}};

                ($pendingText, $pendingTarget, $pendingSeparator, $pendingCloser) = ($value, $target, $separator, undef);
                }
            elsif ($keyword eq 'see')
                {
                # @see is handled immediately and doesn't collect continuation lines.
                if (length $value)
                    {
                    if ($hasText->($seeAlso))
                        {  $seeAlso .= ', ';  };

                    # Quoted strings and HTML links are formatted as text.  Anything else is a symbol reference.
                    if ($value =~ /^&(?:quot|lt);/i)
                        {  $seeAlso .= $self->FormatText($value);  }
                    else
                        {  $seeAlso .= $self->ConvertLink($value);  };
                    };
                };

            # Everything else will be skipped.
            }
        elsif (defined $pendingText)
            {
            # A continuation line of the current tag.
            if (length $pendingText)
                {  $pendingText .= ' ';  };

            $pendingText .= $line;
            };

        $i++;
        };

    $flushTag->();

    my @sections = ( [ 'Parameters', 'dl', $params ],
                             [ 'Returns', 'p', $returns ],
                             [ 'Throws', 'dl', $throws ],
                             [ 'Since', 'p', $since ],
                             [ 'Version', 'p', $version ],
                             [ 'Deprecated', 'p', $deprecation ],
                             [ 'Author', 'p', $authors ],
                             [ 'See Also', 'p', $seeAlso ] );

    foreach my $section (@sections)
        {
        my ($title, $tag, $content) = @$section;

        if ($hasText->($content))
            {  $output .= '<h>' . $title . '</h><' . $tag . '>' . $content . '</' . $tag . '>';  };
        };


    # Stage three: Build the parsed topic.

    my $summary = NaturalDocs::Parser->GetSummaryFromBody($output);

    push @$parsedTopics, NaturalDocs::Parser::ParsedTopic->New(undef, undef, undef, undef, undef, $summary,
                                                                                             $output, $lineNumber, undef);
    return 1;
    };
#
#   Function: FormatText
#
#   Translates any inline tags or HTML codes to <NDMarkup> and returns it.
#
#   Parameters:
#
#       text - The text to format.  It must already have been run through <ConvertAmpChars()> because the HTML tags and
#                entities are matched in their escaped forms.
#       inParagraph - Whether <p> tags inside the text should become paragraph breaks.  If not set they are removed.
#
#   Returns:
#
#       The formatted text.
#
sub FormatText #(string text, bool inParagraph)
    {
    my ($self, $text, $inParagraph) = @_;

    # JavaDoc Literal
    # Escaping the content a second time keeps the HTML patterns below from matching anything inside it.  The entity
    # pass further down takes that extra level off again.

    $text =~ s/\{\@(?:code|literal) ([^\}]*)\}/$self->ConvertAmpChars($1)/gie;


    # HTML

    $text =~ s/&lt;b&gt;(.*?)&lt;\/b&gt;/<b>$1<\/b>/gi;
    $text =~ s/&lt;i&gt;(.*?)&lt;\/i&gt;/<i>$1<\/i>/gi;
    $text =~ s/&lt;u&gt;(.*?)&lt;\/u&gt;/<u>$1<\/u>/gi;

    $text =~ s/&lt;code&gt;(.*?)&lt;\/code&gt;/$1/gi;

    $text =~ s/&lt;ul.*?&gt;(.*?)&lt;\/ul&gt;/<ul>$1<\/ul>/gi;
    $text =~ s/&lt;ol.*?&gt;(.*?)&lt;\/ol&gt;/<ul>$1<\/ul>/gi;
    $text =~ s/&lt;li.*?&gt;(.*?)&lt;\/li&gt;/<li>$1<\/li>/gi;

    $text =~ s/&lt;!--.*?--&gt;//gi;

    $text =~ s/&lt;\/p&gt;//gi;
    $text =~ s/^&lt;p&gt;//i;
    if ($inParagraph)
        {  $text =~ s/&lt;p&gt;/<\/p><p>/gi;  }
    else
        {  $text =~ s/&lt;p&gt;//gi;  };

    # mailto links have to be converted first because the general pattern would match them too.
    $text =~ s/&lt;a href=&quot;mailto:(.*?)&quot;.*?&gt;(.*?)&lt;\/a&gt;/$self->MakeEMailLink($1, $2)/gie;
    $text =~ s/&lt;a href=&quot;(.*?)&quot;.*?&gt;(.*?)&lt;\/a&gt;/$self->MakeURLLink($1, $2)/gie;

    # Entities written in the comment are one escaping level too deep at this point.  They're all decoded in a single
    # pass so the output of one replacement can't be decoded again by the next.  Otherwise "&amp;lt;" in the comment
    # would end up as a less-than sign instead of the literal text "&lt;".
    my %entities = ( 'nbsp' => ' ', 'amp' => '&amp;', 'gt' => '&gt;', 'lt' => '&lt;', 'quot' => '&quot;' );
    $text =~ s/&amp;(nbsp|amp|gt|lt|quot);/$entities{lc($1)}/gie;


    # JavaDoc

    $text =~ s/\{\@inheritdoc\}//gi;
    $text =~ s/\{\@(?:linkplain|link|value) ([^\}]*)\}/$self->ConvertLink($1)/gie;

    return $text;
    };
#
#   Function: ConvertAmpChars
#
#   Returns a copy of the text with its ampersands, angle brackets, and double quotes replaced by the amp, lt, gt, and
#   quot entities.  Ampersands that are already part of an entity are escaped again.
#
sub ConvertAmpChars #(text)
    {
    my ($self, $text) = @_;

    my %entityFor = ( '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' );

    # Each character is visited once, so the ampersands added by the replacements aren't escaped a second time.
    $text =~ s/([&<>"])/$entityFor{$1}/g;

    return $text;
    };
#
#   Function: ConvertLink
#
#   Converts the content of a JavaDoc link, which is a symbol reference optionally followed by a label, into a
#   <NDMarkup> link tag.
#
#   Parameters:
#
#       text - The reference, such as "Class#Method(int) some label".
#
#   Returns:
#
#       The link tag.  If the text doesn't start with something that can be read as a symbol reference, the text is
#       returned as is, minus the surrounding spaces, instead of as a link.
#
sub ConvertLink #(text)
    {
    my ($self, $text) = @_;

    # The match has to be checked.  If it fails, $1 and $2 still hold whatever an earlier match set, which may have
    # been in the caller.
    if (!defined $text || $text !~ /^ *([a-z0-9\_\.\:\#]+(?:\([^\)]*\))?) *(.*)$/i)
        {
        my $literal = (defined $text ? $text : '');
        $literal =~ s/^ +//;
        $literal =~ s/ +$//;

        return $literal;
        };

    my ($target, $label) = ($1, $2);

    # Convert the anchor to part of the link, but remove it altogether if it's the beginning of the link.
    $target =~ s/^\#//;
    $target =~ s/\#/\./;

    $label =~ s/ +$//;

    if (!length $label)
        {  return '<link target="' . $target . '" name="' . $target . '" original="' . $target . '">';  }
    else
        {  return '<link target="' . $target . '" name="' . $label . '" original="' . $label . ' (' . $target . ')">';  };
    };
#
#   Function: MakeURLLink
#
#   Returns a <NDMarkup> url tag.
#
#   Parameters:
#
#       target - The value of the tag's target attribute.
#       text - The value of the tag's name attribute.
#
sub MakeURLLink #(target, text)
    {
    my ($self, $target, $text) = @_;

    return sprintf('<url target="%s" name="%s">', $target, $text);
    };
#
#   Function: MakeEMailLink
#
#   Returns a <NDMarkup> email tag.
#
#   Parameters:
#
#       target - The value of the tag's target attribute.
#       text - The value of the tag's name attribute.
#
sub MakeEMailLink #(target, text)
    {
    my ($self, $target, $text) = @_;

    return sprintf('<email target="%s" name="%s">', $target, $text);
    };
1;