ddnet/docs/tool/Modules/NaturalDocs/Languages/Simple.pm
2008-08-02 08:21:29 +00:00

504 lines
18 KiB
Perl

###############################################################################
#
# Class: NaturalDocs::Languages::Simple
#
###############################################################################
#
# A class containing the characteristics of a particular programming language for basic support within Natural Docs.
# Also serves as a base class for languages that break from general conventions, such as not having parameter lists use
# parentheses and commas.
#
###############################################################################
# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL
use strict;
use integer;
package NaturalDocs::Languages::Simple;
use base 'NaturalDocs::Languages::Base';
use base 'Exporter';
our @EXPORT = ( 'ENDER_ACCEPT', 'ENDER_IGNORE', 'ENDER_ACCEPT_AND_CONTINUE', 'ENDER_REVERT_TO_ACCEPTED' );
use NaturalDocs::DefineMembers 'LINE_COMMENT_SYMBOLS', 'LineCommentSymbols()', 'SetLineCommentSymbols() duparrayref',
'BLOCK_COMMENT_SYMBOLS', 'BlockCommentSymbols()',
'SetBlockCommentSymbols() duparrayref',
'PROTOTYPE_ENDERS',
'LINE_EXTENDER', 'LineExtender()', 'SetLineExtender()',
'PACKAGE_SEPARATOR', 'PackageSeparator()',
'PACKAGE_SEPARATOR_WAS_SET', 'PackageSeparatorWasSet()',
'ENUM_VALUES', 'EnumValues()',
'ENUM_VALUES_WAS_SET', 'EnumValuesWasSet()';
#
# Function: New
#
# Builds a new language object through the parent class's constructor and then applies this class's defaults: the
# package separator is set to a period and the enum values behavior to the value of ::ENUM_GLOBAL().
#
# Parameters:
#
# name - The name of the language.
#
# Returns:
#
# The new object.
#
sub New #(name)
{
    my $class = shift;
    my ($name) = @_;

    my $self = $class->SUPER::New($name);

    # Defaults.  Neither of the "was set" flags is touched, so these still count as unchanged.
    $self->[PACKAGE_SEPARATOR] = '.';
    $self->[ENUM_VALUES] = ::ENUM_GLOBAL();

    return $self;
};
#
# Functions: Members
#
# LineCommentSymbols - Returns an arrayref of symbols that start a line comment, or undef if none.
# SetLineCommentSymbols - Replaces the arrayref of symbols that start a line comment.
# BlockCommentSymbols - Returns an arrayref of start/end symbol pairs that specify a block comment, or undef if none. Pairs
# are specified with two consecutive array entries.
# SetBlockCommentSymbols - Replaces the arrayref of start/end symbol pairs that specify a block comment. Pairs are
# specified with two consecutive array entries.
# LineExtender - Returns the symbol to ignore a line break in languages where line breaks are significant.
# SetLineExtender - Replaces the symbol to ignore a line break in languages where line breaks are significant.
# PackageSeparator - Returns the package separator symbol.
# PackageSeparatorWasSet - Returns whether the package separator symbol was ever changed from the default.
#
#
# Function: SetPackageSeparator
# Stores a new package separator string for the language and records that the separator was explicitly set, which is
# what <PackageSeparatorWasSet()> reports.
#
sub SetPackageSeparator #(separator)
{
    my $self = shift;
    my ($newSeparator) = @_;

    $self->[PACKAGE_SEPARATOR] = $newSeparator;

    # Flag the change even if the new value happens to equal the default.
    $self->[PACKAGE_SEPARATOR_WAS_SET] = 1;
};
#
# Functions: Members
#
# EnumValues - Returns the <EnumValuesType> that describes how the language handles enums.
# EnumValuesWasSet - Returns whether <EnumValues> was ever changed from the default.
#
# Function: SetEnumValues
# Stores a new <EnumValuesType> describing how the language handles enums and records that the behavior was explicitly
# set, which is what <EnumValuesWasSet()> reports.
#
sub SetEnumValues #(EnumValuesType newBehavior)
{
    my $self = shift;
    my ($newBehavior) = @_;

    $self->[ENUM_VALUES] = $newBehavior;

    # Flag the change even if the new value happens to equal the default.
    $self->[ENUM_VALUES_WAS_SET] = 1;
};
#
# Function: PrototypeEndersFor
#
# Looks up the prototype ender symbols registered for a <TopicType>.
#
# Parameters:
#
# type - The <TopicType> to look up.
#
# Returns:
#
# An arrayref of prototype ender symbols, or undef if no enders were ever set or none are stored for that type.
#
sub PrototypeEndersFor #(type)
{
    my ($self, $type) = @_;

    # The table is only created by the first call to SetPrototypeEndersFor(), so it may not exist yet.
    my $endersByType = $self->[PROTOTYPE_ENDERS];

    return (defined $endersByType ? $endersByType->{$type} : undef);
};
#
# Function: SetPrototypeEndersFor
#
# Replaces the prototype ender symbols stored for a <TopicType>.
#
# Parameters:
#
# type - The <TopicType> the enders apply to.
# enders - An arrayref of ender symbols, or undef to remove the entry for the type. The array is copied, so changes
# the caller makes to it afterwards do not affect the stored list.
#
sub SetPrototypeEndersFor #(type, enders)
{
    my ($self, $type, $enders) = @_;

    # Create the per-type table on first use.
    unless (defined $self->[PROTOTYPE_ENDERS])
        { $self->[PROTOTYPE_ENDERS] = { }; };

    my $endersByType = $self->[PROTOTYPE_ENDERS];

    if (defined $enders)
        { $endersByType->{$type} = [ @$enders ]; }
    else
        { delete $endersByType->{$type}; };
};
###############################################################################
# Group: Parsing Functions
#
# Function: ParseFile
#
# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>
# and all other sections to <OnCode()>.
#
# If the language's name is "Text File" the whole file is sent to <NaturalDocs::Parser->OnComment()> as a single comment
# starting at line 1. Otherwise the file is split into runs of line comments, block comments, and code. A block comment
# that has anything other than whitespace after its closing symbol is treated as code.
#
# Parameters:
#
# sourceFile - The <FileName> of the source file to parse.
# topicsList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
#
# Returns:
#
# Since this class cannot automatically document the code or generate a scope record, it always returns ( undef, undef ).
#
# Dies:
#
# If the source file cannot be opened.
#
sub ParseFile #(sourceFile, topicsList)
{
    my ($self, $sourceFile, $topicsList) = @_;

    # Three-argument open with a lexical handle.  The file name is taken literally instead of being scanned for mode
    # characters and surrounding whitespace, and the handle is not a package global shared with other callers.
    open(my $sourceFileHandle, '<', $sourceFile)
        or die "Couldn't open input file " . $sourceFile . "\n";

    my @commentLines;
    my @codeLines;
    my $lastCommentTopicCount = 0;

    if ($self->Name() eq 'Text File')
    {
        my $line = <$sourceFileHandle>;

        # On the very first line, remove a Unicode BOM if present.  Information on it available at:
        # http://www.unicode.org/faq/utf_bom.html#BOM
        # The file may be empty, in which case there is no first line to clean.
        if (defined $line)
            { $line =~ s/^\xEF\xBB\xBF//; };

        # Test for definedness rather than truth so a final line consisting of just "0" with no line break is kept.
        while (defined $line)
        {
            ::XChomp(\$line);
            push @commentLines, $line;
            $line = <$sourceFileHandle>;
        };

        NaturalDocs::Parser->OnComment(\@commentLines, 1);
    }
    else
    {
        my $line = <$sourceFileHandle>;

        # The line number of the first line that has not been handed to OnCode() or OnComment() yet.
        my $lineNumber = 1;

        # On the very first line, remove a Unicode BOM if present.  Information on it available at:
        # http://www.unicode.org/faq/utf_bom.html#BOM
        if (defined $line)
            { $line =~ s/^\xEF\xBB\xBF//; };

        while (defined $line)
        {
            ::XChomp(\$line);

            # Retrieve single line comments.  This leaves $line at the next line.
            if ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()))
            {
                for (;;)
                {
                    push @commentLines, $line;

                    $line = <$sourceFileHandle>;

                    if (!defined $line)
                        { last; };

                    ::XChomp(\$line);

                    # The run of line comments ends at the first line that doesn't open with a comment symbol.
                    if (!$self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()))
                        { last; };
                };
            }

            # Retrieve multiline comments.  This leaves $line at the next line.
            elsif (my $closingSymbol = $self->StripOpeningBlockSymbols(\$line, $self->BlockCommentSymbols()))
            {
                # Note that it is possible for a multiline comment to start correctly but not end so.  We want those comments
                # to stay in the code.  For example, look at this prototype with this splint annotation:
                #
                # int get_array(integer_t id,
                #               /*@out@*/ array_t array);
                #
                # The annotation starts correctly but doesn't end so because it is followed by code on the same line.

                my $lineRemainder;

                for (;;)
                {
                    $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol);

                    push @commentLines, $line;

                    # If we found an end comment symbol...
                    if (defined $lineRemainder)
                        { last; };

                    $line = <$sourceFileHandle>;

                    if (!defined $line)
                        { last; };

                    ::XChomp(\$line);
                };

                # $lineRemainder is undefined if the file ended before the comment was closed.  Those lines stay a comment.
                if (defined $lineRemainder && $lineRemainder !~ /^[ \t]*$/)
                {
                    # If there was something past the closing symbol this wasn't an acceptable comment, so move the lines to code.
                    push @codeLines, @commentLines;
                    @commentLines = ( );
                };

                $line = <$sourceFileHandle>;
            }

            # Otherwise just add it to the code.
            else
            {
                push @codeLines, $line;
                $line = <$sourceFileHandle>;
            };

            # If there were comments, send them to Parser->OnComment().
            if (scalar @commentLines)
            {
                # First process any code lines before the comment.
                if (scalar @codeLines)
                {
                    $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
                    $lineNumber += scalar @codeLines;
                    @codeLines = ( );
                };

                $lastCommentTopicCount = NaturalDocs::Parser->OnComment(\@commentLines, $lineNumber);
                $lineNumber += scalar @commentLines;
                @commentLines = ( );
            };

        }; # while (defined $line)

        # Clean up any remaining code.
        if (scalar @codeLines)
        {
            $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
            @codeLines = ( );
        };
    };

    close($sourceFileHandle);

    return ( undef, undef );
};
#
# Function: OnCode
#
# Called whenever a section of code is encountered by the parser. Is used to find the prototype of the last topic created.
#
# Nothing is done unless the last comment created at least one topic and <PrototypeEndersFor()> returns a defined value
# for the type of the last topic in the list. In that case the code is scanned, skipping leading blank lines, for a
# prototype ender symbol that appears outside of any brackets. What <OnPrototypeEnd()> returns for that ender decides
# whether the text before it is stored as the topic's prototype via SetPrototype(). If the code lines run out before an
# ender finishes the scan, nothing further is stored.
#
# Parameters:
#
# codeLines - The source code as an arrayref of lines.
# codeLineNumber - The line number of the first line of code. This implementation does not use it.
# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
# lastCommentTopicCount - The number of Natural Docs topics that were created by the last comment.
#
sub OnCode #(codeLines, codeLineNumber, topicList, lastCommentTopicCount)
{
my ($self, $codeLines, $codeLineNumber, $topicList, $lastCommentTopicCount) = @_;
# Only look for a prototype if the last comment produced topics and the last topic's type has ender symbols defined.
if ($lastCommentTopicCount && defined $self->PrototypeEndersFor($topicList->[-1]->Type()))
{
my $lineIndex = 0;
# All the text scanned so far, appended to one token at a time. Candidate prototypes are built from it.
my $prototype;
# Skip all blank lines before a prototype.
while ($lineIndex < scalar @$codeLines && $codeLines->[$lineIndex] =~ /^[ \t]*$/)
{ $lineIndex++; };
# Tokens accumulate across lines. $tokenIndex is the next token to examine, so earlier ones are never rescanned.
my @tokens;
my $tokenIndex = 0;
# A stack of the opening bracket characters we are currently inside of.
my @brackets;
my $enders = $self->PrototypeEndersFor($topicList->[-1]->Type());
# Add prototype lines until we reach the end of the prototype or the end of the code lines.
while ($lineIndex < scalar @$codeLines)
{
# The line break is put back on first because RemoveLineExtender() only removes an extender that is followed by one.
my $line = $self->RemoveLineExtender($codeLines->[$lineIndex] . "\n");
# Split the line into tokens: each bracket character is its own token, and so is each run of text between brackets.
push @tokens, $line =~ /([^\(\)\[\]\{\}\<\>]+|.)/g;
while ($tokenIndex < scalar @tokens)
{
# If we're not inside brackets, check for ender symbols.
if (!scalar @brackets)
{
# The position in the current token to resume searching for enders from.
my $startingIndex = 0;
# The candidate prototype: everything before the current token plus the part of this token examined so far.
my $testPrototype;
for (;;)
{
# NOTE(review): ::FindFirstSymbol() is defined elsewhere. It appears to return the position and text of the first
# ender found in the token at or after $startingIndex - confirm. An index of -1 ends the search of this token.
my ($enderIndex, $ender) = ::FindFirstSymbol($tokens[$tokenIndex], $enders, $startingIndex);
if ($enderIndex == -1)
{ last; }
else
{
# We do this here so we don't duplicate prototype for every single token. Just the first time an ender symbol
# is found in one.
if (!defined $testPrototype)
{ $testPrototype = $prototype; };
# Add the part of the token between the last search position and this ender. The ender itself is not included yet.
$testPrototype .= substr($tokens[$tokenIndex], $startingIndex, $enderIndex - $startingIndex);
my $enderResult;
# If the ender is all text and the character preceding or following it is as well, ignore it.
# This keeps a text ender from matching in the middle of a longer identifier.
if ($ender =~ /^[a-z0-9]+$/i &&
( ($enderIndex > 0 && substr($tokens[$tokenIndex], $enderIndex - 1, 1) =~ /^[a-z0-9_]$/i) ||
substr($tokens[$tokenIndex], $enderIndex + length($ender), 1) =~ /^[a-z0-9_]$/i ) )
{ $enderResult = ENDER_IGNORE(); }
else
{ $enderResult = $self->OnPrototypeEnd($topicList->[-1]->Type(), \$testPrototype, $ender); }
if ($enderResult == ENDER_IGNORE())
{
# The ender is just part of the prototype text. Keep searching the token after it.
$testPrototype .= $ender;
$startingIndex = $enderIndex + length($ender);
}
elsif ($enderResult == ENDER_REVERT_TO_ACCEPTED())
{
# Stop without storing anything new. Whatever prototype an earlier accepted ender stored is left in place.
return;
}
else # ENDER_ACCEPT || ENDER_ACCEPT_AND_CONTINUE
{
my $titleInPrototype = $topicList->[-1]->Title();
# Strip parentheses so Function(2) and Function(int, int) will still match Function(anything).
# This removes everything from the title's last opening parenthesis on, along with any whitespace before it.
$titleInPrototype =~ s/[\t ]*\([^\(]*$//;
# Only store the prototype if the topic's title appears in it.
if (index($testPrototype, $titleInPrototype) != -1)
{
$topicList->[-1]->SetPrototype( $self->NormalizePrototype($testPrototype) );
};
if ($enderResult == ENDER_ACCEPT())
{ return; }
else # ENDER_ACCEPT_AND_CONTINUE
{
# Keep scanning in case the prototype continues past this ender.
$testPrototype .= $ender;
$startingIndex = $enderIndex + length($ender);
};
};
};
};
}
# If we are inside brackets, check for closing symbols.
# Only a closing bracket that matches the most recently opened one is popped. Any other token leaves the stack alone.
elsif ( ($tokens[$tokenIndex] eq ')' && $brackets[-1] eq '(') ||
($tokens[$tokenIndex] eq ']' && $brackets[-1] eq '[') ||
($tokens[$tokenIndex] eq '}' && $brackets[-1] eq '{') ||
($tokens[$tokenIndex] eq '>' && $brackets[-1] eq '<') )
{
pop @brackets;
};
# Check for opening brackets.
# This is a separate test, so it also applies to a token that was just searched for enders above.
if ($tokens[$tokenIndex] =~ /^[\(\[\{\<]$/)
{
push @brackets, $tokens[$tokenIndex];
};
$prototype .= $tokens[$tokenIndex];
$tokenIndex++;
};
$lineIndex++;
};
# If we got out of that while loop by running out of lines, there was no prototype.
};
};
# The results OnPrototypeEnd() can return for a prototype ender symbol.  They are exported through @EXPORT.
use constant
    {
    # The ender is accepted and the prototype is finished.
    ENDER_ACCEPT => 1,

    # The ender is rejected and parsing should continue.
    ENDER_IGNORE => 2,

    # The ender is accepted, but parsing continues in case the prototype goes on.
    ENDER_ACCEPT_AND_CONTINUE => 3,

    # Parsing ends and the last accepted version of the prototype stands.
    ENDER_REVERT_TO_ACCEPTED => 4,
    };
#
# Function: OnPrototypeEnd
#
# A hook called each time a possible end of a prototype is found, giving derived classes the chance to reject false
# positives. This implementation accepts every ender.
#
# Parameters:
#
# type - The <TopicType> of the prototype.
# prototypeRef - A reference to the prototype so far, minus the ender in dispute.
# ender - The ender symbol.
#
# Returns:
#
# ENDER_ACCEPT - The ender is accepted and the prototype is finished.
# ENDER_IGNORE - The ender is rejected and parsing should continue. Note that the prototype will be rejected as a whole
# if all enders are ignored before reaching the end of the code.
# ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is. However, the prototype might
# also continue on so continue parsing. If there is no accepted ender between here and
# the end of the code this version will be accepted instead.
# ENDER_REVERT_TO_ACCEPTED - The attempt to continue after ENDER_ACCEPT_AND_CONTINUE failed. Use the last accepted
# version and end parsing.
#
sub OnPrototypeEnd #(type, prototypeRef, ender)
{
    # The arguments are named for reference only.  None of them affect the result here.
    my ($self, $type, $prototypeRef, $ender) = @_;

    return ENDER_ACCEPT();
};
#
# Function: RemoveLineExtender
#
# Strips a trailing line extender from a line of code.
#
# Parameters:
#
# line - The line to check, including its line break.
#
# Returns:
#
# If the last occurrence of the line extender is followed only by spaces, tabs, and the line break, returns the line up to
# the extender with a single space in place of everything from the extender on. If it isn't, or there is no line extender
# defined for the language, returns the line unchanged.
#
sub RemoveLineExtender #(line)
{
    my ($self, $line) = @_;

    my $extender = $self->LineExtender();

    if (!defined $extender)
        { return $line; };

    # Only the last occurrence matters since the extender has to be the final thing on the line.
    my $extenderIndex = rindex($line, $extender);

    if ($extenderIndex == -1)
        { return $line; };

    my $afterExtender = substr($line, $extenderIndex + length($extender));

    if ($afterExtender =~ /^[ \t]*\n$/)
        { return substr($line, 0, $extenderIndex) . ' '; };

    return $line;
};
1;