mirror of
https://github.com/ddnet/ddnet.git
synced 2024-11-10 10:08:18 +00:00
504 lines
18 KiB
Perl
504 lines
18 KiB
Perl
###############################################################################
|
|
#
|
|
# Class: NaturalDocs::Languages::Simple
|
|
#
|
|
###############################################################################
|
|
#
|
|
# A class containing the characteristics of a particular programming language for basic support within Natural Docs.
|
|
# Also serves as a base class for languages that break from general conventions, such as not having parameter lists use
|
|
# parenthesis and commas.
|
|
#
|
|
###############################################################################
|
|
|
|
# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
|
|
# Natural Docs is licensed under the GPL
|
|
|
|
use strict;
|
|
use integer;
|
|
|
|
package NaturalDocs::Languages::Simple;
|
|
|
|
use base 'NaturalDocs::Languages::Base';
|
|
use base 'Exporter';
|
|
|
|
our @EXPORT = ( 'ENDER_ACCEPT', 'ENDER_IGNORE', 'ENDER_ACCEPT_AND_CONTINUE', 'ENDER_REVERT_TO_ACCEPTED' );
|
|
|
|
|
|
use NaturalDocs::DefineMembers 'LINE_COMMENT_SYMBOLS', 'LineCommentSymbols()', 'SetLineCommentSymbols() duparrayref',
|
|
'BLOCK_COMMENT_SYMBOLS', 'BlockCommentSymbols()',
|
|
'SetBlockCommentSymbols() duparrayref',
|
|
'PROTOTYPE_ENDERS',
|
|
'LINE_EXTENDER', 'LineExtender()', 'SetLineExtender()',
|
|
'PACKAGE_SEPARATOR', 'PackageSeparator()',
|
|
'PACKAGE_SEPARATOR_WAS_SET', 'PackageSeparatorWasSet()',
|
|
'ENUM_VALUES', 'EnumValues()',
|
|
'ENUM_VALUES_WAS_SET', 'EnumValuesWasSet()';
|
|
|
|
#
|
|
# Function: New
|
|
#
|
|
# Creates and returns a new object.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# name - The name of the language.
|
|
#
|
|
sub New #(name)
|
|
{
|
|
my ($selfPackage, $name) = @_;
|
|
|
|
my $object = $selfPackage->SUPER::New($name);
|
|
|
|
$object->[ENUM_VALUES] = ::ENUM_GLOBAL();
|
|
$object->[PACKAGE_SEPARATOR] = '.';
|
|
|
|
return $object;
|
|
};
|
|
|
|
|
|
#
|
|
# Functions: Members
|
|
#
|
|
# LineCommentSymbols - Returns an arrayref of symbols that start a line comment, or undef if none.
|
|
# SetLineCommentSymbols - Replaces the arrayref of symbols that start a line comment.
|
|
# BlockCommentSymbols - Returns an arrayref of start/end symbol pairs that specify a block comment, or undef if none. Pairs
|
|
# are specified with two consecutive array entries.
|
|
# SetBlockCommentSymbols - Replaces the arrayref of start/end symbol pairs that specify a block comment. Pairs are
|
|
# specified with two consecutive array entries.
|
|
# LineExtender - Returns the symbol to ignore a line break in languages where line breaks are significant.
|
|
# SetLineExtender - Replaces the symbol to ignore a line break in languages where line breaks are significant.
|
|
# PackageSeparator - Returns the package separator symbol.
|
|
# PackageSeparatorWasSet - Returns whether the package separator symbol was ever changed from the default.
|
|
#
|
|
|
|
#
|
|
# Function: SetPackageSeparator
|
|
# Replaces the language's package separator string.
|
|
#
|
|
sub SetPackageSeparator #(separator)
|
|
{
|
|
my ($self, $separator) = @_;
|
|
$self->[PACKAGE_SEPARATOR] = $separator;
|
|
$self->[PACKAGE_SEPARATOR_WAS_SET] = 1;
|
|
};
|
|
|
|
|
|
#
|
|
# Functions: Members
|
|
#
|
|
# EnumValues - Returns the <EnumValuesType> that describes how the language handles enums.
|
|
# EnumValuesWasSet - Returns whether <EnumValues> was ever changed from the default.
|
|
|
|
|
|
#
|
|
# Function: SetEnumValues
|
|
# Replaces the <EnumValuesType> that describes how the language handles enums.
|
|
#
|
|
sub SetEnumValues #(EnumValuesType newBehavior)
|
|
{
|
|
my ($self, $behavior) = @_;
|
|
$self->[ENUM_VALUES] = $behavior;
|
|
$self->[ENUM_VALUES_WAS_SET] = 1;
|
|
};
|
|
|
|
|
|
#
|
|
# Function: PrototypeEndersFor
|
|
#
|
|
# Returns an arrayref of prototype ender symbols for the passed <TopicType>, or undef if none.
|
|
#
|
|
sub PrototypeEndersFor #(type)
|
|
{
|
|
my ($self, $type) = @_;
|
|
|
|
if (defined $self->[PROTOTYPE_ENDERS])
|
|
{ return $self->[PROTOTYPE_ENDERS]->{$type}; }
|
|
else
|
|
{ return undef; };
|
|
};
|
|
|
|
|
|
#
|
|
# Function: SetPrototypeEndersFor
|
|
#
|
|
# Replaces the arrayref of prototype ender symbols for the passed <TopicType>.
|
|
#
|
|
sub SetPrototypeEndersFor #(type, enders)
|
|
{
|
|
my ($self, $type, $enders) = @_;
|
|
|
|
if (!defined $self->[PROTOTYPE_ENDERS])
|
|
{ $self->[PROTOTYPE_ENDERS] = { }; };
|
|
|
|
if (!defined $enders)
|
|
{ delete $self->[PROTOTYPE_ENDERS]->{$type}; }
|
|
else
|
|
{
|
|
$self->[PROTOTYPE_ENDERS]->{$type} = [ @$enders ];
|
|
};
|
|
};
|
|
|
|
|
|
|
|
|
|
###############################################################################
|
|
# Group: Parsing Functions
|
|
|
|
|
|
#
|
|
# Function: ParseFile
|
|
#
|
|
# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>
|
|
# and all other sections to <OnCode()>.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# sourceFile - The <FileName> of the source file to parse.
|
|
# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
|
|
#
|
|
# Returns:
|
|
#
|
|
# Since this class cannot automatically document the code or generate a scope record, it always returns ( undef, undef ).
|
|
#
|
|
sub ParseFile #(sourceFile, topicsList)
|
|
{
|
|
my ($self, $sourceFile, $topicsList) = @_;
|
|
|
|
open(SOURCEFILEHANDLE, '<' . $sourceFile)
|
|
or die "Couldn't open input file " . $sourceFile . "\n";
|
|
|
|
my @commentLines;
|
|
my @codeLines;
|
|
my $lastCommentTopicCount = 0;
|
|
|
|
if ($self->Name() eq 'Text File')
|
|
{
|
|
my $line = <SOURCEFILEHANDLE>;
|
|
|
|
# On the very first line, remove a Unicode BOM if present. Information on it available at:
|
|
# http://www.unicode.org/faq/utf_bom.html#BOM
|
|
$line =~ s/^\xEF\xBB\xBF//;
|
|
|
|
while ($line)
|
|
{
|
|
::XChomp(\$line);
|
|
push @commentLines, $line;
|
|
$line = <SOURCEFILEHANDLE>;
|
|
};
|
|
|
|
NaturalDocs::Parser->OnComment(\@commentLines, 1);
|
|
}
|
|
|
|
else
|
|
{
|
|
my $line = <SOURCEFILEHANDLE>;
|
|
my $lineNumber = 1;
|
|
|
|
# On the very first line, remove a Unicode BOM if present. Information on it available at:
|
|
# http://www.unicode.org/faq/utf_bom.html#BOM
|
|
$line =~ s/^\xEF\xBB\xBF//;
|
|
|
|
while (defined $line)
|
|
{
|
|
::XChomp(\$line);
|
|
my $originalLine = $line;
|
|
|
|
|
|
# Retrieve single line comments. This leaves $line at the next line.
|
|
|
|
if ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()))
|
|
{
|
|
do
|
|
{
|
|
push @commentLines, $line;
|
|
$line = <SOURCEFILEHANDLE>;
|
|
|
|
if (!defined $line)
|
|
{ goto EndDo; };
|
|
|
|
::XChomp(\$line);
|
|
}
|
|
while ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()));
|
|
|
|
EndDo: # I hate Perl sometimes.
|
|
}
|
|
|
|
|
|
# Retrieve multiline comments. This leaves $line at the next line.
|
|
|
|
elsif (my $closingSymbol = $self->StripOpeningBlockSymbols(\$line, $self->BlockCommentSymbols()))
|
|
{
|
|
# Note that it is possible for a multiline comment to start correctly but not end so. We want those comments to stay in
|
|
# the code. For example, look at this prototype with this splint annotation:
|
|
#
|
|
# int get_array(integer_t id,
|
|
# /*@out@*/ array_t array);
|
|
#
|
|
# The annotation starts correctly but doesn't end so because it is followed by code on the same line.
|
|
|
|
my $lineRemainder;
|
|
|
|
for (;;)
|
|
{
|
|
$lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol);
|
|
|
|
push @commentLines, $line;
|
|
|
|
# If we found an end comment symbol...
|
|
if (defined $lineRemainder)
|
|
{ last; };
|
|
|
|
$line = <SOURCEFILEHANDLE>;
|
|
|
|
if (!defined $line)
|
|
{ last; };
|
|
|
|
::XChomp(\$line);
|
|
};
|
|
|
|
if ($lineRemainder !~ /^[ \t]*$/)
|
|
{
|
|
# If there was something past the closing symbol this wasn't an acceptable comment, so move the lines to code.
|
|
push @codeLines, @commentLines;
|
|
@commentLines = ( );
|
|
};
|
|
|
|
$line = <SOURCEFILEHANDLE>;
|
|
}
|
|
|
|
|
|
# Otherwise just add it to the code.
|
|
|
|
else
|
|
{
|
|
push @codeLines, $line;
|
|
$line = <SOURCEFILEHANDLE>;
|
|
};
|
|
|
|
|
|
# If there were comments, send them to Parser->OnComment().
|
|
|
|
if (scalar @commentLines)
|
|
{
|
|
# First process any code lines before the comment.
|
|
if (scalar @codeLines)
|
|
{
|
|
$self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
|
|
$lineNumber += scalar @codeLines;
|
|
@codeLines = ( );
|
|
};
|
|
|
|
$lastCommentTopicCount = NaturalDocs::Parser->OnComment(\@commentLines, $lineNumber);
|
|
$lineNumber += scalar @commentLines;
|
|
@commentLines = ( );
|
|
};
|
|
|
|
}; # while (defined $line)
|
|
|
|
|
|
# Clean up any remaining code.
|
|
if (scalar @codeLines)
|
|
{
|
|
$self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
|
|
@codeLines = ( );
|
|
};
|
|
|
|
};
|
|
|
|
close(SOURCEFILEHANDLE);
|
|
|
|
return ( undef, undef );
|
|
};
|
|
|
|
|
|
#
|
|
# Function: OnCode
|
|
#
|
|
# Called whenever a section of code is encountered by the parser. Is used to find the prototype of the last topic created.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# codeLines - The source code as an arrayref of lines.
|
|
# codeLineNumber - The line number of the first line of code.
|
|
# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
|
|
# lastCommentTopicCount - The number of Natural Docs topics that were created by the last comment.
|
|
#
|
|
sub OnCode #(codeLines, codeLineNumber, topicList, lastCommentTopicCount)
|
|
{
|
|
my ($self, $codeLines, $codeLineNumber, $topicList, $lastCommentTopicCount) = @_;
|
|
|
|
if ($lastCommentTopicCount && defined $self->PrototypeEndersFor($topicList->[-1]->Type()))
|
|
{
|
|
my $lineIndex = 0;
|
|
my $prototype;
|
|
|
|
# Skip all blank lines before a prototype.
|
|
while ($lineIndex < scalar @$codeLines && $codeLines->[$lineIndex] =~ /^[ \t]*$/)
|
|
{ $lineIndex++; };
|
|
|
|
my @tokens;
|
|
my $tokenIndex = 0;
|
|
|
|
my @brackets;
|
|
my $enders = $self->PrototypeEndersFor($topicList->[-1]->Type());
|
|
|
|
# Add prototype lines until we reach the end of the prototype or the end of the code lines.
|
|
while ($lineIndex < scalar @$codeLines)
|
|
{
|
|
my $line = $self->RemoveLineExtender($codeLines->[$lineIndex] . "\n");
|
|
|
|
push @tokens, $line =~ /([^\(\)\[\]\{\}\<\>]+|.)/g;
|
|
|
|
while ($tokenIndex < scalar @tokens)
|
|
{
|
|
# If we're not inside brackets, check for ender symbols.
|
|
if (!scalar @brackets)
|
|
{
|
|
my $startingIndex = 0;
|
|
my $testPrototype;
|
|
|
|
for (;;)
|
|
{
|
|
my ($enderIndex, $ender) = ::FindFirstSymbol($tokens[$tokenIndex], $enders, $startingIndex);
|
|
|
|
if ($enderIndex == -1)
|
|
{ last; }
|
|
else
|
|
{
|
|
# We do this here so we don't duplicate prototype for every single token. Just the first time an ender symbol
|
|
# is found in one.
|
|
if (!defined $testPrototype)
|
|
{ $testPrototype = $prototype; };
|
|
|
|
$testPrototype .= substr($tokens[$tokenIndex], $startingIndex, $enderIndex - $startingIndex);
|
|
|
|
my $enderResult;
|
|
|
|
# If the ender is all text and the character preceding or following it is as well, ignore it.
|
|
if ($ender =~ /^[a-z0-9]+$/i &&
|
|
( ($enderIndex > 0 && substr($tokens[$tokenIndex], $enderIndex - 1, 1) =~ /^[a-z0-9_]$/i) ||
|
|
substr($tokens[$tokenIndex], $enderIndex + length($ender), 1) =~ /^[a-z0-9_]$/i ) )
|
|
{ $enderResult = ENDER_IGNORE(); }
|
|
else
|
|
{ $enderResult = $self->OnPrototypeEnd($topicList->[-1]->Type(), \$testPrototype, $ender); }
|
|
|
|
if ($enderResult == ENDER_IGNORE())
|
|
{
|
|
$testPrototype .= $ender;
|
|
$startingIndex = $enderIndex + length($ender);
|
|
}
|
|
elsif ($enderResult == ENDER_REVERT_TO_ACCEPTED())
|
|
{
|
|
return;
|
|
}
|
|
else # ENDER_ACCEPT || ENDER_ACCEPT_AND_CONTINUE
|
|
{
|
|
my $titleInPrototype = $topicList->[-1]->Title();
|
|
|
|
# Strip parenthesis so Function(2) and Function(int, int) will still match Function(anything).
|
|
$titleInPrototype =~ s/[\t ]*\([^\(]*$//;
|
|
|
|
if (index($testPrototype, $titleInPrototype) != -1)
|
|
{
|
|
$topicList->[-1]->SetPrototype( $self->NormalizePrototype($testPrototype) );
|
|
};
|
|
|
|
if ($enderResult == ENDER_ACCEPT())
|
|
{ return; }
|
|
else # ENDER_ACCEPT_AND_CONTINUE
|
|
{
|
|
$testPrototype .= $ender;
|
|
$startingIndex = $enderIndex + length($ender);
|
|
};
|
|
};
|
|
};
|
|
};
|
|
}
|
|
|
|
# If we are inside brackets, check for closing symbols.
|
|
elsif ( ($tokens[$tokenIndex] eq ')' && $brackets[-1] eq '(') ||
|
|
($tokens[$tokenIndex] eq ']' && $brackets[-1] eq '[') ||
|
|
($tokens[$tokenIndex] eq '}' && $brackets[-1] eq '{') ||
|
|
($tokens[$tokenIndex] eq '>' && $brackets[-1] eq '<') )
|
|
{
|
|
pop @brackets;
|
|
};
|
|
|
|
# Check for opening brackets.
|
|
if ($tokens[$tokenIndex] =~ /^[\(\[\{\<]$/)
|
|
{
|
|
push @brackets, $tokens[$tokenIndex];
|
|
};
|
|
|
|
$prototype .= $tokens[$tokenIndex];
|
|
$tokenIndex++;
|
|
};
|
|
|
|
$lineIndex++;
|
|
};
|
|
|
|
# If we got out of that while loop by running out of lines, there was no prototype.
|
|
};
|
|
};
|
|
|
|
|
|
use constant ENDER_ACCEPT => 1;
|
|
use constant ENDER_IGNORE => 2;
|
|
use constant ENDER_ACCEPT_AND_CONTINUE => 3;
|
|
use constant ENDER_REVERT_TO_ACCEPTED => 4;
|
|
|
|
#
|
|
# Function: OnPrototypeEnd
|
|
#
|
|
# Called whenever the end of a prototype is found so that there's a chance for derived classes to mark false positives.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# type - The <TopicType> of the prototype.
|
|
# prototypeRef - A reference to the prototype so far, minus the ender in dispute.
|
|
# ender - The ender symbol.
|
|
#
|
|
# Returns:
|
|
#
|
|
# ENDER_ACCEPT - The ender is accepted and the prototype is finished.
|
|
# ENDER_IGNORE - The ender is rejected and parsing should continue. Note that the prototype will be rejected as a whole
|
|
# if all enders are ignored before reaching the end of the code.
|
|
# ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is. However, the prototype might
|
|
# also continue on so continue parsing. If there is no accepted ender between here and
|
|
# the end of the code this version will be accepted instead.
|
|
# ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed. Use the last accepted
|
|
# version and end parsing.
|
|
#
|
|
sub OnPrototypeEnd #(type, prototypeRef, ender)
|
|
{
|
|
return ENDER_ACCEPT();
|
|
};
|
|
|
|
|
|
#
|
|
# Function: RemoveLineExtender
|
|
#
|
|
# If the passed line has a line extender, returns it without the extender or the line break that follows. If it doesn't, or there are
|
|
# no line extenders defined, returns the passed line unchanged.
|
|
#
|
|
sub RemoveLineExtender #(line)
|
|
{
|
|
my ($self, $line) = @_;
|
|
|
|
if (defined $self->LineExtender())
|
|
{
|
|
my $lineExtenderIndex = rindex($line, $self->LineExtender());
|
|
|
|
if ($lineExtenderIndex != -1 &&
|
|
substr($line, $lineExtenderIndex + length($self->LineExtender())) =~ /^[ \t]*\n$/)
|
|
{
|
|
$line = substr($line, 0, $lineExtenderIndex) . ' ';
|
|
};
|
|
};
|
|
|
|
return $line;
|
|
};
|
|
|
|
|
|
1;
|