ddnet/docs/tool/Modules/NaturalDocs/Languages/Base.pm
2008-08-02 08:21:29 +00:00

833 lines
22 KiB
Perl

###############################################################################
#
# Class: NaturalDocs::Languages::Base
#
###############################################################################
#
# A base class for all programming language parsers.
#
###############################################################################
# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL
use strict;
use integer;
package NaturalDocs::Languages::Base;
use NaturalDocs::DefineMembers 'NAME', 'Name()',
'EXTENSIONS', 'Extensions()', 'SetExtensions() duparrayref',
'SHEBANG_STRINGS', 'ShebangStrings()', 'SetShebangStrings() duparrayref',
'IGNORED_PREFIXES',
'ENUM_VALUES';
use base 'Exporter';
our @EXPORT = ('ENUM_GLOBAL', 'ENUM_UNDER_TYPE', 'ENUM_UNDER_PARENT');
#
# Constants: EnumValuesType
#
# How enum values are handled in the language.
#
# ENUM_GLOBAL - Values are always global and thus 'value'.
# ENUM_UNDER_TYPE - Values are under the type in the hierarchy, and thus 'package.enum.value'.
# ENUM_UNDER_PARENT - Values are under the parent in the hierarchy, putting them on the same level as the enum itself. Thus
# 'package.value'.
#
use constant ENUM_GLOBAL => 1;
use constant ENUM_UNDER_TYPE => 2;
use constant ENUM_UNDER_PARENT => 3;
#
# Handle: SOURCEFILEHANDLE
#
# The handle of the source file currently being parsed.
#
#
# Function: New
#
# Creates and returns a new object.
#
# Parameters:
#
# name - The name of the language.
#
sub New #(name)
{
my ($selfPackage, $name) = @_;
my $object = [ ];
$object->[NAME] = $name;
bless $object, $selfPackage;
return $object;
};
#
# Functions: Members
#
# Name - Returns the language's name.
# Extensions - Returns an arrayref of the language's file extensions, or undef if none.
# SetExtensions - Replaces the arrayref of the language's file extensions.
# ShebangStrings - Returns an arrayref of the language's shebang strings, or undef if none.
# SetShebangStrings - Replaces the arrayref of the language's shebang strings.
#
#
# Function: PackageSeparator
# Returns the language's package separator string.
#
sub PackageSeparator
{ return '.'; };
#
# Function: PackageSeparatorWasSet
# Returns whether the language's package separator string was ever changed from the default.
#
sub PackageSeparatorWasSet
{ return 0; };
#
# Function: EnumValues
# Returns the <EnumValuesType> that describes how the language handles enums.
#
sub EnumValues
{ return ENUM_GLOBAL; };
#
# Function: IgnoredPrefixesFor
#
# Returns an arrayref of ignored prefixes for the passed <TopicType>, or undef if none. The array is sorted so that the longest
# prefixes are first.
#
sub IgnoredPrefixesFor #(type)
{
my ($self, $type) = @_;
if (defined $self->[IGNORED_PREFIXES])
{ return $self->[IGNORED_PREFIXES]->{$type}; }
else
{ return undef; };
};
#
# Function: SetIgnoredPrefixesFor
#
# Replaces the arrayref of ignored prefixes for the passed <TopicType>.
#
sub SetIgnoredPrefixesFor #(type, prefixes)
{
my ($self, $type, $prefixesRef) = @_;
if (!defined $self->[IGNORED_PREFIXES])
{ $self->[IGNORED_PREFIXES] = { }; };
if (!defined $prefixesRef)
{ delete $self->[IGNORED_PREFIXES]->{$type}; }
else
{
my $prefixes = [ @$prefixesRef ];
# Sort prefixes to be longest to shortest.
@$prefixes = sort { length $b <=> length $a } @$prefixes;
$self->[IGNORED_PREFIXES]->{$type} = $prefixes;
};
};
#
# Function: HasIgnoredPrefixes
#
# Returns whether the language has any ignored prefixes at all.
#
sub HasIgnoredPrefixes
{ return defined $_[0]->[IGNORED_PREFIXES]; };
#
# Function: CopyIgnoredPrefixesOf
#
# Copies all the ignored prefix settings of the passed <NaturalDocs::Languages::Base> object.
#
sub CopyIgnoredPrefixesOf #(language)
{
my ($self, $language) = @_;
if ($language->HasIgnoredPrefixes())
{
$self->[IGNORED_PREFIXES] = { };
while (my ($topicType, $prefixes) = each %{$language->[IGNORED_PREFIXES]})
{
$self->[IGNORED_PREFIXES]->{$topicType} = [ @$prefixes ];
};
};
};
###############################################################################
# Group: Parsing Functions
#
# Function: ParseFile
#
# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>.
# This *must* be defined by a subclass.
#
# Parameters:
#
# sourceFile - The <FileName> of the source file to parse.
# topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
#
# Returns:
#
# The array ( autoTopics, scopeRecord ).
#
# autoTopics - An arrayref of automatically generated <NaturalDocs::Parser::ParsedTopics> from the file, or undef if none.
# scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none.
#
#
# Function: ParsePrototype
#
# Parses the prototype and returns it as a <NaturalDocs::Languages::Prototype> object.
#
# Parameters:
#
# type - The <TopicType>.
# prototype - The text prototype.
#
# Returns:
#
# A <NaturalDocs::Languages::Prototype> object.
#
sub ParsePrototype #(type, prototype)
{
my ($self, $type, $prototype) = @_;
my $isClass = NaturalDocs::Topics->TypeInfo($type)->ClassHierarchy();
if ($prototype !~ /\(.*[^ ].*\)/ && (!$isClass || $prototype !~ /\{.*[^ ].*\}/))
{
my $object = NaturalDocs::Languages::Prototype->New($prototype);
return $object;
};
# Parse the parameters out of the prototype.
my @tokens = $prototype =~ /([^\(\)\[\]\{\}\<\>\'\"\,\;]+|.)/g;
my $parameter;
my @parameterLines;
my @symbolStack;
my $finishedParameters;
my ($beforeParameters, $afterParameters);
foreach my $token (@tokens)
{
if ($finishedParameters)
{ $afterParameters .= $token; }
elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"')
{
if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{'))
{ $parameter .= $token; }
else
{ $beforeParameters .= $token; };
if ($token eq $symbolStack[-1])
{ pop @symbolStack; };
}
elsif ($token =~ /^[\(\[\{\<\'\"]$/)
{
if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{'))
{ $parameter .= $token; }
else
{ $beforeParameters .= $token; };
push @symbolStack, $token;
}
elsif ( ($token eq ')' && $symbolStack[-1] eq '(') ||
($token eq ']' && $symbolStack[-1] eq '[') ||
($token eq '}' && $symbolStack[-1] eq '{') ||
($token eq '>' && $symbolStack[-1] eq '<') )
{
if ($symbolStack[0] eq '(')
{
if ($token eq ')' && scalar @symbolStack == 1)
{
if ($parameter ne ' ')
{ push @parameterLines, $parameter; };
$finishedParameters = 1;
$afterParameters .= $token;
}
else
{ $parameter .= $token; };
}
elsif ($isClass && $symbolStack[0] eq '{')
{
if ($token eq '}' && scalar @symbolStack == 1)
{
if ($parameter ne ' ')
{ push @parameterLines, $parameter; };
$finishedParameters = 1;
$afterParameters .= $token;
}
else
{ $parameter .= $token; };
}
else
{
$beforeParameters .= $token;
};
pop @symbolStack;
}
elsif ($token eq ',' || $token eq ';')
{
if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{'))
{
if (scalar @symbolStack == 1)
{
push @parameterLines, $parameter . $token;
$parameter = undef;
}
else
{
$parameter .= $token;
};
}
else
{
$beforeParameters .= $token;
};
}
else
{
if ($symbolStack[0] eq '(' || ($isClass && $symbolStack[0] eq '{'))
{ $parameter .= $token; }
else
{ $beforeParameters .= $token; };
};
};
foreach my $part (\$beforeParameters, \$afterParameters)
{
$$part =~ s/^ //;
$$part =~ s/ $//;
};
my $prototypeObject = NaturalDocs::Languages::Prototype->New($beforeParameters, $afterParameters);
# Parse the actual parameters.
foreach my $parameterLine (@parameterLines)
{
$prototypeObject->AddParameter( $self->ParseParameterLine($parameterLine) );
};
return $prototypeObject;
};
#
# Function: ParseParameterLine
#
# Parses a prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object.
#
# This vesion assumes a C++ style line. If you need a Pascal style line, override this function to forward to
# <ParsePascalParameterLine()>.
#
# > Function(parameter, type parameter, type parameter = value);
#
sub ParseParameterLine #(line)
{
my ($self, $line) = @_;
$line =~ s/^ //;
$line =~ s/ $//;
my @tokens = $line =~ /([^ \(\)\{\}\[\]\<\>\'\"\=]+|.)/g;
my @symbolStack;
my @parameterWords = ( undef );
my ($defaultValue, $defaultValuePrefix, $inDefaultValue);
foreach my $token (@tokens)
{
if ($inDefaultValue)
{ $defaultValue .= $token; }
elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"')
{
$parameterWords[-1] .= $token;
if ($token eq $symbolStack[-1])
{ pop @symbolStack; };
}
elsif ($token =~ /^[\(\[\{\<\'\"]$/)
{
push @symbolStack, $token;
$parameterWords[-1] .= $token;
}
elsif ( ($token eq ')' && $symbolStack[-1] eq '(') ||
($token eq ']' && $symbolStack[-1] eq '[') ||
($token eq '}' && $symbolStack[-1] eq '{') ||
($token eq '>' && $symbolStack[-1] eq '<') )
{
pop @symbolStack;
$parameterWords[-1] .= $token;
}
elsif ($token eq ' ')
{
if (!scalar @symbolStack)
{ push @parameterWords, undef; }
else
{ $parameterWords[-1] .= $token; };
}
elsif ($token eq '=')
{
if (!scalar @symbolStack)
{
$defaultValuePrefix = $token;
$inDefaultValue = 1;
}
else
{ $parameterWords[-1] .= $token; };
}
else
{
$parameterWords[-1] .= $token;
};
};
my ($name, $namePrefix, $type, $typePrefix);
if (!$parameterWords[-1])
{ pop @parameterWords; };
$name = pop @parameterWords;
if ($parameterWords[-1]=~ /([\*\&]+)$/)
{
$namePrefix = $1;
$parameterWords[-1] = substr($parameterWords[-1], 0, 0 - length($namePrefix));
$parameterWords[-1] =~ s/ $//;
if (!$parameterWords[-1])
{ pop @parameterWords; };
}
elsif ($name =~ /^([\*\&]+)/)
{
$namePrefix = $1;
$name = substr($name, length($namePrefix));
$name =~ s/^ //;
};
$type = pop @parameterWords;
$typePrefix = join(' ', @parameterWords);
if ($typePrefix)
{ $typePrefix .= ' '; };
if ($type =~ /^([a-z0-9_\:\.]+(?:\.|\:\:))[a-z0-9_]/i)
{
my $attachedTypePrefix = $1;
$typePrefix .= $attachedTypePrefix;
$type = substr($type, length($attachedTypePrefix));
};
$defaultValue =~ s/ $//;
return NaturalDocs::Languages::Prototype::Parameter->New($type, $typePrefix, $name, $namePrefix,
$defaultValue, $defaultValuePrefix);
};
#
# Function: ParsePascalParameterLine
#
# Parses a Pascal-like prototype parameter line and returns it as a <NaturalDocs::Languages::Prototype::Parameter> object.
# Pascal lines are as follows:
#
# > Function (name: type; name, name: type := value)
#
# Also supports ActionScript lines
#
# > Function (name: type, name, name: type = value)
#
sub ParsePascalParameterLine #(line)
{
my ($self, $line) = @_;
$line =~ s/^ //;
$line =~ s/ $//;
my @tokens = $line =~ /([^\(\)\{\}\[\]\<\>\'\"\=\:]+|\:\=|.)/g;
my ($type, $name, $defaultValue, $defaultValuePrefix, $afterName, $afterDefaultValue);
my @symbolStack;
foreach my $token (@tokens)
{
if ($afterDefaultValue)
{ $defaultValue .= $token; }
elsif ($symbolStack[-1] eq '\'' || $symbolStack[-1] eq '"')
{
if ($afterName)
{ $type .= $token; }
else
{ $name .= $token; };
if ($token eq $symbolStack[-1])
{ pop @symbolStack; };
}
elsif ($token =~ /^[\(\[\{\<\'\"]$/)
{
push @symbolStack, $token;
if ($afterName)
{ $type .= $token; }
else
{ $name .= $token; };
}
elsif ( ($token eq ')' && $symbolStack[-1] eq '(') ||
($token eq ']' && $symbolStack[-1] eq '[') ||
($token eq '}' && $symbolStack[-1] eq '{') ||
($token eq '>' && $symbolStack[-1] eq '<') )
{
pop @symbolStack;
if ($afterName)
{ $type .= $token; }
else
{ $name .= $token; };
}
elsif ($afterName)
{
if (($token eq ':=' || $token eq '=') && !scalar @symbolStack)
{
$defaultValuePrefix = $token;
$afterDefaultValue = 1;
}
else
{ $type .= $token; };
}
elsif ($token eq ':' && !scalar @symbolStack)
{
$name .= $token;
$afterName = 1;
}
else
{ $name .= $token; };
};
foreach my $part (\$type, \$name, \$defaultValue)
{
$$part =~ s/^ //;
$$part =~ s/ $//;
};
return NaturalDocs::Languages::Prototype::Parameter->New($type, undef, $name, undef, $defaultValue, $defaultValuePrefix);
};
#
# Function: TypeBeforeParameter
#
# Returns whether the type appears before the parameter in prototypes.
#
# For example, it does in C++
# > void Function (int a, int b)
#
# but does not in Pascal
# > function Function (a: int; b, c: int)
#
sub TypeBeforeParameter
{
return 1;
};
#
# Function: IgnoredPrefixLength
#
# Returns the length of the prefix that should be ignored in the index, or zero if none.
#
# Parameters:
#
# name - The name of the symbol.
# type - The symbol's <TopicType>.
#
# Returns:
#
# The length of the prefix to ignore, or zero if none.
#
sub IgnoredPrefixLength #(name, type)
{
my ($self, $name, $type) = @_;
foreach my $prefixes ($self->IgnoredPrefixesFor($type), $self->IgnoredPrefixesFor(::TOPIC_GENERAL()))
{
if (defined $prefixes)
{
foreach my $prefix (@$prefixes)
{
if (substr($name, 0, length($prefix)) eq $prefix)
{ return length($prefix); };
};
};
};
return 0;
};
###############################################################################
# Group: Support Functions
#
# Function: StripOpeningSymbols
#
# Determines if the line starts with any of the passed symbols, and if so, replaces it with spaces. This only happens
# if the only thing before it on the line is whitespace.
#
# Parameters:
#
# lineRef - A reference to the line to check.
# symbols - An arrayref of the symbols to check for.
#
# Returns:
#
# If the line starts with any of the passed comment symbols, it will replace it in the line with spaces and return the symbol.
# If the line doesn't, it will leave the line alone and return undef.
#
sub StripOpeningSymbols #(lineRef, symbols)
{
my ($self, $lineRef, $symbols) = @_;
if (!defined $symbols)
{ return undef; };
my ($index, $symbol) = ::FindFirstSymbol($$lineRef, $symbols);
if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/)
{
return substr($$lineRef, $index, length($symbol), ' ' x length($symbol));
};
return undef;
};
#
# Function: StripOpeningJavaDocSymbols
#
# Determines if the line starts with any of the passed symbols, and if so, replaces it with spaces. This only happens
# if the only thing before it on the line is whitespace and the next character after it is whitespace or the end of the line.
#
# Parameters:
#
# lineRef - A reference to the line to check.
# symbols - An arrayref of the symbols to check for.
#
# Returns:
#
# If the line starts with any of the passed comment symbols, it will replace it in the line with spaces and return the symbol.
# If the line doesn't, it will leave the line alone and return undef.
#
sub StripOpeningJavaDocSymbols #(lineRef, symbols)
{
my ($self, $lineRef, $symbols) = @_;
if (!defined $symbols)
{ return undef; };
my ($index, $symbol) = ::FindFirstSymbol($$lineRef, $symbols);
if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/ && substr($$lineRef, $index + length($symbol), 1) =~ /^[ \t]?$/)
{
return substr($$lineRef, $index, length($symbol), ' ' x length($symbol));
};
return undef;
};
#
# Function: StripOpeningBlockSymbols
#
# Determines if the line starts with any of the opening symbols in the passed symbol pairs, and if so, replaces it with spaces.
# This only happens if the only thing before it on the line is whitespace.
#
# Parameters:
#
# lineRef - A reference to the line to check.
# symbolPairs - An arrayref of the symbol pairs to check for. Pairs are specified as two consecutive array entries, with the
# opening symbol first.
#
# Returns:
#
# If the line starts with any of the opening symbols, it will replace it in the line with spaces and return the closing symbol.
# If the line doesn't, it will leave the line alone and return undef.
#
sub StripOpeningBlockSymbols #(lineRef, symbolPairs)
{
my ($self, $lineRef, $symbolPairs) = @_;
if (!defined $symbolPairs)
{ return undef; };
for (my $i = 0; $i < scalar @$symbolPairs; $i += 2)
{
my $index = index($$lineRef, $symbolPairs->[$i]);
if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/)
{
substr($$lineRef, $index, length($symbolPairs->[$i]), ' ' x length($symbolPairs->[$i]));
return $symbolPairs->[$i + 1];
};
};
return undef;
};
#
# Function: StripOpeningJavaDocBlockSymbols
#
# Determines if the line starts with any of the opening symbols in the passed symbol pairs, and if so, replaces it with spaces.
# This only happens if the only thing before it on the line is whitespace and the next character is whitespace or the end of the line.
#
# Parameters:
#
# lineRef - A reference to the line to check.
# symbolPairs - An arrayref of the symbol pairs to check for. Pairs are specified as two consecutive array entries, with the
# opening symbol first.
#
# Returns:
#
# If the line starts with any of the opening symbols, it will replace it in the line with spaces and return the closing symbol.
# If the line doesn't, it will leave the line alone and return undef.
#
sub StripOpeningJavaDocBlockSymbols #(lineRef, symbolPairs)
{
my ($self, $lineRef, $symbolPairs) = @_;
if (!defined $symbolPairs)
{ return undef; };
for (my $i = 0; $i < scalar @$symbolPairs; $i += 2)
{
my $index = index($$lineRef, $symbolPairs->[$i]);
if ($index != -1 && substr($$lineRef, 0, $index) =~ /^[ \t]*$/ &&
substr($$lineRef, $index + length($symbolPairs->[$i]), 1) =~ /^[ \t]?$/)
{
substr($$lineRef, $index, length($symbolPairs->[$i]), ' ' x length($symbolPairs->[$i]));
return $symbolPairs->[$i + 1];
};
};
return undef;
};
#
# Function: StripClosingSymbol
#
# Determines if the line contains a symbol, and if so, truncates it just before the symbol.
#
# Parameters:
#
# lineRef - A reference to the line to check.
# symbol - The symbol to check for.
#
# Returns:
#
# The remainder of the line, or undef if the symbol was not found.
#
sub StripClosingSymbol #(lineRef, symbol)
{
my ($self, $lineRef, $symbol) = @_;
my $index = index($$lineRef, $symbol);
if ($index != -1)
{
my $lineRemainder = substr($$lineRef, $index + length($symbol));
$$lineRef = substr($$lineRef, 0, $index);
return $lineRemainder;
}
else
{ return undef; };
};
#
# Function: NormalizePrototype
#
# Normalizes a prototype. Specifically, condenses spaces, tabs, and line breaks into single spaces and removes leading and
# trailing ones.
#
# Parameters:
#
# prototype - The original prototype string.
#
# Returns:
#
# The normalized prototype.
#
sub NormalizePrototype #(prototype)
{
my ($self, $prototype) = @_;
$prototype =~ tr/ \t\r\n/ /s;
$prototype =~ s/^ //;
$prototype =~ s/ $//;
return $prototype;
};
1;