mirror of
https://github.com/ddnet/ddnet.git
synced 2024-11-15 04:28:20 +00:00
1371 lines
44 KiB
Perl
1371 lines
44 KiB
Perl
###############################################################################
|
|
#
|
|
# Class: NaturalDocs::Languages::Perl
|
|
#
|
|
###############################################################################
|
|
#
|
|
# A subclass to handle the language variations of Perl.
|
|
#
|
|
#
|
|
# Topic: Language Support
|
|
#
|
|
# Supported:
|
|
#
|
|
# - Packages
|
|
# - Inheritance via "use base" and "@ISA =".
|
|
# - Functions
|
|
# - Variables
|
|
#
|
|
# Not supported yet:
|
|
#
|
|
# - Constants
|
|
#
|
|
###############################################################################
|
|
|
|
# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
|
|
# Natural Docs is licensed under the GPL
|
|
|
|
use strict;
|
|
use integer;
|
|
|
|
package NaturalDocs::Languages::Perl;
|
|
|
|
use base 'NaturalDocs::Languages::Advanced';
|
|
|
|
|
|
#
|
|
# array: hereDocTerminators
|
|
# An array of active Here Doc terminators, or an empty array if not active. Each entry is an arrayref of tokens. The entries
|
|
# must appear in the order they must appear in the source.
|
|
#
|
|
my @hereDocTerminators;
|
|
|
|
|
|
|
|
###############################################################################
|
|
# Group: Interface Functions
|
|
|
|
|
|
#
|
|
# Function: PackageSeparator
|
|
# Returns the package separator symbol.
|
|
#
|
|
sub PackageSeparator
{
    # Perl joins the parts of a package name with a double colon.
    my $separator = '::';
    return $separator;
}
|
|
|
|
#
|
|
# Function: EnumValues
|
|
# Returns the <EnumValuesType> that describes how the language handles enums.
|
|
#
|
|
sub EnumValues
{
    # Delegates to the ENUM_GLOBAL constant, which lives in package main ("::" is shorthand for "main::").
    return main::ENUM_GLOBAL();
}
|
|
|
|
|
|
#
|
|
# Function: ParseFile
|
|
#
|
|
# Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# sourceFile - The name of the source file to parse.
|
|
# topicsList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
|
|
#
|
|
# Returns:
|
|
#
|
|
# The array ( autoTopics, scopeRecord ).
|
|
#
|
|
# autoTopics - An arrayref of automatically generated topics from the file, or undef if none.
|
|
# scopeRecord - An arrayref of <NaturalDocs::Languages::Advanced::ScopeChanges>, or undef if none.
|
|
#
|
|
sub ParseFile #(sourceFile, topicsList)
{
    my ($self, $sourceFile, $topicsList) = @_;

    # The Here Doc terminator list is shared by the whole file, so reset it before each parse.
    @hereDocTerminators = ( );

    # No regular block comment symbols are passed because every POD block is potentially a JavaDoc comment.
    # PreprocessFile() rewrites "=begin naturaldocs", "=begin javadoc" and friends to "=begin nd" beforehand.
    $self->ParseForCommentsAndTokens($sourceFile, [ '#' ], undef, [ '##' ], [ '=begin nd', '=end nd' ]);

    my $tokens = $self->Tokens();
    my $position = 0;
    my $line = 1;

    TOKEN:
    while ($position < scalar @$tokens)
    {
        # Each recognizer advances $position and $line itself when it succeeds.  They are tried in this fixed order.
        next TOKEN if $self->TryToSkipWhitespace(\$position, \$line);
        next TOKEN if $self->TryToGetPackage(\$position, \$line);
        next TOKEN if $self->TryToGetBase(\$position, \$line);
        next TOKEN if $self->TryToGetFunction(\$position, \$line);
        next TOKEN if $self->TryToGetVariable(\$position, \$line);

        my $token = $tokens->[$position];

        if ($token eq '{')
        {
            $self->StartScope('}', $line, undef);
            $position++;
        }
        elsif ($token eq '}')
        {
            # Only close a scope that was opened by a brace.
            if ($self->ClosingScopeSymbol() eq '}')
            {
                $self->EndScope($line);
            }

            $position++;
        }
        elsif (lc($token) eq 'eval')
        {
            # Step over just the keyword instead of the whole statement, so that an eval's braces are handled like any
            # other floating brace pair by the branches above.
            $position++;
        }
        else
        {
            $self->SkipRestOfStatement(\$position, \$line);
        }
    }

    # The tokens are no longer needed once the file has been walked.
    $self->ClearTokens();

    return ( $self->AutoTopics(), $self->ScopeRecord() );
}
|
|
|
|
|
|
#
|
|
# Function: PreprocessFile
|
|
#
|
|
# Overridden to support "=begin nd" and similar.
|
|
#
|
|
# - "=begin [nd|naturaldocs|natural docs|jd|javadoc|java doc]" all translate to "=begin nd".
|
|
# - "=[nd|naturaldocs|natural docs]" also translate to "=begin nd".
|
|
# - "=end [nd|naturaldocs|natural docs|jd|javadoc]" all translate to "=end nd".
|
|
# - "=cut" from a ND block translates into "=end nd", but the next line will be altered to begin with "(NDPODBREAK)". This is
|
|
# so if there is POD leading into ND which ends with a cut, the parser can still end the original POD because the end ND line
|
|
# would have been removed. Remember, <NaturalDocs::Languages::Advanced->ParseForCommentsAndTokens()> removes
|
|
# Natural Docs-worthy comments to save parsing time.
|
|
# - "=pod begin nd" and "=pod end nd" are supported for compatibility with ND 1.32 and earlier, even though the syntax is a
|
|
# mistake.
|
|
# - It also supports the wrong plural forms, so naturaldoc/natural doc/javadocs/java docs will work.
|
|
#
|
|
sub PreprocessFile #(lines)
{
    # Rewrites the POD directives that delimit Natural Docs / JavaDoc blocks into the canonical "=begin nd" and
    # "=end nd" forms, editing the passed array in place.
    #
    # Parameters:
    #
    #    lines - An arrayref of the file's lines.  Matching lines are replaced; no lines are added or removed.
    #
    my ($self, $lines) = @_;

    # Whether we're currently inside a POD block that was opened as Natural Docs content.
    my $inNDPOD = 0;

    # Whether the next ordinary line needs the "(NDPODBREAK)" marker because a "=cut" was turned into "=end nd".
    my $mustBreakPOD = 0;

    # $line is aliased to each array element, so assigning to it updates @$lines directly.
    foreach my $line (@$lines)
    {
        if ($line =~ /^\=(?:(?:pod[ \t]+)?begin[ \t]+)?(?:nd|natural[ \t]*docs?|jd|java[ \t]*docs?)[ \t]*$/i)
        {
            $line = '=begin nd';
            $inNDPOD = 1;
            $mustBreakPOD = 0;
        }

        # The "pod " prefix is optional.  It used to be mandatory here, which meant only the legacy "=pod end nd"
        # form was recognized: "=end naturaldocs" and similar were left untranslated and $inNDPOD was never cleared,
        # so a later unrelated "=cut" was wrongly rewritten.
        elsif ($line =~ /^\=(?:pod[ \t]+)?end[ \t]+(?:nd|natural[ \t]*docs?|jd|javadocs?)[ \t]*$/i)
        {
            $line = '=end nd';
            $inNDPOD = 0;
            $mustBreakPOD = 0;
        }
        elsif ($line =~ /^\=cut[ \t]*$/i)
        {
            # A "=cut" only needs translating when it is what closes a Natural Docs block.
            if ($inNDPOD)
            {
                $line = '=end nd';
                $inNDPOD = 0;
                $mustBreakPOD = 1;
            }
        }
        elsif ($mustBreakPOD)
        {
            # Mark the first line after a translated "=cut" so the POD skipper still has something to stop on.
            $line = '(NDPODBREAK)' . $line;
            $mustBreakPOD = 0;
        }
    }
}
|
|
|
|
|
|
|
|
###############################################################################
|
|
# Group: Statement Parsing Functions
|
|
# All functions here assume that the current position is at the beginning of a statement.
|
|
#
|
|
# Note for developers: I am well aware that the code in these functions does not check if we're past the end of the tokens as
|
|
# often as it should. We're making use of the fact that Perl will always return undef in these cases to keep the code simpler.
|
|
|
|
|
|
#
|
|
# Function: TryToGetPackage
|
|
#
|
|
# Determines whether the position is at a package declaration statement, and if so, generates a topic for it, skips it, and
|
|
# returns true.
|
|
#
|
|
sub TryToGetPackage #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Only a statement opening with the "package" keyword qualifies.
    return undef if lc($tokens->[$$indexRef]) ne 'package';

    # Work on copies so the caller's position is untouched unless the whole declaration is recognized.
    my $cursor = $$indexRef + 1;
    my $cursorLine = $$lineNumberRef;

    # Whitespace must separate the keyword from the name.
    return undef unless $self->TryToSkipWhitespace(\$cursor, \$cursorLine);

    # Gather consecutive tokens that start with a letter, underscore, or colon into the package name.
    my $packageName;

    for (; $tokens->[$cursor] =~ /^[a-z_\:]/i; $cursor++)
    {
        $packageName .= $tokens->[$cursor];
    }

    return undef unless defined $packageName;

    # The topic, class notification, and package change are all recorded at the line the statement started on.
    my $topic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_CLASS(), $packageName,
                                                      undef, undef,
                                                      undef,
                                                      undef, undef, $$lineNumberRef);
    $self->AddAutoTopic($topic);

    NaturalDocs::Parser->OnClass($topic->Symbol());

    $self->SetPackage($topic->Symbol(), $$lineNumberRef);

    $$indexRef = $cursor;
    $$lineNumberRef = $cursorLine;
    $self->SkipRestOfStatement($indexRef, $lineNumberRef);

    return 1;
}
|
|
|
|
|
|
#
|
|
# Function: TryToGetBase
|
|
#
|
|
# Determines whether the position is at a package base declaration statement, and if so, calls
|
|
# <NaturalDocs::Parser->OnClassParent()>.
|
|
#
|
|
# Supported Syntaxes:
|
|
#
|
|
# > use base [list of strings]
|
|
# > @ISA = [list of strings]
|
|
# > @[package]::ISA = [list of strings]
|
|
# > our @ISA = [list of strings]
|
|
#
|
|
sub TryToGetBase #(indexRef, lineNumberRef)
{
    # Recognizes "use base LIST", "@ISA = LIST", "@Package::ISA = LIST", and "our @ISA = LIST".  For every string in
    # LIST it reports a parent via NaturalDocs::Parser->OnClassParent(), then skips the statement and returns true.
    # Returns undef, leaving the caller's position alone, when no list of parent strings is found.
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # $index and $lineNumber are working copies.  $class is only set for the "@Package::ISA" form.
    my ($index, $lineNumber, $class, $parents);

    if (lc($tokens->[$$indexRef]) eq 'use')
    {
        $index = $$indexRef + 1;
        $lineNumber = $$lineNumberRef;

        if (!$self->TryToSkipWhitespace(\$index, \$lineNumber) ||
            lc($tokens->[$index]) ne 'base')
        {
            return undef;
        }

        $index++;
        $self->TryToSkipWhitespace(\$index, \$lineNumber);

        $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber);
    }
    else
    {
        $index = $$indexRef;
        $lineNumber = $$lineNumberRef;

        if (lc($tokens->[$index]) eq 'our')
        {
            $index++;
            $self->TryToSkipWhitespace(\$index, \$lineNumber);
        }

        if ($tokens->[$index] eq '@')
        {
            $index++;

            while ($index < scalar @$tokens)
            {
                if ($tokens->[$index] eq 'ISA')
                {
                    $index++;
                    $self->TryToSkipWhitespace(\$index, \$lineNumber);

                    if ($tokens->[$index] eq '=')
                    {
                        $index++;
                        $self->TryToSkipWhitespace(\$index, \$lineNumber);

                        $parents = $self->TryToGetListOfStrings(\$index, \$lineNumber);
                    }

                    # The variable name ends at ISA whether or not an assignment followed.  Continuing to scan here
                    # used to append whatever came after the list, such as the "if" of a statement modifier, to
                    # $class, so the parents were attached to a bogus class.
                    last;
                }

                # If token isn't ISA it may be part of a package qualifier in front of it.
                elsif ($tokens->[$index] =~ /^[a-z0-9_:]/i)
                {
                    $class .= $tokens->[$index];
                    $index++;
                }
                else
                {
                    last;
                }
            }
        }
    }

    if (!defined $parents)
    {
        return undef;
    }

    if (defined $class)
    {
        # The qualifier was collected with the "::" that preceded ISA still attached.
        $class =~ s/::$//;
        my @classIdentifiers = split(/::/, $class);
        $class = NaturalDocs::SymbolString->Join(@classIdentifiers);
    }
    else
    {
        # An unqualified @ISA or "use base" applies to the scope we're currently in.
        $class = $self->CurrentScope();
    }

    foreach my $parent (@$parents)
    {
        my @parentIdentifiers = split(/::/, $parent);
        my $parentSymbol = NaturalDocs::SymbolString->Join(@parentIdentifiers);

        NaturalDocs::Parser->OnClassParent($class, $parentSymbol, undef, undef, ::RESOLVE_ABSOLUTE());
    }

    $$indexRef = $index;
    $$lineNumberRef = $lineNumber;
    $self->SkipRestOfStatement($indexRef, $lineNumberRef);

    return 1;
}
|
|
|
|
|
|
#
|
|
# Function: TryToGetFunction
|
|
#
|
|
# Determines whether the position is at a function declaration statement, and if so, generates a topic for it, skips it, and
|
|
# returns true.
|
|
#
|
|
sub TryToGetFunction #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Only a statement opening with the "sub" keyword qualifies.
    return undef if lc($tokens->[$$indexRef]) ne 'sub';

    # The prototype runs from the keyword up to, but not including, the opening brace.
    my $declStart = $$indexRef;
    my $declStartLine = $$lineNumberRef;
    my $cursor = $$indexRef + 1;
    my $cursorLine = $$lineNumberRef;

    # Whitespace and then a token starting with a letter or underscore must follow, otherwise there is no name here.
    return undef unless $self->TryToSkipWhitespace(\$cursor, \$cursorLine);
    return undef if $tokens->[$cursor] !~ /^[a-z_]/i;

    my $functionName = $tokens->[$cursor];
    $cursor++;

    # With 'sub [name]' consumed, walk forward until a semicolon or an opening brace settles what this is.
    while ($cursor < scalar @$tokens)
    {
        my $token = $tokens->[$cursor];

        # A semicolon first means this was a predeclaration rather than an actual function.
        return undef if $token eq ';';

        if ($token eq '{')
        {
            my $prototype = $self->NormalizePrototype( $self->CreateString($declStart, $cursor) );

            $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FUNCTION(), $functionName,
                                                                     $self->CurrentScope(), undef,
                                                                     $prototype,
                                                                     undef, undef, $declStartLine));

            $$indexRef = $cursor;
            $$lineNumberRef = $cursorLine;

            # Positioned on the brace, so this skips the whole function body.
            $self->SkipRestOfStatement($indexRef, $lineNumberRef);

            return 1;
        }

        # Anything else is stepped over with regexp detection off and stringed closing parentheses allowed.
        $self->GenericSkip(\$cursor, \$cursorLine, 0, 1);
    }

    # Ran out of tokens before finding either terminator.
    return undef;
}
|
|
|
|
|
|
#
|
|
# Function: TryToGetVariable
|
|
#
|
|
# Determines if the position is at a variable declaration statement, and if so, generates a topic for it, skips it, and returns
|
|
# true.
|
|
#
|
|
# Supported Syntaxes:
|
|
#
|
|
# - Supports variables declared with "my", "our", and "local".
|
|
# - Supports multiple declarations in one statement, such as "my ($x, $y);".
|
|
# - Supports types and attributes.
|
|
#
|
|
sub TryToGetVariable #(indexRef, lineNumberRef)
{
    # Recognizes a declaration starting with "my", "our", or "local", optionally followed by a type, then either a
    # single variable or a parenthesized list of them.  A topic is added for each variable, the statement is skipped,
    # and true is returned.  Returns undef, leaving the caller's position alone, if the statement doesn't fit.
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    my $firstToken = lc( $tokens->[$$indexRef] );

    if ($firstToken ne 'my' && $firstToken ne 'our' && $firstToken ne 'local')
    {
        return undef;
    }

    my $prototypeStart = $$indexRef;
    my $prototypeStartLine = $$lineNumberRef;
    my $prototypeEnd = $$indexRef + 1;
    my $prototypeEndLine = $$lineNumberRef;

    $self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine);


    # Get the type if present.

    my $type;

    if ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i)
    {
        do
        {
            $type .= $tokens->[$prototypeEnd];
            $prototypeEnd++;
        }
        while ($tokens->[$prototypeEnd] =~ /^[a-z\:]/i);

        if (!$self->TryToSkipWhitespace(\$prototypeEnd, \$prototypeEndLine))
        {
            return undef;
        }
    }


    # Get the name, or possibly names.

    if ($tokens->[$prototypeEnd] eq '(')
    {
        # If there's multiple variables, we'll need to build a custom prototype for each one.  $firstToken already has
        # the declaring word.  We're going to store each name in @names, and we're going to use $prototypeStart and
        # $prototypeEnd to capture any properties appearing after the list.

        my $name;
        my @names;
        my $hasComma = 0;

        $prototypeStart = $prototypeEnd + 1;
        $prototypeStartLine = $prototypeEndLine;

        for (;;)
        {
            $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);

            $name = $self->TryToGetVariableName(\$prototypeStart, \$prototypeStartLine);

            if (!defined $name)
            {
                return undef;
            }

            push @names, $name;

            $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);

            # We can have multiple commas in a row.  We can also have trailing commas.  However, the parenthesis must
            # not start with a comma or be empty, hence this logic does not appear earlier.
            while ($tokens->[$prototypeStart] eq ',')
            {
                $prototypeStart++;
                $self->TryToSkipWhitespace(\$prototypeStart, \$prototypeStartLine);

                $hasComma = 1;
            }

            if ($tokens->[$prototypeStart] eq ')')
            {
                $prototypeStart++;
                last;
            }
            elsif (!$hasComma)
            {
                return undef;
            }
        }


        # Now find the end of the prototype.

        $prototypeEnd = $prototypeStart;
        $prototypeEndLine = $prototypeStartLine;

        while ($prototypeEnd < scalar @$tokens &&
               $tokens->[$prototypeEnd] !~ /^[\;\=]/)
        {
            # Line breaks crossed here have to be counted, otherwise every later line number in the file is too low.
            if ($tokens->[$prototypeEnd] eq "\n")
            {
                $prototypeEndLine++;
            }

            $prototypeEnd++;
        }


        my $prototypePrefix = $firstToken . ' ';
        if (defined $type)
        {
            $prototypePrefix .= $type . ' ';
        }

        my $prototypeSuffix = ' ' . $self->CreateString($prototypeStart, $prototypeEnd);

        foreach $name (@names)
        {
            my $prototype = $self->NormalizePrototype( $prototypePrefix . $name . $prototypeSuffix );

            $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name,
                                                                     $self->CurrentScope(), undef,
                                                                     $prototype,
                                                                     undef, undef, $prototypeStartLine));
        }

        $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine);

        $$indexRef = $prototypeEnd;
        $$lineNumberRef = $prototypeEndLine;
    }

    else # no parenthesis
    {
        my $name = $self->TryToGetVariableName(\$prototypeEnd, \$prototypeEndLine);

        if (!defined $name)
        {
            return undef;
        }

        while ($prototypeEnd < scalar @$tokens &&
               $tokens->[$prototypeEnd] !~ /^[\;\=]/)
        {
            # Line breaks crossed here have to be counted, otherwise every later line number in the file is too low.
            if ($tokens->[$prototypeEnd] eq "\n")
            {
                $prototypeEndLine++;
            }

            $prototypeEnd++;
        }

        my $prototype = $self->NormalizePrototype( $self->CreateString($prototypeStart, $prototypeEnd) );

        $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New(::TOPIC_VARIABLE(), $name,
                                                                 $self->CurrentScope(), undef,
                                                                 $prototype,
                                                                 undef, undef, $prototypeStartLine));

        $self->SkipRestOfStatement(\$prototypeEnd, \$prototypeEndLine);

        $$indexRef = $prototypeEnd;
        $$lineNumberRef = $prototypeEndLine;
    }

    return 1;
}
|
|
|
|
|
|
#
|
|
# Function: TryToGetVariableName
|
|
#
|
|
# Determines if the position is at a variable name, and if so, skips it and returns the name.
|
|
#
|
|
sub TryToGetVariableName #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # A variable name has to open with one of the sigils $ @ % *.  The position is left alone if it doesn't.
    my $sigil = $tokens->[$$indexRef];
    return undef if $sigil !~ /^[\$\@\%\*]/;

    $$indexRef++;

    # Whitespace is tolerated between the sigil and the identifier.
    $self->TryToSkipWhitespace($indexRef, $lineNumberRef);

    # Note that on this failure the position has already moved past the sigil.
    my $identifier = $tokens->[$$indexRef];
    return undef if $identifier !~ /^[a-z_]/i;

    $$indexRef++;

    return $sigil . $identifier;
}
|
|
|
|
|
|
#
|
|
# Function: TryToGetListOfStrings
|
|
#
|
|
# Attempts to retrieve a list of strings from the current position. Returns an arrayref of them if any are found, or undef if none.
|
|
# It stops the moment it reaches a non-string, so "string1, variable, string2" will only return string1.
|
|
#
|
|
# Supported Syntaxes:
|
|
#
|
|
# - Supports parenthesis.
|
|
# - Supports all string forms supported by <TryToSkipString()>.
|
|
# - Supports qw() string arrays.
|
|
#
|
|
sub TryToGetListOfStrings #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # How many parentheses are currently open, and the strings collected so far (undef until the first one).
    my $depth = 0;
    my $found;

    while ($$indexRef < scalar @$tokens)
    {
        my $token = $tokens->[$$indexRef];

        # Parentheses and commas are tolerated as punctuation around the strings.
        if ($token eq '(')
        {
            $$indexRef++;
            $depth++;
            next;
        }

        if ($token eq ')')
        {
            # A closing parenthesis we didn't open belongs to someone else, so stop in front of it.
            last if $depth == 0;

            $$indexRef++;
            $depth--;
            next;
        }

        if ($token eq ',')
        {
            $$indexRef++;
            next;
        }

        # Anything else has to be a string, or the list is over.
        my ($contentStart, $contentEnd);
        last unless $self->TryToSkipString($indexRef, $lineNumberRef, \$contentStart, \$contentEnd);

        my $content = $self->CreateString($contentStart, $contentEnd);

        if (!defined $found)
        {
            $found = [ ];
        }

        if (lc($token) eq 'qw')
        {
            # Collapse each run of spaces, tabs, and line breaks to one space, then split into individual words.
            $content =~ tr/ \t\n/ /s;
            $content =~ s/^ //;

            push @$found, split(/ /, $content);
        }
        else
        {
            push @$found, $content;
        }
    }
    continue
    {
        # Runs after every iteration that didn't leave the loop.
        $self->TryToSkipWhitespace($indexRef, $lineNumberRef);
    }

    return $found;
}
|
|
|
|
|
|
###############################################################################
|
|
# Group: Low Level Parsing Functions
|
|
|
|
|
|
#
|
|
# Function: GenericSkip
|
|
#
|
|
# Advances the position one place through general code.
|
|
#
|
|
# - If the position is on a comment or string, it will skip it completely.
|
|
# - If the position is on an opening symbol, it will skip until past the closing symbol.
|
|
# - If the position is on a regexp or quote-like operator, it will skip it completely.
|
|
# - If the position is on a backslash, it will skip it and the following token.
|
|
# - If the position is on whitespace (including comments), it will skip it completely.
|
|
# - Otherwise it skips one token.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# indexRef - A reference to the current index.
|
|
# lineNumberRef - A reference to the current line number.
|
|
# noRegExps - If set, does not test for regular expressions.
|
|
#
|
|
sub GenericSkip #(indexRef, lineNumberRef, noRegExps, allowStringedClosingParens)
{
    # allowStringedClosingParens is handed on to every nested <GenericSkipUntilAfter()> call.  For parentheses it
    # also switches off the retry that otherwise happens when the closing parenthesis found is stringed.
    my ($self, $indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens) = @_;
    my $tokens = $self->Tokens();

    my $token = $tokens->[$$indexRef];

    # A backslash takes the following token with it, unless that token is a line break.
    if ($token eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef + 1] ne "\n")
    {
        $$indexRef += 2;
    }

    # Backslashed ()[]{} aren't counted since they could be in regexps, and ()[] are valid variable names when
    # preceded by a string.  The scope stack can be ignored because everything is being skipped without parsing, and
    # the recursion is needed anyway.
    elsif ($token eq '{' && !$self->IsBackslashed($$indexRef))
    {
        $$indexRef++;
        $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}', $noRegExps, $allowStringedClosingParens);
    }
    elsif ($token eq '(' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
    {
        # Temporarily allow stringed closing parentheses if this looks like an anonymous function declaration using
        # Perl's cheap version of prototypes, such as "my $_declare = sub($) {}".  That means the previous token,
        # after stepping back over at most one space/tab token, is "sub".
        my $lenient = $allowStringedClosingParens;

        if (!$lenient)
        {
            my $before = $$indexRef - 1;

            if ($before >= 0 && $tokens->[$before] =~ /^[ \t]/)
            {
                $before--;
            }

            if ($before >= 0 && $tokens->[$before] eq 'sub')
            {
                $lenient = 1;
            }
        }

        $$indexRef++;

        # Skip to the closing parenthesis.  When not lenient, go again for as long as the one found was stringed.
        for (;;)
        {
            $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')', $noRegExps, $lenient);

            last unless ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1) && !$lenient);
        }
    }
    elsif ($token eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
    {
        $$indexRef++;

        # Same retry as for parentheses, except that it is unconditional for brackets.
        for (;;)
        {
            $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']', $noRegExps, $allowStringedClosingParens);

            last unless ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1));
        }
    }

    # Each of these moves the position itself when it matches, so there's nothing left to do in the body.
    elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) ||
           $self->TryToSkipString($indexRef, $lineNumberRef) ||
           $self->TryToSkipHereDocDeclaration($indexRef, $lineNumberRef) ||
           (!$noRegExps && $self->TryToSkipRegexp($indexRef, $lineNumberRef) ) )
    {
    }

    else
    {
        $$indexRef++;
    }
}
|
|
|
|
|
|
#
|
|
# Function: GenericSkipUntilAfter
|
|
#
|
|
# Advances the position via <GenericSkip()> until a specific token is reached and passed.
|
|
#
|
|
sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token, noRegExps, allowStringedClosingParens)
{
    my ($self, $indexRef, $lineNumberRef, $token, $noRegExps, $allowStringedClosingParens) = @_;
    my $tokens = $self->Tokens();

    # Step forward until we're sitting on the wanted token or have run out of tokens.
    until ($$indexRef >= scalar @$tokens || $tokens->[$$indexRef] eq $token)
    {
        $self->GenericSkip($indexRef, $lineNumberRef, $noRegExps, $allowStringedClosingParens);
    }

    # The token we stop on is consumed too, so count it if it's a line break.
    $$lineNumberRef++ if $tokens->[$$indexRef] eq "\n";

    $$indexRef++;
}
|
|
|
|
|
|
#
|
|
# Function: GenericRegexpSkip
|
|
#
|
|
# Advances the position one place through regexp code.
|
|
#
|
|
# - If the position is on an opening symbol, it will skip until past the closing symbol.
|
|
# - If the position is on a backslash, it will skip it and the following token.
|
|
# - If the position is on whitespace (not including comments), it will skip it completely.
|
|
# - Otherwise it skips one token.
|
|
#
|
|
# Also differs from <GenericSkip()> in that the parenthesis in $( and $) do count against the scope, where they wouldn't
|
|
# normally.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# indexRef - A reference to the current index.
|
|
# lineNumberRef - A reference to the current line number.
|
|
# inBrackets - Whether we're in brackets or not. If true, we don't care about matching braces and parenthesis.
|
|
#
|
|
sub GenericRegexpSkip #(indexRef, lineNumberRef, inBrackets)
{
    my ($self, $indexRef, $lineNumberRef, $inBrackets) = @_;
    my $tokens = $self->Tokens();

    my $token = $tokens->[$$indexRef];

    # Braces and parentheses are paired up the same way, and only when we're not inside brackets.
    my %groupCloser = ( '{' => '}', '(' => ')' );

    # A backslash takes the following token with it, unless that token is a line break.
    if ($token eq "\\" && $$indexRef + 1 < scalar @$tokens && $tokens->[$$indexRef + 1] ne "\n")
    {
        $$indexRef += 2;
    }

    # The scope stack can be ignored because everything is being skipped without parsing, and the recursion is needed
    # anyway.
    elsif (exists $groupCloser{$token} && !$self->IsBackslashed($$indexRef) && !$inBrackets)
    {
        $$indexRef++;
        $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, $groupCloser{$token});
    }
    elsif ($token eq '[' && !$self->IsBackslashed($$indexRef) && !$self->IsStringed($$indexRef))
    {
        $$indexRef++;

        # Skip to the closing bracket, going again for as long as the one found was stringed.
        for (;;)
        {
            $self->GenericRegexpSkipUntilAfter($indexRef, $lineNumberRef, ']');

            last unless ($$indexRef < scalar @$tokens && $self->IsStringed($$indexRef - 1));
        }
    }
    else
    {
        # Everything else is a single token.  Line breaks just need counting on the way past.
        if ($token eq "\n")
        {
            $$lineNumberRef++;
        }

        $$indexRef++;
    }
}
|
|
|
|
|
|
#
|
|
# Function: GenericRegexpSkipUntilAfter
|
|
#
|
|
# Advances the position via <GenericRegexpSkip()> until a specific token is reached and passed.
|
|
#
|
|
sub GenericRegexpSkipUntilAfter #(indexRef, lineNumberRef, token)
{
    my ($self, $indexRef, $lineNumberRef, $token) = @_;
    my $tokens = $self->Tokens();

    # Looking for a closing bracket means we're inside a bracketed section, which <GenericRegexpSkip()> needs to know.
    my $inBrackets = ( $token eq ']' );

    # Step forward until we're sitting on the wanted token or have run out of tokens.
    until ($$indexRef >= scalar @$tokens || $tokens->[$$indexRef] eq $token)
    {
        $self->GenericRegexpSkip($indexRef, $lineNumberRef, $inBrackets);
    }

    # The token we stop on is consumed too, so count it if it's a line break.
    $$lineNumberRef++ if $tokens->[$$indexRef] eq "\n";

    $$indexRef++;
}
|
|
|
|
|
|
#
|
|
# Function: SkipRestOfStatement
|
|
#
|
|
# Advances the position via <GenericSkip()> until after the end of the current statement, which is defined as a semicolon or
|
|
# a brace group. Of course, either of those appearing inside parenthesis, a nested brace group, etc. don't count.
|
|
#
|
|
sub SkipRestOfStatement #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Walk forward until a semicolon or an unstringed opening brace.  <GenericSkip()> consumes nested groups whole, so
    # any terminator inside them is never seen here.
    while ($$indexRef < scalar @$tokens)
    {
        my $token = $tokens->[$$indexRef];

        last if $token eq ';';
        last if $token eq '{' && !$self->IsStringed($$indexRef);

        $self->GenericSkip($indexRef, $lineNumberRef);
    }

    # Consume the terminator itself: either the semicolon alone or the entire brace group.
    my $terminator = $tokens->[$$indexRef];

    if ($terminator eq ';')
    {
        $$indexRef++;
    }
    elsif ($terminator eq '{')
    {
        $self->GenericSkip($indexRef, $lineNumberRef);
    }
}
|
|
|
|
|
|
#
|
|
# Function: TryToSkipWhitespace
|
|
#
|
|
# If the current position is on whitespace it skips them and returns true. If there are a number of these in a row, it skips them
|
|
# all.
|
|
#
|
|
# Supported Syntax:
|
|
#
|
|
# - Whitespace
|
|
# - Line break
|
|
# - All comment forms supported by <TryToSkipComment()>
|
|
# - Here Doc content
|
|
#
|
|
sub TryToSkipWhitespace #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Stays undef unless at least one thing was skipped.
    my $skippedAny;

    while ($$indexRef < scalar @$tokens)
    {
        # Here Doc content and comments count as whitespace, and the helpers move the position themselves.
        if ($self->TryToSkipHereDocContent($indexRef, $lineNumberRef) ||
            $self->TryToSkipComment($indexRef, $lineNumberRef))
        {
            $skippedAny = 1;
            next;
        }

        my $token = $tokens->[$$indexRef];

        # Anything that is neither a line break nor a space/tab token ends the run.
        last if $token ne "\n" && $token !~ /^[ \t]/;

        $$lineNumberRef++ if $token eq "\n";
        $$indexRef++;
        $skippedAny = 1;
    }

    return $skippedAny;
}
|
|
|
|
|
|
#
|
|
# Function: TryToSkipComment
|
|
# If the current position is on a comment, skip past it and return true.
|
|
#
|
|
sub TryToSkipComment #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;

    # Line comments are tried first.  POD is only tried when that fails.
    if (my $skippedLineComment = $self->TryToSkipLineComment($indexRef, $lineNumberRef))
    {
        return $skippedLineComment;
    }

    return $self->TryToSkipPODComment($indexRef, $lineNumberRef);
}
|
|
|
|
|
|
#
|
|
# Function: TryToSkipLineComment
|
|
# If the current position is on a line comment symbol, skip past it and return true.
|
|
#
|
|
sub TryToSkipLineComment #(indexRef, lineNumberRef)
{
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Not on a pound sign, so not on a comment.
    return undef if $tokens->[$$indexRef] ne '#';

    # A stringed pound sign, as in $#var, is not a comment either.
    return undef if $self->IsStringed($$indexRef);

    $self->SkipRestOfLine($indexRef, $lineNumberRef);
    return 1;
}
|
|
|
|
|
|
#
|
|
# Function: TryToSkipPODComment
|
|
# If the current position is on a POD comment symbol, skip past it and return true.
|
|
#
|
|
sub TryToSkipPODComment #(indexRef, lineNumberRef)
{
    # If the position is on a POD directive at the start of a line, skips through the line containing the closing
    # "=cut", through a "(NDPODBREAK)" marker, or to the end of the tokens, whichever comes first, and returns true.
    # A stray "(NDPODBREAK)" marker on its own is also consumed and reported as true.  Returns undef otherwise.
    #
    # Parameters:
    #
    #    indexRef - A reference to the current index.
    #    lineNumberRef - A reference to the current line number.
    #
    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Whether the three tokens starting at the passed index spell out "(NDPODBREAK)".
    my $isPodBreakAt = sub
    {
        my ($at) = @_;

        return ( $tokens->[$at] eq '(' && $at + 2 < scalar @$tokens &&
                 $tokens->[$at + 1] eq 'NDPODBREAK' && $tokens->[$at + 2] eq ')' );
    };

    # Note that whitespace is not allowed before the equals sign.  It must directly start a line.
    if ($tokens->[$$indexRef] eq '=' &&
        ( $$indexRef == 0 || $tokens->[$$indexRef - 1] eq "\n" ) &&
        $tokens->[$$indexRef + 1] =~ /^[a-z]/i )
    {
        # Skip until =cut or (NDPODBREAK).  Note that it's theoretically possible for =cut to appear without a prior
        # POD directive.
        #
        # This has to be a real loop.  It used to be a do-while, which Perl doesn't treat as a loop for "last", so the
        # "last" statements left whatever loop the caller happened to be in and the "return 1" below was skipped.
        for (;;)
        {
            if ($tokens->[$$indexRef] eq '=' && lc( $tokens->[$$indexRef + 1] ) eq 'cut')
            {
                $self->SkipRestOfLine($indexRef, $lineNumberRef);
                last;
            }
            elsif ($isPodBreakAt->($$indexRef))
            {
                $$indexRef += 3;
                last;
            }
            else
            {
                $self->SkipRestOfLine($indexRef, $lineNumberRef);
            }

            # Unterminated POD simply runs to the end of the file.
            last if $$indexRef >= scalar @$tokens;
        }

        return 1;
    }

    # It's also possible that (NDPODBREAK) will appear without any opening pod statement because "=begin nd" and
    # "=cut" will still result in one.  We need to pick off the stray (NDPODBREAK).
    elsif ($isPodBreakAt->($$indexRef))
    {
        $$indexRef += 3;
        return 1;
    }

    else
    {
        return undef;
    }
}
|
|
|
|
|
|
#
|
|
# Function: TryToSkipString
|
|
# If the current position is on a string delimiter, skip past the string and return true.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# indexRef - A reference to the index of the position to start at.
|
|
# lineNumberRef - A reference to the line number of the position.
|
|
# startContentIndexRef - A reference to the variable in which to store the index of the first content token. May be undef.
|
|
# endContentIndexRef - A reference to the variable in which to store the index of the end of the content, which is one past
|
|
# the last content token. may be undef.
|
|
#
|
|
# Returns:
|
|
#
|
|
# Whether the position was at a string. The index, line number, and content index variables will only be changed if true.
|
|
#
|
|
# Syntax Support:
|
|
#
|
|
# - Supports quotes, apostrophes, backticks, q(), qq(), qx(), and qw().
|
|
# - All symbols are supported for the letter forms.
|
|
#
|
|
sub TryToSkipString #(indexRef, lineNumberRef, startContentIndexRef, endContentIndexRef)
{
    my ($self, $indexRef, $lineNumberRef, $startContentIndexRef, $endContentIndexRef) = @_;
    my $tokens = $self->Tokens();

    # The three plain string delimiters.  All three are Perl variables when preceded by a dollar sign, so they're
    # only tried when the position isn't stringed.
    if (!$self->IsStringed($$indexRef))
    {
        foreach my $delimiter ('\'', '"', '`')
        {
            if ($self->SUPER::TryToSkipString($indexRef, $lineNumberRef, $delimiter, $delimiter,
                                              $startContentIndexRef, $endContentIndexRef))
            {
                return 1;
            }
        }
    }

    # The letter forms q, qq, qx, and qw, unless the token directly follows a sigil and is thus a variable's name.
    my $isQuoteOperator = ( $tokens->[$$indexRef] =~ /^(?:q|qq|qx|qw)$/i &&
                            ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*]$/) );

    if (!$isQuoteOperator)
    {
        return undef;
    }

    $$indexRef++;

    $self->TryToSkipWhitespace($indexRef, $lineNumberRef);

    # The four bracketing symbols close with their counterparts.  Any other symbol closes with itself.
    my %counterpartOf = ( '{' => '}', '(' => ')', '[' => ']', '<' => '>' );

    my $openingSymbol = $tokens->[$$indexRef];
    my $closingSymbol = ( exists $counterpartOf{$openingSymbol} ? $counterpartOf{$openingSymbol} : $openingSymbol );

    # Success is reported for the letter forms regardless of what the parent class returns here.
    $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, $openingSymbol, $closingSymbol,
                                  $startContentIndexRef, $endContentIndexRef);

    return 1;
}
|
|
|
|
|
|
#
|
|
# Function: TryToSkipHereDocDeclaration
|
|
#
|
|
# If the current position is on a Here Doc declaration, add its terminators to <hereDocTerminators> and skip it.
|
|
#
|
|
# Syntax Support:
|
|
#
|
|
# - Supports <<EOF
|
|
# - Supports << "String" with all string forms supported by <TryToSkipString()>.
|
|
#
|
|
sub TryToSkipHereDocDeclaration #(indexRef, lineNumberRef)
    {
    # Recognizes "<<WORD" and "<< <string>" at the current token, queues the terminator's tokens on @hereDocTerminators, and
    # moves the referenced index and line number past the declaration.
    #
    # Returns 1 if a declaration was skipped.  Returns 0 otherwise and leaves the referenced variables alone.

    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    if ($tokens->[$$indexRef] ne '<' || $tokens->[$$indexRef + 1] ne '<')
        {  return 0;  };

    # Work on copies so nothing changes for the caller unless the whole declaration is recognized.
    my $index = $$indexRef + 2;
    my $lineNumber = $$lineNumberRef;

    # No whitespace allowed with the bare word.
    if ($tokens->[$index] =~ /^[a-z0-9_]/i)
        {
        # A number directly after << with a term directly before it ("1<<3", "$x<<2", "$a[0]<<8") is a left shift.  Recording
        # the number as a terminator would make TryToSkipHereDocContent() discard the following lines until one consisting
        # of just that number.  Spaced forms such as "print <<3" are still treated as Here Docs.
        if ($tokens->[$index] =~ /^[0-9]/ &&
            $$indexRef > 0 && $tokens->[$$indexRef - 1] =~ /[a-zA-Z0-9_\)\]\}]$/)
            {  return 0;  };

        push @hereDocTerminators, [ $tokens->[$index] ];
        $index++;
        }
    else
        {
        $self->TryToSkipWhitespace(\$index, \$lineNumber);

        my ($contentStart, $contentEnd);

        if (!$self->TryToSkipString(\$index, \$lineNumber, \$contentStart, \$contentEnd))
            {  return 0;  };

        # The terminator is the string's content, which may span several tokens.  $contentEnd is one past the last one.
        push @hereDocTerminators, [ @{$tokens}[$contentStart..$contentEnd - 1] ];
        };

    $$indexRef = $index;
    $$lineNumberRef = $lineNumber;

    return 1;
    };
|
|
|
|
|
|
#
|
|
# Function: TryToSkipHereDocContent
|
|
#
|
|
# If the current position is at the beginning of a line and there are entries in <hereDocTerminators>, skips lines until all the
|
|
# terminators are exhausted or we reach the end of the file.
|
|
#
|
|
# Returns:
|
|
#
|
|
# Whether the position was on Here Doc content.
|
|
#
|
|
sub TryToSkipHereDocContent #(indexRef, lineNumberRef)
    {
    # Consumes Here Doc content line by line, removing each entry from @hereDocTerminators as its terminator line is passed.
    #
    # Returns true if there were pending terminators and the position was at the very start of a line, false otherwise.

    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # We don't use IsFirstLineToken() because it really needs to be the first line token.  Whitespace is not allowed.
    if ($$indexRef <= 0 || $tokens->[$$indexRef - 1] ne "\n")
        {  return 0;  };

    my $success = (scalar @hereDocTerminators > 0);

    while (scalar @hereDocTerminators && $$indexRef < scalar @$tokens)
        {
        my $terminator = $hereDocTerminators[0];
        my $terminatorIndex = 0;

        # Both bounds must be checked.  Past the end of both arrays the two elements are undef and compare as equal, which
        # would otherwise loop forever when the terminator is the last token in the file.
        while ($terminatorIndex < scalar @$terminator && $$indexRef < scalar @$tokens &&
               $terminator->[$terminatorIndex] eq $tokens->[$$indexRef])
            {
            $terminatorIndex++;
            $$indexRef++;
            };

        my $terminated;

        if ($terminatorIndex == scalar @$terminator)
            {
            # The terminator only counts if nothing but optional trailing whitespace follows it on the line.
            my $lineEnd = $$indexRef;

            if ($lineEnd < scalar @$tokens && $tokens->[$lineEnd] =~ /^[ \t]/)
                {  $lineEnd++;  };

            if ($lineEnd >= scalar @$tokens)
                {
                # The file ended right after the terminator without a line break.
                $$indexRef = $lineEnd;
                $terminated = 1;
                }
            elsif ($tokens->[$lineEnd] eq "\n")
                {
                # Move past the line break itself so the line is only counted once, even when whitespace preceded it.
                $$indexRef = $lineEnd + 1;
                $$lineNumberRef++;
                $terminated = 1;
                };
            };

        if ($terminated)
            {  shift @hereDocTerminators;  }
        else
            {  $self->SkipRestOfLine($indexRef, $lineNumberRef);  };
        };

    return $success;
    };
|
|
|
|
|
|
#
|
|
# Function: TryToSkipRegexp
|
|
# If the current position is on a regular expression or a quote-like operator, skip past it and return true.
|
|
#
|
|
# Syntax Support:
|
|
#
|
|
# - Supports //, m//, qr//, s///, tr///, and y///.
|
|
# - All symbols are supported for the letter forms.
|
|
# - ?? is *not* supported because it could cause problems with ?: statements. The generic parser has a good chance of
|
|
# successfully stumbling through a regex, whereas the regex code will almost certainly see the rest of the file as part of it.
|
|
#
|
|
sub TryToSkipRegexp #(indexRef, lineNumberRef)
    {
    # Skips a regular expression or transliteration starting at the current token.
    #
    # Returns 1 if one was skipped, in which case the referenced index and line number are updated.  Returns undef
    # otherwise and leaves them alone.

    my ($self, $indexRef, $lineNumberRef) = @_;
    my $tokens = $self->Tokens();

    # Bracketing delimiters close with their counterpart.  Every other delimiter closes with itself.
    my %closingSymbolFor = ( '{' => '}', '(' => ')', '[' => ']', '<' => '>' );

    my $isRegexp;

    # If it's a supported character sequence that's not a variable (ex $qr)...
    if ($tokens->[$$indexRef] =~ /^(?:m|qr|s|tr|y)$/i &&
        ($$indexRef == 0 || $tokens->[$$indexRef - 1] !~ /^[\$\%\@\*\-]$/) )
        {  $isRegexp = 1;  }

    elsif ($tokens->[$$indexRef] eq '/' && !$self->IsStringed($$indexRef))
        {
        # This is a bit of a hack.  If we find a random slash, it could be a divide operator or a bare regexp.  Find the first
        # previous non-whitespace token and if it's text, a closing brace, or a string, assume it's a divide operator.
        # (Strings don't make much practical sense there but a regexp would be impossible.)  Otherwise assume it's a regexp.

        # We make a special consideration for split and grep appearing without parentheses.  If the previous token is
        # exactly one of those words and it's not a variable, assume it is a regexp even though it fails the above test.

        my $index = $$indexRef - 1;

        while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/)
            {  $index--;  };

        # The alternation has to be grouped.  Ungrouped, /^split|grep$/ means "starts with split" or "ends with grep" and
        # would also match words like splitter and egrep.
        if ($index < 0 || $tokens->[$index] !~ /^[a-zA-Z0-9_\)\]\}\'\"\`]/ ||
            ($tokens->[$index] =~ /^(?:split|grep)$/ &&
             ($index == 0 || $tokens->[$index - 1] !~ /^[\$\%\@\*]$/)) )
            {  $isRegexp = 1;  };
        };

    if (!$isRegexp)
        {  return undef;  };


    my $operator = lc($tokens->[$$indexRef]);
    my $index = $$indexRef;
    my $lineNumber = $$lineNumberRef;

    if ($operator =~ /^[\?\/]/)
        {
        # A bare delimiter is an implied match.  The index stays on the delimiter.
        $operator = 'm';
        }
    else
        {
        $index++;

        # Believe it or not, s#...# is allowed.  We can't pass over number signs here.
        if ($tokens->[$index] ne '#')
            {  $self->TryToSkipWhitespace(\$index, \$lineNumber);  };
        };

    # A word or a fat comma where the delimiter should be means the letter wasn't an operator after all (ex. s => 1).
    if ($tokens->[$index] =~ /^\w/)
        {  return undef;  };
    if ($tokens->[$index] eq '=' && $tokens->[$index + 1] eq '>')
        {  return undef;  };

    my $openingSymbol = $tokens->[$index];
    my $closingSymbol = (exists $closingSymbolFor{$openingSymbol} ? $closingSymbolFor{$openingSymbol} : $openingSymbol);

    $index++;

    $self->GenericRegexpSkipUntilAfter(\$index, \$lineNumber, $closingSymbol);

    # From here on it's definitely being skipped, so the caller's variables are updated directly.
    $$indexRef = $index;
    $$lineNumberRef = $lineNumber;

    # These operators have a second part.
    if ($operator =~ /^(?:s|tr|y)$/)
        {
        # With bracketing delimiters the second part gets its own pair, optionally after whitespace: s{...} {...}
        if ($openingSymbol ne $closingSymbol)
            {
            $self->TryToSkipWhitespace($indexRef, $lineNumberRef);

            # This must be read through $$indexRef.  $index wasn't moved by the whitespace skip above, so using it would
            # pick up the whitespace token as the delimiter.
            $openingSymbol = $tokens->[$$indexRef];
            $closingSymbol = (exists $closingSymbolFor{$openingSymbol} ? $closingSymbolFor{$openingSymbol} : $openingSymbol);

            $$indexRef++;
            };

        if ($operator eq 's')
            {
            # NOTE(review): the meaning of the final argument is defined by GenericSkipUntilAfter(), which isn't visible
            # here.  It is passed through unchanged.
            $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, $closingSymbol, 1);
            }
        else # ($operator eq 'tr' || $operator eq 'y')
            {
            # Scan to the first closing symbol that isn't backslashed, counting lines along the way.
            while ($$indexRef < scalar @$tokens &&
                   ($tokens->[$$indexRef] ne $closingSymbol || $self->IsBackslashed($$indexRef)) )
                {
                if ($tokens->[$$indexRef] eq "\n")
                    {  $$lineNumberRef++;  };

                $$indexRef++;
                };

            # Step over the closing symbol.
            $$indexRef++;
            };
        };

    # We want to skip any letters after the regexp.  Otherwise something like tr/a/b/s; could have the trailing s; interpreted
    # as another regexp.  Whitespace is not allowed between the closing symbol and the letters.
    if ($tokens->[$$indexRef] =~ /^[a-z]/i)
        {  $$indexRef++;  };

    return 1;
    };
|
|
|
|
|
|
|
|
###############################################################################
|
|
# Group: Support Functions
|
|
|
|
|
|
#
|
|
# Function: IsStringed
|
|
#
|
|
# Returns whether the position is after a string (dollar sign) character. Returns false if it's preceded by two dollar signs so
|
|
# "if ($x == $$)" doesn't skip the closing parenthesis as stringed.
|
|
#
|
|
# Parameters:
|
|
#
|
|
# index - The index of the position.
|
|
#
|
|
sub IsStringed #(index)
    {
    # Returns 1 if the token right before the index is a single dollar sign, undef otherwise.  Two dollar signs in a row
    # don't count, so the token after $$ is never treated as stringed.

    my ($self, $index) = @_;
    my $tokens = $self->Tokens();

    # There has to be a dollar sign immediately before the position.
    unless ($index > 0 && $tokens->[$index - 1] eq '$')
        {  return undef;  };

    # If that dollar sign is itself preceded by one, the pair is $$ and the position stands on its own.
    if ($index > 1 && $tokens->[$index - 2] eq '$')
        {  return undef;  };

    return 1;
    };
|
|
|
|
|
|
1;
|