############################################################################### # # Class: NaturalDocs::Languages::ActionScript # ############################################################################### # # A subclass to handle the language variations of Flash ActionScript. # ############################################################################### # This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure # Natural Docs is licensed under the GPL use strict; use integer; package NaturalDocs::Languages::ActionScript; use base 'NaturalDocs::Languages::Advanced'; ################################################################################ # Group: Constants and Types # # Constants: XML Tag Type # # XML_OPENING_TAG - The tag is an opening one, such as . # XML_CLOSING_TAG - The tag is a closing one, such as . # XML_SELF_CONTAINED_TAG - The tag is self contained, such as . # use constant XML_OPENING_TAG => 1; use constant XML_CLOSING_TAG => 2; use constant XML_SELF_CONTAINED_TAG => 3; ################################################################################ # Group: Package Variables # # hash: classModifiers # An existence hash of all the acceptable class modifiers. The keys are in all lowercase. # my %classModifiers = ( 'dynamic' => 1, 'intrinsic' => 1, 'final' => 1, 'internal' => 1, 'public' => 1 ); # # hash: memberModifiers # An existence hash of all the acceptable class member modifiers. The keys are in all lowercase. # my %memberModifiers = ( 'public' => 1, 'private' => 1, 'protected' => 1, 'static' => 1, 'internal' => 1, 'override' => 1 ); # # hash: declarationEnders # An existence hash of all the tokens that can end a declaration. This is important because statements don't require a semicolon # to end. The keys are in all lowercase. # my %declarationEnders = ( ';' => 1, '}' => 1, '{' => 1, 'public' => 1, 'private' => 1, 'protected' => 1, 'static' => 1, 'internal' => 1, 'dynamic' => 1, 'intrinsic' => 1, 'final' => 1, 'override' => 1, 'class' => 1, 'interface' => 1, 'var' => 1, 'function' => 1, 'const' => 1, 'namespace' => 1, 'import' => 1 ); # # var: isEscaped # Whether the current file being parsed uses escapement. # my $isEscaped; ################################################################################ # Group: Interface Functions # # Function: PackageSeparator # Returns the package separator symbol. # sub PackageSeparator { return '.'; }; # # Function: EnumValues # Returns the that describes how the language handles enums. # sub EnumValues { return ::ENUM_GLOBAL(); }; # # Function: ParseParameterLine # Parses a prototype parameter line and returns it as a object. # sub ParseParameterLine #(line) { my ($self, $line) = @_; if ($line =~ /^ ?\.\.\.\ (.+)$/) { # This puts them in the wrong fields as $1 should be the name and ... should be the type. However, this is necessary # because the order in the source is reversed from other parameter declarations and it's more important for the output # to match the source. return NaturalDocs::Languages::Prototype::Parameter->New($1, undef, '...', undef, undef, undef); } else { return $self->ParsePascalParameterLine($line); }; }; # # Function: TypeBeforeParameter # Returns whether the type appears before the parameter in prototypes. # sub TypeBeforeParameter { return 0; }; # # Function: PreprocessFile # # If the file is escaped, strips out all unescaped code. Will translate any unescaped comments into comments surrounded by # "\x1C\x1D\x1E\x1F" and "\x1F\x1E\x1D" characters, so chosen because they are the same character lengths as # and will not appear in normal code. # sub PreprocessFile { my ($self, $lines) = @_; if (!$isEscaped) { return; }; use constant MODE_UNESCAPED_REGULAR => 1; use constant MODE_UNESCAPED_PI => 2; use constant MODE_UNESCAPED_CDATA => 3; use constant MODE_UNESCAPED_COMMENT => 4; use constant MODE_ESCAPED_UNKNOWN_CDATA => 5; use constant MODE_ESCAPED_CDATA => 6; use constant MODE_ESCAPED_NO_CDATA => 7; my $mode = MODE_UNESCAPED_REGULAR; for (my $i = 0; $i < scalar @$lines; $i++) { my @tokens = split(/(<[ \t]*\/?[ \t]*mx:Script[^>]*>|<\?|\?>|<\!--|-->|<\!\[CDATA\[|\]\]\>)/, $lines->[$i]); my $newLine; foreach my $token (@tokens) { if ($mode == MODE_UNESCAPED_REGULAR) { if ($token eq '') { $mode = MODE_UNESCAPED_REGULAR; }; } elsif ($mode == MODE_UNESCAPED_CDATA) { if ($token eq ']]>') { $mode = MODE_UNESCAPED_REGULAR; }; } elsif ($mode == MODE_UNESCAPED_COMMENT) { if ($token eq '-->') { $mode = MODE_UNESCAPED_REGULAR; $newLine .= "\x1F\x1E\x1D"; } else { $newLine .= $token; }; } elsif ($mode == MODE_ESCAPED_UNKNOWN_CDATA) { if ($token eq '') { $mode = MODE_UNESCAPED_REGULAR; $newLine .= '; '; } else { $newLine .= $token; }; } else #($mode == MODE_ESCAPED_NO_CDATA) { if ($token =~ /^<[ \t]*\/[ \t]*mx:Script/) { $mode = MODE_UNESCAPED_REGULAR; $newLine .= '; '; } else { $newLine .= $token; }; }; }; $lines->[$i] = $newLine; }; }; # # Function: ParseFile # # Parses the passed source file, sending comments acceptable for documentation to OnComment()>. # # Parameters: # # sourceFile - The to parse. # topicList - A reference to the list of being built by the file. # # Returns: # # The array ( autoTopics, scopeRecord ). # # autoTopics - An arrayref of automatically generated topics from the file, or undef if none. # scopeRecord - An arrayref of , or undef if none. # sub ParseFile #(sourceFile, topicsList) { my ($self, $sourceFile, $topicsList) = @_; # The \x1# comment symbols are inserted by PreprocessFile() to stand in for XML comments in escaped files. my @parseParameters = ( [ '//' ], [ '/*', '*/', "\x1C\x1D\x1E\x1F", "\x1F\x1E\x1D" ], [ '///' ], [ '/**', '*/' ] ); my $extension = lc(NaturalDocs::File->ExtensionOf($sourceFile)); $isEscaped = ($extension eq 'mxml'); $self->ParseForCommentsAndTokens($sourceFile, @parseParameters); my $tokens = $self->Tokens(); my $index = 0; my $lineNumber = 1; while ($index < scalar @$tokens) { if ($self->TryToSkipWhitespace(\$index, \$lineNumber) || $self->TryToGetImport(\$index, \$lineNumber) || $self->TryToGetClass(\$index, \$lineNumber) || $self->TryToGetFunction(\$index, \$lineNumber) || $self->TryToGetVariable(\$index, \$lineNumber) ) { # The functions above will handle everything. } elsif ($tokens->[$index] eq '{') { $self->StartScope('}', $lineNumber, undef, undef, undef); $index++; } elsif ($tokens->[$index] eq '}') { if ($self->ClosingScopeSymbol() eq '}') { $self->EndScope($lineNumber); }; $index++; } else { $self->SkipToNextStatement(\$index, \$lineNumber); }; }; # Don't need to keep these around. $self->ClearTokens(); my $autoTopics = $self->AutoTopics(); my $scopeRecord = $self->ScopeRecord(); if (defined $scopeRecord && !scalar @$scopeRecord) { $scopeRecord = undef; }; return ( $autoTopics, $scopeRecord ); }; ################################################################################ # Group: Statement Parsing Functions # All functions here assume that the current position is at the beginning of a statement. # # Note for developers: I am well aware that the code in these functions do not check if we're past the end of the tokens as # often as it should. We're making use of the fact that Perl will always return undef in these cases to keep the code simpler. # # Function: TryToGetIdentifier # # Determines whether the position is at an identifier, and if so, skips it and returns the complete identifier as a string. Returns # undef otherwise. # # Parameters: # # indexRef - A reference to the current token index. # lineNumberRef - A reference to the current line number. # allowStar - If set, allows the last identifier to be a star. # sub TryToGetIdentifier #(indexRef, lineNumberRef, allowStar) { my ($self, $indexRef, $lineNumberRef, $allowStar) = @_; my $tokens = $self->Tokens(); my $index = $$indexRef; use constant MODE_IDENTIFIER_START => 1; use constant MODE_IN_IDENTIFIER => 2; use constant MODE_AFTER_STAR => 3; my $identifier; my $mode = MODE_IDENTIFIER_START; while ($index < scalar @$tokens) { if ($mode == MODE_IDENTIFIER_START) { if ($tokens->[$index] =~ /^[a-z\$\_]/i) { $identifier .= $tokens->[$index]; $index++; $mode = MODE_IN_IDENTIFIER; } elsif ($allowStar && $tokens->[$index] eq '*') { $identifier .= '*'; $index++; $mode = MODE_AFTER_STAR; } else { return undef; }; } elsif ($mode == MODE_IN_IDENTIFIER) { if ($tokens->[$index] eq '.') { $identifier .= '.'; $index++; $mode = MODE_IDENTIFIER_START; } elsif ($tokens->[$index] =~ /^[a-z0-9\$\_]/i) { $identifier .= $tokens->[$index]; $index++; } else { last; }; } else #($mode == MODE_AFTER_STAR) { if ($tokens->[$index] =~ /^[a-z0-9\$\_\.]/i) { return undef; } else { last; }; }; }; # We need to check again because we may have run out of tokens after a dot. if ($mode != MODE_IDENTIFIER_START) { $$indexRef = $index; return $identifier; } else { return undef; }; }; # # Function: TryToGetImport # # Determines whether the position is at a import statement, and if so, adds it as a Using statement to the current scope, skips # it, and returns true. # sub TryToGetImport #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); my $index = $$indexRef; my $lineNumber = $$lineNumberRef; if ($tokens->[$index] ne 'import') { return undef; }; $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); my $identifier = $self->TryToGetIdentifier(\$index, \$lineNumber, 1); if (!$identifier) { return undef; }; # Currently we implement importing by stripping the last package level and treating it as a using. So "import p1.p2.p3" makes # p1.p2 the using path, which is over-tolerant but that's okay. "import p1.p2.*" is treated the same way, but in this case it's # not over-tolerant. If there's no dot, there's no point to including it. if (index($identifier, '.') != -1) { $identifier =~ s/\.[^\.]+$//; $self->AddUsing( NaturalDocs::SymbolString->FromText($identifier) ); }; $$indexRef = $index; $$lineNumberRef = $lineNumber; return 1; }; # # Function: TryToGetClass # # Determines whether the position is at a class declaration statement, and if so, generates a topic for it, skips it, and # returns true. # # Supported Syntaxes: # # - Classes # - Interfaces # - Classes and interfaces with _global # sub TryToGetClass #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); my $index = $$indexRef; my $lineNumber = $$lineNumberRef; my @modifiers; while ($tokens->[$index] =~ /^[a-z]/i && exists $classModifiers{lc($tokens->[$index])} ) { push @modifiers, lc($tokens->[$index]); $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); }; my $type; if ($tokens->[$index] eq 'class' || $tokens->[$index] eq 'interface') { $type = $tokens->[$index]; $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); } else { return undef; }; my $className = $self->TryToGetIdentifier(\$index, \$lineNumber); if (!$className) { return undef; }; $self->TryToSkipWhitespace(\$index, \$lineNumber); my @parents; if ($tokens->[$index] eq 'extends') { $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); my $parent = $self->TryToGetIdentifier(\$index, \$lineNumber); if (!$parent) { return undef; }; push @parents, $parent; $self->TryToSkipWhitespace(\$index, \$lineNumber); }; if ($type eq 'class' && $tokens->[$index] eq 'implements') { $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); for (;;) { my $parent = $self->TryToGetIdentifier(\$index, \$lineNumber); if (!$parent) { return undef; }; push @parents, $parent; $self->TryToSkipWhitespace(\$index, \$lineNumber); if ($tokens->[$index] ne ',') { last; } else { $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); }; }; }; if ($tokens->[$index] ne '{') { return undef; }; $index++; # If we made it this far, we have a valid class declaration. my $topicType; if ($type eq 'interface') { $topicType = ::TOPIC_INTERFACE(); } else { $topicType = ::TOPIC_CLASS(); }; $className =~ s/^_global.//; my $autoTopic = NaturalDocs::Parser::ParsedTopic->New($topicType, $className, undef, $self->CurrentUsing(), undef, undef, undef, $$lineNumberRef); $self->AddAutoTopic($autoTopic); NaturalDocs::Parser->OnClass($autoTopic->Package()); foreach my $parent (@parents) { NaturalDocs::Parser->OnClassParent($autoTopic->Package(), NaturalDocs::SymbolString->FromText($parent), undef, $self->CurrentUsing(), ::RESOLVE_ABSOLUTE()); }; $self->StartScope('}', $lineNumber, $autoTopic->Package()); $$indexRef = $index; $$lineNumberRef = $lineNumber; return 1; }; # # Function: TryToGetFunction # # Determines if the position is on a function declaration, and if so, generates a topic for it, skips it, and returns true. # # Supported Syntaxes: # # - Functions # - Constructors # - Properties # - Functions with _global # - Functions with namespaces # sub TryToGetFunction #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); my $index = $$indexRef; my $lineNumber = $$lineNumberRef; my $startIndex = $index; my $startLine = $lineNumber; my @modifiers; my $namespace; while ($tokens->[$index] =~ /^[a-z]/i) { if ($tokens->[$index] eq 'function') { last; } elsif (exists $memberModifiers{lc($tokens->[$index])}) { push @modifiers, lc($tokens->[$index]); $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); } elsif (!$namespace) { do { $namespace .= $tokens->[$index]; $index++; } while ($tokens->[$index] =~ /^[a-z0-9_]/i); $self->TryToSkipWhitespace(\$index, \$lineNumber); } else { last; }; }; if ($tokens->[$index] ne 'function') { return undef; }; $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); my $type; if ($tokens->[$index] eq 'get' || $tokens->[$index] eq 'set') { # This can either be a property ("function get Something()") or a function name ("function get()"). my $nextIndex = $index; my $nextLineNumber = $lineNumber; $nextIndex++; $self->TryToSkipWhitespace(\$nextIndex, \$nextLineNumber); if ($tokens->[$nextIndex] eq '(') { $type = ::TOPIC_FUNCTION(); # Ignore the movement and let the code ahead pick it up as the name. } else { $type = ::TOPIC_PROPERTY(); $index = $nextIndex; $lineNumber = $nextLineNumber; }; } else { $type = ::TOPIC_FUNCTION(); }; my $name = $self->TryToGetIdentifier(\$index, \$lineNumber); if (!$name) { return undef; }; $self->TryToSkipWhitespace(\$index, \$lineNumber); if ($tokens->[$index] ne '(') { return undef; }; $index++; $self->GenericSkipUntilAfter(\$index, \$lineNumber, ')'); $self->TryToSkipWhitespace(\$index, \$lineNumber); if ($tokens->[$index] eq ':') { $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); $self->TryToGetIdentifier(\$index, \$lineNumber, 1); $self->TryToSkipWhitespace(\$index, \$lineNumber); }; my $prototype = $self->NormalizePrototype( $self->CreateString($startIndex, $index) ); if ($tokens->[$index] eq '{') { $self->GenericSkip(\$index, \$lineNumber); } elsif (!exists $declarationEnders{$tokens->[$index]}) { return undef; }; my $scope = $self->CurrentScope(); if ($name =~ s/^_global.//) { $scope = undef; }; if ($namespace) { $scope = NaturalDocs::SymbolString->Join($scope, $namespace); }; $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($type, $name, $scope, $self->CurrentUsing(), $prototype, undef, undef, $startLine)); # We succeeded if we got this far. $$indexRef = $index; $$lineNumberRef = $lineNumber; return 1; }; # # Function: TryToGetVariable # # Determines if the position is on a variable declaration statement, and if so, generates a topic for each variable, skips the # statement, and returns true. # # Supported Syntaxes: # # - Variables # - Variables with _global # - Variables with type * (untyped) # - Constants # - Variables and constants with namespaces # sub TryToGetVariable #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); my $index = $$indexRef; my $lineNumber = $$lineNumberRef; my $startIndex = $index; my $startLine = $lineNumber; my @modifiers; my $namespace; while ($tokens->[$index] =~ /^[a-z]/i) { if ($tokens->[$index] eq 'var' || $tokens->[$index] eq 'const') { last; } elsif (exists $memberModifiers{lc($tokens->[$index])}) { push @modifiers, lc($tokens->[$index]); $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); } elsif (!$namespace) { do { $namespace .= $tokens->[$index]; $index++; } while ($tokens->[$index] =~ /^[a-z0-9_]/i); $self->TryToSkipWhitespace(\$index, \$lineNumber); } else { last; }; }; my $type; if ($tokens->[$index] eq 'var') { $type = ::TOPIC_VARIABLE(); } elsif ($tokens->[$index] eq 'const') { $type = ::TOPIC_CONSTANT(); } else { return undef; }; $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); my $endTypeIndex = $index; my @names; my @types; for (;;) { my $name = $self->TryToGetIdentifier(\$index, \$lineNumber); if (!$name) { return undef; }; $self->TryToSkipWhitespace(\$index, \$lineNumber); my $type; if ($tokens->[$index] eq ':') { $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); $type = ': ' . $self->TryToGetIdentifier(\$index, \$lineNumber, 1); $self->TryToSkipWhitespace(\$index, \$lineNumber); }; if ($tokens->[$index] eq '=') { do { $self->GenericSkip(\$index, \$lineNumber); } while ($tokens->[$index] ne ',' && !exists $declarationEnders{$tokens->[$index]} && $index < scalar @$tokens); }; push @names, $name; push @types, $type; if ($tokens->[$index] eq ',') { $index++; $self->TryToSkipWhitespace(\$index, \$lineNumber); } elsif (exists $declarationEnders{$tokens->[$index]}) { last; } else { return undef; }; }; # We succeeded if we got this far. my $prototypePrefix = $self->CreateString($startIndex, $endTypeIndex); for (my $i = 0; $i < scalar @names; $i++) { my $prototype = $self->NormalizePrototype( $prototypePrefix . ' ' . $names[$i] . $types[$i]); my $scope = $self->CurrentScope(); if ($names[$i] =~ s/^_global.//) { $scope = undef; }; if ($namespace) { $scope = NaturalDocs::SymbolString->Join($scope, $namespace); }; $self->AddAutoTopic(NaturalDocs::Parser::ParsedTopic->New($type, $names[$i], $scope, $self->CurrentUsing(), $prototype, undef, undef, $startLine)); }; $$indexRef = $index; $$lineNumberRef = $lineNumber; return 1; }; ################################################################################ # Group: Low Level Parsing Functions # # Function: GenericSkip # # Advances the position one place through general code. # # - If the position is on a string, it will skip it completely. # - If the position is on an opening symbol, it will skip until the past the closing symbol. # - If the position is on whitespace (including comments), it will skip it completely. # - Otherwise it skips one token. # # Parameters: # # indexRef - A reference to the current index. # lineNumberRef - A reference to the current line number. # sub GenericSkip #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); # We can ignore the scope stack because we're just skipping everything without parsing, and we need recursion anyway. if ($tokens->[$$indexRef] eq '{') { $$indexRef++; $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); } elsif ($tokens->[$$indexRef] eq '(') { $$indexRef++; $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ')'); } elsif ($tokens->[$$indexRef] eq '[') { $$indexRef++; $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, ']'); } elsif ($self->TryToSkipWhitespace($indexRef, $lineNumberRef) || $self->TryToSkipString($indexRef, $lineNumberRef) || $self->TryToSkipRegExp($indexRef, $lineNumberRef) || $self->TryToSkipXML($indexRef, $lineNumberRef) ) { } else { $$indexRef++; }; }; # # Function: GenericSkipUntilAfter # # Advances the position via until a specific token is reached and passed. # sub GenericSkipUntilAfter #(indexRef, lineNumberRef, token) { my ($self, $indexRef, $lineNumberRef, $token) = @_; my $tokens = $self->Tokens(); while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne $token) { $self->GenericSkip($indexRef, $lineNumberRef); }; if ($tokens->[$$indexRef] eq "\n") { $$lineNumberRef++; }; $$indexRef++; }; # # Function: IndiscriminateSkipUntilAfterSequence # # Advances the position indiscriminately until a specific token sequence is reached and passed. # sub IndiscriminateSkipUntilAfterSequence #(indexRef, lineNumberRef, token, token, ...) { my ($self, $indexRef, $lineNumberRef, @sequence) = @_; my $tokens = $self->Tokens(); while ($$indexRef < scalar @$tokens && !$self->IsAtSequence($$indexRef, @sequence)) { if ($tokens->[$$indexRef] eq "\n") { $$lineNumberRef++; }; $$indexRef++; }; if ($self->IsAtSequence($$indexRef, @sequence)) { $$indexRef += scalar @sequence; foreach my $token (@sequence) { if ($token eq "\n") { $$lineNumberRef++; }; }; }; }; # # Function: SkipToNextStatement # # Advances the position via until the next statement, which is defined as anything in not # appearing in brackets or strings. It will always advance at least one token. # sub SkipToNextStatement #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); if ($tokens->[$$indexRef] eq ';') { $$indexRef++; } else { do { $self->GenericSkip($indexRef, $lineNumberRef); } while ( $$indexRef < scalar @$tokens && !exists $declarationEnders{$tokens->[$$indexRef]} ); }; }; # # Function: TryToSkipRegExp # If the current position is on a regular expression, skip past it and return true. # sub TryToSkipRegExp #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); if ($tokens->[$$indexRef] eq '/') { # A slash can either start a regular expression or be a divide symbol. Skip backwards to see what the previous symbol is. my $index = $$indexRef - 1; while ($index >= 0 && $tokens->[$index] =~ /^(?: |\t|\n)/) { $index--; }; if ($index < 0 || $tokens->[$index] !~ /^\=\(\[\,]/) { return 0; }; $$indexRef++; while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne '/') { if ($tokens->[$$indexRef] eq '\\') { $$indexRef += 2; } elsif ($tokens->[$$indexRef] eq "\n") { $$indexRef++; $$lineNumberRef++; } else { $$indexRef++; } }; if ($$indexRef < scalar @$tokens) { $$indexRef++; if ($tokens->[$$indexRef] =~ /^[gimsx]+$/i) { $$indexRef++; }; }; return 1; } else { return 0; }; }; # # Function: TryToSkipXML # If the current position is on an XML literal, skip past it and return true. # sub TryToSkipXML #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); if ($tokens->[$$indexRef] eq '<') { # A < can either start an XML literal or be a comparison or shift operator. First check the next character for << or <=. my $index = $$indexRef + 1; while ($index < scalar @$tokens && $tokens->[$index] =~ /^[\=\<]$/) { return 0; }; # Next try the previous character. $index = $$indexRef - 1; while ($index >= 0 && $tokens->[$index] =~ /^[ |\t|\n]/) { $index--; }; if ($index < 0 || $tokens->[$index] !~ /^[\=\(\[\,\>]/) { return 0; }; } else { return 0; }; # Only handle the tag here if it's not an irregular XML section. if (!$self->TryToSkipIrregularXML($indexRef, $lineNumberRef)) { my @tagStack; my ($tagType, $tagIdentifier) = $self->GetAndSkipXMLTag($indexRef, $lineNumberRef); if ($tagType == XML_OPENING_TAG) { push @tagStack, $tagIdentifier; }; while (scalar @tagStack && $$indexRef < scalar @$tokens) { $self->SkipToNextXMLTag($indexRef, $lineNumberRef); ($tagType, $tagIdentifier) = $self->GetAndSkipXMLTag($indexRef, $lineNumberRef); if ($tagType == XML_OPENING_TAG) { push @tagStack, $tagIdentifier; } elsif ($tagType == XML_CLOSING_TAG && $tagIdentifier eq $tagStack[-1]) { pop @tagStack; }; }; }; return 1; }; # # Function: TryToSkipIrregularXML # # If the current position is on an irregular XML tag, skip past it and return true. Irregular XML tags are defined as # # CDATA - # Comments - # PI - # sub TryToSkipIrregularXML #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; if ($self->IsAtSequence($$indexRef, '<', '!', '[', 'CDATA', '[')) { $$indexRef += 5; $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, ']', ']', '>'); return 1; } elsif ($self->IsAtSequence($$indexRef, '<', '!', '-', '-')) { $$indexRef += 4; $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, '-', '-', '>'); return 1; } elsif ($self->IsAtSequence($$indexRef, '<', '?')) { $$indexRef += 2; $self->IndiscriminateSkipUntilAfterSequence($indexRef, $lineNumberRef, '?', '>'); return 1; } else { return 0; }; }; # # Function: GetAndSkipXMLTag # # Processes the XML tag at the current position, moves beyond it, and returns information about it. Assumes the position is on # the opening angle bracket of the tag and the tag is a normal XML tag, not one of the ones handled by # . # # Parameters: # # indexRef - A reference to the index of the position of the opening angle bracket. # lineNumberRef - A reference to the line number of the position of the opening angle bracket. # # Returns: # # The array ( tagType, name ). # # tagType - One of the constants. # identifier - The identifier of the tag. If it's an empty tag (<> or ), this will be "(anonymous)". # sub GetAndSkipXMLTag #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); if ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] ne '<') { die "Tried to call GetXMLTag when the position isn't on an opening bracket."; }; # Get the anonymous ones out of the way so we don't have to worry about them below, since they're rather exceptional. if ($self->IsAtSequence($$indexRef, '<', '>')) { $$indexRef += 2; return ( XML_OPENING_TAG, '(anonymous)' ); } elsif ($self->IsAtSequence($$indexRef, '<', '/', '>')) { $$indexRef += 3; return ( XML_CLOSING_TAG, '(anonymous)' ); }; # Grab the identifier. my $tagType = XML_OPENING_TAG; my $identifier; $$indexRef++; if ($tokens->[$$indexRef] eq '/') { $$indexRef++; $tagType = XML_CLOSING_TAG; }; $self->TryToSkipXMLWhitespace($indexRef, $lineNumberRef); # The identifier could be a native expression in braces. if ($tokens->[$$indexRef] eq '{') { my $startOfIdentifier = $$indexRef; $$indexRef++; $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); $identifier = $self->CreateString($startOfIdentifier, $$indexRef); } # Otherwise just grab content until whitespace or the end of the tag. else { while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] !~ /^[\/\>\ \t]$/) { $identifier .= $tokens->[$$indexRef]; $$indexRef++; }; }; # Skip to the end of the tag. while ($$indexRef < scalar @$tokens && $tokens->[$$indexRef] !~ /^[\/\>]$/) { if ($tokens->[$$indexRef] eq '{') { $$indexRef++; $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); } elsif ($self->TryToSkipXMLWhitespace($indexRef, $lineNumberRef)) { } # We don't need to do special handling for attribute quotes or anything like that because there's no backslashing in # XML. It's all handled with entity characters. else { $$indexRef++; }; }; if ($tokens->[$$indexRef] eq '/') { if ($tagType == XML_OPENING_TAG) { $tagType = XML_SELF_CONTAINED_TAG; }; $$indexRef++; }; if ($tokens->[$$indexRef] eq '>') { $$indexRef++; }; if (!$identifier) { $identifier = '(anonymous)'; }; return ( $tagType, $identifier ); }; # # Function: SkipToNextXMLTag # Skips to the next normal XML tag. It will not stop at elements handled by . Note that if the # position is already at an XML tag, it will not move. # sub SkipToNextXMLTag #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); while ($$indexRef < scalar @$tokens) { if ($tokens->[$$indexRef] eq '{') { $$indexRef++; $self->GenericSkipUntilAfter($indexRef, $lineNumberRef, '}'); } elsif ($self->TryToSkipIrregularXML($indexRef, $lineNumberRef)) { } elsif ($tokens->[$$indexRef] eq '<') { last; } else { if ($tokens->[$$indexRef] eq "\n") { $$lineNumberRef++; }; $$indexRef++; }; }; }; # # Function: TryToSkipXMLWhitespace # If the current position is on XML whitespace, skip past it and return true. # sub TryToSkipXMLWhitespace #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); my $result; while ($$indexRef < scalar @$tokens) { if ($tokens->[$$indexRef] =~ /^[ \t]/) { $$indexRef++; $result = 1; } elsif ($tokens->[$$indexRef] eq "\n") { $$indexRef++; $$lineNumberRef++; $result = 1; } else { last; }; }; return $result; }; # # Function: TryToSkipString # If the current position is on a string delimiter, skip past the string and return true. # # Parameters: # # indexRef - A reference to the index of the position to start at. # lineNumberRef - A reference to the line number of the position. # # Returns: # # Whether the position was at a string. # # Syntax Support: # # - Supports quotes and apostrophes. # sub TryToSkipString #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; return ($self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '\'') || $self->SUPER::TryToSkipString($indexRef, $lineNumberRef, '"') ); }; # # Function: TryToSkipWhitespace # If the current position is on a whitespace token, a line break token, or a comment, it skips them and returns true. If there are # a number of these in a row, it skips them all. # sub TryToSkipWhitespace #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); my $result; while ($$indexRef < scalar @$tokens) { if ($tokens->[$$indexRef] =~ /^[ \t]/) { $$indexRef++; $result = 1; } elsif ($tokens->[$$indexRef] eq "\n") { $$indexRef++; $$lineNumberRef++; $result = 1; } elsif ($self->TryToSkipComment($indexRef, $lineNumberRef)) { $result = 1; } else { last; }; }; return $result; }; # # Function: TryToSkipComment # If the current position is on a comment, skip past it and return true. # sub TryToSkipComment #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; return ( $self->TryToSkipLineComment($indexRef, $lineNumberRef) || $self->TryToSkipMultilineComment($indexRef, $lineNumberRef) ); }; # # Function: TryToSkipLineComment # If the current position is on a line comment symbol, skip past it and return true. # sub TryToSkipLineComment #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); if ($tokens->[$$indexRef] eq '/' && $tokens->[$$indexRef+1] eq '/') { $self->SkipRestOfLine($indexRef, $lineNumberRef); return 1; } else { return undef; }; }; # # Function: TryToSkipMultilineComment # If the current position is on an opening comment symbol, skip past it and return true. # sub TryToSkipMultilineComment #(indexRef, lineNumberRef) { my ($self, $indexRef, $lineNumberRef) = @_; my $tokens = $self->Tokens(); if ($tokens->[$$indexRef] eq '/' && $tokens->[$$indexRef+1] eq '*') { $self->SkipUntilAfter($indexRef, $lineNumberRef, '*', '/'); return 1; } else { return undef; }; }; 1;