ddnet/docs/tool/Modules/NaturalDocs/Languages/Simple.pm

###############################################################################
#
#   Class: NaturalDocs::Languages::Simple
#
###############################################################################
#
#   A class containing the characteristics of a particular programming language for basic support within Natural Docs.
#   Also serves as a base class for languages that break from general conventions, such as not having parameter lists use
#   parenthesis and commas.
#
###############################################################################

# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL

use strict;
use integer;

package NaturalDocs::Languages::Simple;

use base 'NaturalDocs::Languages::Base';
use base 'Exporter';

our @EXPORT = ( 'ENDER_ACCEPT', 'ENDER_IGNORE', 'ENDER_ACCEPT_AND_CONTINUE', 'ENDER_REVERT_TO_ACCEPTED' );


use NaturalDocs::DefineMembers 'LINE_COMMENT_SYMBOLS', 'LineCommentSymbols()', 'SetLineCommentSymbols() duparrayref',
                                                 'BLOCK_COMMENT_SYMBOLS', 'BlockCommentSymbols()',
                                                                                              'SetBlockCommentSymbols() duparrayref',
                                                 'PROTOTYPE_ENDERS',
                                                 'LINE_EXTENDER', 'LineExtender()', 'SetLineExtender()',
                                                 'PACKAGE_SEPARATOR', 'PackageSeparator()',
                                                 'PACKAGE_SEPARATOR_WAS_SET', 'PackageSeparatorWasSet()',
                                                 'ENUM_VALUES', 'EnumValues()',
                                                 'ENUM_VALUES_WAS_SET', 'EnumValuesWasSet()';

#
#   Function: New
#
#   Creates and returns a new object.
#
#   Parameters:
#
#       name - The name of the language.
#
sub New #(name)
    {
    my ($selfPackage, $name) = @_;

    my $object = $selfPackage->SUPER::New($name);

    $object->[ENUM_VALUES] = ::ENUM_GLOBAL();
    $object->[PACKAGE_SEPARATOR] = '.';

    return $object;
    };


#
#   Functions: Members
#
#   LineCommentSymbols - Returns an arrayref of symbols that start a line comment, or undef if none.
#   SetLineCommentSymbols - Replaces the arrayref of symbols that start a line comment.
#   BlockCommentSymbols - Returns an arrayref of start/end symbol pairs that specify a block comment, or undef if none.  Pairs
#                                        are specified with two consecutive array entries.
#   SetBlockCommentSymbols - Replaces the arrayref of start/end symbol pairs that specify a block comment.  Pairs are
#                                             specified with two consecutive array entries.
#   LineExtender - Returns the symbol to ignore a line break in languages where line breaks are significant.
#   SetLineExtender - Replaces the symbol to ignore a line break in languages where line breaks are significant.
#   PackageSeparator - Returns the package separator symbol.
#   PackageSeparatorWasSet - Returns whether the package separator symbol was ever changed from the default.
#

#
#   Function: SetPackageSeparator
#   Replaces the language's package separator string.
#
sub SetPackageSeparator #(separator)
    {
    my ($self, $separator) = @_;
    $self->[PACKAGE_SEPARATOR] = $separator;
    $self->[PACKAGE_SEPARATOR_WAS_SET] = 1;
    };


#
#   Functions: Members
#
#   EnumValues - Returns the <EnumValuesType> that describes how the language handles enums.
#   EnumValuesWasSet - Returns whether <EnumValues> was ever changed from the default.


#
#   Function: SetEnumValues
#   Replaces the <EnumValuesType> that describes how the language handles enums.
#
sub SetEnumValues #(EnumValuesType newBehavior)
    {
    my ($self, $behavior) = @_;
    $self->[ENUM_VALUES] = $behavior;
    $self->[ENUM_VALUES_WAS_SET] = 1;
    };


#
#   Function: PrototypeEndersFor
#
#   Returns an arrayref of prototype ender symbols for the passed <TopicType>, or undef if none.
#
sub PrototypeEndersFor #(type)
    {
    my ($self, $type) = @_;

    if (defined $self->[PROTOTYPE_ENDERS])
        {  return $self->[PROTOTYPE_ENDERS]->{$type};  }
    else
        {  return undef;  };
    };


#
#   Function: SetPrototypeEndersFor
#
#   Replaces the arrayref of prototype ender symbols for the passed <TopicType>.
#
sub SetPrototypeEndersFor #(type, enders)
    {
    my ($self, $type, $enders) = @_;

    if (!defined $self->[PROTOTYPE_ENDERS])
        {  $self->[PROTOTYPE_ENDERS] = { };  };

    if (!defined $enders)
        {  delete $self->[PROTOTYPE_ENDERS]->{$type};  }
    else
        {
        $self->[PROTOTYPE_ENDERS]->{$type} = [ @$enders ];
        };
    };


###############################################################################
# Group: Parsing Functions


#
#   Function: ParseFile
#
#   Parses the passed source file, sending comments acceptable for documentation to <NaturalDocs::Parser->OnComment()>
#   and all other sections to <OnCode()>.
#
#   Parameters:
#
#       sourceFile - The <FileName> of the source file to parse.
#       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
#
#   Returns:
#
#       Since this class cannot automatically document the code or generate a scope record, it always returns ( undef, undef ).
#
sub ParseFile #(sourceFile, topicsList)
    {
    my ($self, $sourceFile, $topicsList) = @_;

    open(SOURCEFILEHANDLE, '<' . $sourceFile)
        or die "Couldn't open input file " . $sourceFile . "\n";

    my @commentLines;
    my @codeLines;
    my $lastCommentTopicCount = 0;

    if ($self->Name() eq 'Text File')
        {
        my $line = <SOURCEFILEHANDLE>;

        # On the very first line, remove a Unicode BOM if present.  Information on it available at:
        # http://www.unicode.org/faq/utf_bom.html#BOM
        $line =~ s/^\xEF\xBB\xBF//;

        while ($line)
            {
            ::XChomp(\$line);
            push @commentLines, $line;
            $line = <SOURCEFILEHANDLE>;
            };

        NaturalDocs::Parser->OnComment(\@commentLines, 1);
        }

    else
        {
        my $line = <SOURCEFILEHANDLE>;
        my $lineNumber = 1;

        # On the very first line, remove a Unicode BOM if present.  Information on it available at:
        # http://www.unicode.org/faq/utf_bom.html#BOM
        $line =~ s/^\xEF\xBB\xBF//;

        while (defined $line)
            {
            ::XChomp(\$line);
            my $originalLine = $line;


            # Retrieve single line comments.  This leaves $line at the next line.

            if ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()))
                {
                do
                    {
                    push @commentLines, $line;
                    $line = <SOURCEFILEHANDLE>;

                    if (!defined $line)
                        {  goto EndDo;  };

                    ::XChomp(\$line);
                    }
                while ($self->StripOpeningSymbols(\$line, $self->LineCommentSymbols()));

                EndDo:  # I hate Perl sometimes.
                }


            # Retrieve multiline comments.  This leaves $line at the next line.

            elsif (my $closingSymbol = $self->StripOpeningBlockSymbols(\$line, $self->BlockCommentSymbols()))
                {
                # Note that it is possible for a multiline comment to start correctly but not end so.  We want those comments to stay in
                # the code.  For example, look at this prototype with this splint annotation:
                #
                # int get_array(integer_t id,
                #                    /*@out@*/ array_t array);
                #
                # The annotation starts correctly but doesn't end so because it is followed by code on the same line.

                my $lineRemainder;

                for (;;)
                    {
                    $lineRemainder = $self->StripClosingSymbol(\$line, $closingSymbol);

                    push @commentLines, $line;

                    #  If we found an end comment symbol...
                    if (defined $lineRemainder)
                        {  last;  };

                    $line = <SOURCEFILEHANDLE>;

                    if (!defined $line)
                        {  last;  };

                    ::XChomp(\$line);
                    };

                if ($lineRemainder !~ /^[ \t]*$/)
                    {
                    # If there was something past the closing symbol this wasn't an acceptable comment, so move the lines to code.
                    push @codeLines, @commentLines;
                    @commentLines = ( );
                    };

                $line = <SOURCEFILEHANDLE>;
                }


            # Otherwise just add it to the code.

            else
                {
                push @codeLines, $line;
                $line = <SOURCEFILEHANDLE>;
                };


            # If there were comments, send them to Parser->OnComment().

            if (scalar @commentLines)
                {
                # First process any code lines before the comment.
                if (scalar @codeLines)
                    {
                    $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
                    $lineNumber += scalar @codeLines;
                    @codeLines = ( );
                    };

                $lastCommentTopicCount = NaturalDocs::Parser->OnComment(\@commentLines, $lineNumber);
                $lineNumber += scalar @commentLines;
                @commentLines = ( );
                };

            };  # while (defined $line)


        # Clean up any remaining code.
        if (scalar @codeLines)
            {
            $self->OnCode(\@codeLines, $lineNumber, $topicsList, $lastCommentTopicCount);
            @codeLines = ( );
            };

        };

    close(SOURCEFILEHANDLE);

    return ( undef, undef );
    };


#
#   Function: OnCode
#
#   Called whenever a section of code is encountered by the parser.  Is used to find the prototype of the last topic created.
#
#   Parameters:
#
#       codeLines - The source code as an arrayref of lines.
#       codeLineNumber - The line number of the first line of code.
#       topicList - A reference to the list of <NaturalDocs::Parser::ParsedTopics> being built by the file.
#       lastCommentTopicCount - The number of Natural Docs topics that were created by the last comment.
#
sub OnCode #(codeLines, codeLineNumber, topicList, lastCommentTopicCount)
    {
    my ($self, $codeLines, $codeLineNumber, $topicList, $lastCommentTopicCount) = @_;

    if ($lastCommentTopicCount && defined $self->PrototypeEndersFor($topicList->[-1]->Type()))
        {
        my $lineIndex = 0;
        my $prototype;

        # Skip all blank lines before a prototype.
        while ($lineIndex < scalar @$codeLines && $codeLines->[$lineIndex] =~ /^[ \t]*$/)
            {  $lineIndex++;  };

        my @tokens;
        my $tokenIndex = 0;

        my @brackets;
        my $enders = $self->PrototypeEndersFor($topicList->[-1]->Type());

        # Add prototype lines until we reach the end of the prototype or the end of the code lines.
        while ($lineIndex < scalar @$codeLines)
            {
            my $line = $self->RemoveLineExtender($codeLines->[$lineIndex] . "\n");

            push @tokens, $line =~ /([^\(\)\[\]\{\}\<\>]+|.)/g;

            while ($tokenIndex < scalar @tokens)
                {
                # If we're not inside brackets, check for ender symbols.
                if (!scalar @brackets)
                    {
                    my $startingIndex = 0;
                    my $testPrototype;

                    for (;;)
                        {
                        my ($enderIndex, $ender) = ::FindFirstSymbol($tokens[$tokenIndex], $enders, $startingIndex);

                        if ($enderIndex == -1)
                            {  last;  }
                        else
                            {
                            # We do this here so we don't duplicate prototype for every single token.  Just the first time an ender symbol
                            # is found in one.
                            if (!defined $testPrototype)
                                {  $testPrototype = $prototype;  };

                            $testPrototype .= substr($tokens[$tokenIndex], $startingIndex, $enderIndex - $startingIndex);

                            my $enderResult;

                            # If the ender is all text and the character preceding or following it is as well, ignore it.
                            if ($ender =~ /^[a-z0-9]+$/i &&
                                ( ($enderIndex > 0 && substr($tokens[$tokenIndex], $enderIndex - 1, 1) =~ /^[a-z0-9_]$/i) ||
                                   substr($tokens[$tokenIndex], $enderIndex + length($ender), 1) =~ /^[a-z0-9_]$/i ) )
                                {  $enderResult = ENDER_IGNORE();  }
                            else
                                {  $enderResult = $self->OnPrototypeEnd($topicList->[-1]->Type(), \$testPrototype, $ender);  }

                            if ($enderResult == ENDER_IGNORE())
                                {
                                $testPrototype .= $ender;
                                $startingIndex = $enderIndex + length($ender);
                                }
                            elsif ($enderResult == ENDER_REVERT_TO_ACCEPTED())
                                {
                                return;
                                }
                            else # ENDER_ACCEPT || ENDER_ACCEPT_AND_CONTINUE
                                {
                                my $titleInPrototype = $topicList->[-1]->Title();

                                # Strip parenthesis so Function(2) and Function(int, int) will still match Function(anything).
                                $titleInPrototype =~ s/[\t ]*\([^\(]*$//;

                                if (index($testPrototype, $titleInPrototype) != -1)
                                    {
                                    $topicList->[-1]->SetPrototype( $self->NormalizePrototype($testPrototype) );
                                    };

                                if ($enderResult == ENDER_ACCEPT())
                                    {  return;  }
                                else # ENDER_ACCEPT_AND_CONTINUE
                                    {
                                    $testPrototype .= $ender;
                                    $startingIndex = $enderIndex + length($ender);
                                    };
                                };
                            };
                        };
                    }

                # If we are inside brackets, check for closing symbols.
                elsif ( ($tokens[$tokenIndex] eq ')' && $brackets[-1] eq '(') ||
                         ($tokens[$tokenIndex] eq ']' && $brackets[-1] eq '[') ||
                         ($tokens[$tokenIndex] eq '}' && $brackets[-1] eq '{') ||
                         ($tokens[$tokenIndex] eq '>' && $brackets[-1] eq '<') )
                    {
                    pop @brackets;
                    };

                # Check for opening brackets.
                if ($tokens[$tokenIndex] =~ /^[\(\[\{\<]$/)
                    {
                    push @brackets, $tokens[$tokenIndex];
                    };

                $prototype .= $tokens[$tokenIndex];
                $tokenIndex++;
                };

            $lineIndex++;
            };

        # If we got out of that while loop by running out of lines, there was no prototype.
        };
    };


use constant ENDER_ACCEPT => 1;
use constant ENDER_IGNORE => 2;
use constant ENDER_ACCEPT_AND_CONTINUE => 3;
use constant ENDER_REVERT_TO_ACCEPTED => 4;

#
#   Function: OnPrototypeEnd
#
#   Called whenever the end of a prototype is found so that there's a chance for derived classes to mark false positives.
#
#   Parameters:
#
#       type - The <TopicType> of the prototype.
#       prototypeRef - A reference to the prototype so far, minus the ender in dispute.
#       ender - The ender symbol.
#
#   Returns:
#
#       ENDER_ACCEPT - The ender is accepted and the prototype is finished.
#       ENDER_IGNORE - The ender is rejected and parsing should continue.  Note that the prototype will be rejected as a whole
#                                  if all enders are ignored before reaching the end of the code.
#       ENDER_ACCEPT_AND_CONTINUE - The ender is accepted so the prototype may stand as is.  However, the prototype might
#                                                          also continue on so continue parsing.  If there is no accepted ender between here and
#                                                          the end of the code this version will be accepted instead.
#       ENDER_REVERT_TO_ACCEPTED - The expedition from ENDER_ACCEPT_AND_CONTINUE failed.  Use the last accepted
#                                                        version and end parsing.
#
sub OnPrototypeEnd #(type, prototypeRef, ender)
    {
    return ENDER_ACCEPT();
    };


#
#   Function: RemoveLineExtender
#
#   If the passed line has a line extender, returns it without the extender or the line break that follows.  If it doesn't, or there are
#   no line extenders defined, returns the passed line unchanged.
#
sub RemoveLineExtender #(line)
    {
    my ($self, $line) = @_;

    if (defined $self->LineExtender())
        {
        my $lineExtenderIndex = rindex($line, $self->LineExtender());

        if ($lineExtenderIndex != -1 &&
            substr($line, $lineExtenderIndex + length($self->LineExtender())) =~ /^[ \t]*\n$/)
            {
            $line = substr($line, 0, $lineExtenderIndex) . ' ';
            };
        };

    return $line;
    };


1;