###############################################################################
#
#   Package: NaturalDocs::Parser
#
###############################################################################
#
#   A package that coordinates source file parsing between the language objects returned by NaturalDocs::Languages and
#   its own sub-packages such as NaturalDocs::Parser::Native.  Also handles sending symbols to NaturalDocs::SymbolTable
#   and other generic topic processing.
#
#   Usage and Dependencies:
#
#       - Prior to use, the packages this one calls into (NaturalDocs::Settings, NaturalDocs::Languages,
#         NaturalDocs::Project, NaturalDocs::SymbolTable, NaturalDocs::ClassHierarchy, ...) must be initialized.
#         NOTE(review): the exact list of package names, and which two of them "do not have to be fully resolved",
#         was lost from this comment -- confirm against the upstream source.
#
#       - Aside from that, the package is ready to use right away.  It does not have its own initialization function.
#
###############################################################################

# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL

use NaturalDocs::Parser::ParsedTopic;
use NaturalDocs::Parser::Native;
use NaturalDocs::Parser::JavaDoc;

use strict;
use integer;

package NaturalDocs::Parser;


###############################################################################
# Group: Variables


#
#   var: sourceFile
#
#   The source file currently being parsed.
#
my $sourceFile;

#
#   var: language
#
#   The language object for the file, as returned by NaturalDocs::Languages->LanguageOf().
#
my $language;

#
#   Array: parsedFile
#
#   An array of NaturalDocs::Parser::ParsedTopic objects.
#
my @parsedFile;

#
#   bool: parsingForInformation
#   Whether ParseForInformation() was called.  If false, then ParseForBuild() was called.
#
my $parsingForInformation;


###############################################################################
# Group: Functions


#
#   Function: ParseForInformation
#
#   Parses the input file for information.  Will update the information about the file in NaturalDocs::SymbolTable and
#   NaturalDocs::Project (plus the class hierarchy, source database and image reference table as a side effect).
#
#   Parameters:
#
#       file - The source file to parse.
#
sub ParseForInformation {    #(file)
    my ($self, $file) = @_;

    $sourceFile            = $file;
    $parsingForInformation = 1;

    # Watch this parse so we detect any changes.
    NaturalDocs::SymbolTable->WatchFileForChanges($sourceFile);
    NaturalDocs::ClassHierarchy->WatchFileForChanges($sourceFile);
    NaturalDocs::SourceDB->WatchFileForChanges($sourceFile);

    my $defaultMenuTitle = $self->Parse();

    foreach my $topic (@parsedFile) {
        # Add a symbol for the topic.  Enumerations are registered as types.
        my $type = $topic->Type();
        if ($type eq ::TOPIC_ENUMERATION()) {
            $type = ::TOPIC_TYPE();
        }

        NaturalDocs::SymbolTable->AddSymbol($topic->Symbol(), $sourceFile, $type,
                                            $topic->Prototype(), $topic->Summary());

        # You can't put the function call directly in a while with a regex.  It has to sit in a variable so that
        # the //g matches can advance through it.
        my $body = $topic->Body();

        # If it's a list or enum topic, add a symbol for each description list entry.
        if ($topic->IsList() || $topic->Type() eq ::TOPIC_ENUMERATION()) {
            # We'll hijack the enum constants to apply to non-enum behavior too.
            my $behavior;

            if ($topic->Type() eq ::TOPIC_ENUMERATION()) {
                $type     = ::TOPIC_CONSTANT();
                $behavior = $language->EnumValues();
            }
            elsif (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() == ::SCOPE_ALWAYS_GLOBAL()) {
                $behavior = ::ENUM_GLOBAL();
            }
            else {
                $behavior = ::ENUM_UNDER_PARENT();
            }

            # Each entry is <ds>symbol</ds><dd>description</dd>.
            while ($body =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g) {
                my ($listTextSymbol, $listSummary) = ($1, $2);

                $listTextSymbol = NaturalDocs::NDMarkup->RestoreAmpChars($listTextSymbol);
                my $listSymbol = NaturalDocs::SymbolString->FromText($listTextSymbol);

                if ($behavior == ::ENUM_UNDER_PARENT()) {
                    $listSymbol = NaturalDocs::SymbolString->Join($topic->Package(), $listSymbol);
                }
                elsif ($behavior == ::ENUM_UNDER_TYPE()) {
                    $listSymbol = NaturalDocs::SymbolString->Join($topic->Symbol(), $listSymbol);
                }

                NaturalDocs::SymbolTable->AddSymbol($listSymbol, $sourceFile, $type, undef,
                                                    $self->GetSummaryFromDescriptionList($listSummary));
            }
        }

        # Add references in the topic.
        # NOTE(review): this pattern was empty in the damaged source (an empty pattern silently re-uses the last
        # successful regex).  It is restored to the NDMarkup link tag form; confirm the attribute order against
        # NaturalDocs::NDMarkup.
        while ($body =~ /<link target=\"([^\"]*)\" name=\"[^\"]*\" original=\"[^\"]*\">/g) {
            my $linkText   = NaturalDocs::NDMarkup->RestoreAmpChars($1);
            my $linkSymbol = NaturalDocs::SymbolString->FromText($linkText);

            NaturalDocs::SymbolTable->AddReference(::REFERENCE_TEXT(), $linkSymbol,
                                                   $topic->Package(), $topic->Using(), $sourceFile);
        }

        # Add images in the topic.
        # NOTE(review): restored to the NDMarkup image tag form for the same reason as above; confirm against
        # NaturalDocs::NDMarkup.
        while ($body =~ /<img mode=\"[^\"]*\" target=\"([^\"]+)\" original=\"[^\"]*\">/g) {
            my $target = NaturalDocs::NDMarkup->RestoreAmpChars($1);
            NaturalDocs::ImageReferenceTable->AddReference($sourceFile, $target);
        }
    }

    # Handle any changes to the file.
    NaturalDocs::ClassHierarchy->AnalyzeChanges();
    NaturalDocs::SymbolTable->AnalyzeChanges();
    NaturalDocs::SourceDB->AnalyzeWatchedFileChanges();

    # Update project on the file's characteristics.
    my $hasContent = (scalar @parsedFile > 0);

    NaturalDocs::Project->SetHasContent($sourceFile, $hasContent);
    if ($hasContent) {
        NaturalDocs::Project->SetDefaultMenuTitle($sourceFile, $defaultMenuTitle);
    }

    # We don't need to keep this around.
    @parsedFile = ();
}


#
#   Function: ParseForBuild
#
#   Parses the input file for building, returning it as an arrayref of parsed topics.
#
#   Note that all new and changed files should be parsed for symbols via ParseForInformation() before calling this
#   function on *any* file.  The reason is that the symbol table needs to know about all the symbol definitions and
#   references to resolve them properly.
#
#   Parameters:
#
#       file - The source file to parse for building.
#
#   Returns:
#
#       An arrayref of the source file as NaturalDocs::Parser::ParsedTopic objects.
#
sub ParseForBuild {    #(file)
    my ($self, $file) = @_;

    $sourceFile            = $file;
    $parsingForInformation = undef;

    $self->Parse();

    return \@parsedFile;
}


###############################################################################
# Group: Interface Functions


#
#   Function: OnComment
#
#   The function called by the language objects when their parsers encounter a comment suitable for documentation.
#
#   Parameters:
#
#       commentLines - An arrayref of the comment's lines.  The language's comment symbols should be converted to
#                      spaces, and there should be no line break characters at the end of each line.  *The original
#                      memory will be changed.*
#       lineNumber   - The line number of the first of the comment lines.
#       isJavaDoc    - Whether the comment is in JavaDoc format.
#
#   Returns:
#
#       The number of topics created by this comment, or zero if none.
#
sub OnComment {    #(string[] commentLines, int lineNumber, bool isJavaDoc)
    my ($self, $commentLines, $lineNumber, $isJavaDoc) = @_;

    $self->CleanComment($commentLines);

    # We check if it's definitely Natural Docs content first.  This overrides all else, since it's possible that a
    # comment could start with a topic line yet have something that looks like a JavaDoc tag.  Natural Docs wins in
    # this case.
    if (NaturalDocs::Parser::Native->IsMine($commentLines, $isJavaDoc)) {
        return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);
    }

    if (NaturalDocs::Parser::JavaDoc->IsMine($commentLines, $isJavaDoc)) {
        return NaturalDocs::Parser::JavaDoc->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);
    }

    # If the content is ambiguous and it's a JavaDoc-styled comment, treat it as Natural Docs content.
    if ($isJavaDoc) {
        return NaturalDocs::Parser::Native->ParseComment($commentLines, $isJavaDoc, $lineNumber, \@parsedFile);
    }

    # Nobody claimed the comment.  Return the documented zero explicitly instead of whatever the last evaluated
    # condition happened to be.
    return 0;
}


#
#   Function: OnClass
#
#   A function called by the language objects when their parsers encounter a class declaration.  The class is only
#   recorded in NaturalDocs::ClassHierarchy when the file is being parsed for information.
#
#   Parameters:
#
#       class - The symbol of the class encountered.
#
sub OnClass {    #(class)
    my ($self, $class) = @_;

    if ($parsingForInformation) {
        NaturalDocs::ClassHierarchy->AddClass($sourceFile, $class);
    }
}


#
#   Function: OnClassParent
#
#   A function called by the language objects when their parsers encounter a declaration of inheritance.  The
#   reference is only recorded when the file is being parsed for information.
#
#   Parameters:
#
#       class          - The symbol of the class we're in.
#       parent         - The symbol of the class it inherits.
#       scope          - The package that the reference appeared in.
#       using          - An arrayref of the packages that the reference has access to via "using" statements.
#       resolvingFlags - Any resolving flags to be used when resolving the reference.  RESOLVE_NOPLURAL is added
#                        automatically since that would never apply to source code.
#
sub OnClassParent {    #(class, parent, scope, using, resolvingFlags)
    my ($self, $class, $parent, $scope, $using, $resolvingFlags) = @_;

    if ($parsingForInformation) {
        NaturalDocs::ClassHierarchy->AddParentReference($sourceFile, $class, $parent, $scope, $using,
                                                        $resolvingFlags | ::RESOLVE_NOPLURAL());
    }
}


###############################################################################
# Group: Support Functions


#   Function: Parse
#
#   Opens the source file and runs the parsing process.  Most of the actual parsing is done in the language object's
#   ParseFile() and in OnComment(), though.
#
#   *Do not call externally.*  Rather, call ParseForInformation() or ParseForBuild().
#
#   Returns:
#
#       The default menu title of the file.  Will be the source file name if nothing better is found.
#
sub Parse {
    my ($self) = @_;

    NaturalDocs::Error->OnStartParsing($sourceFile);

    $language = NaturalDocs::Languages->LanguageOf($sourceFile);
    NaturalDocs::Parser::Native->Start();
    @parsedFile = ();

    # The language parser fills @parsedFile through OnComment() and may hand back auto-topics and a scope record.
    my ($autoTopics, $scopeRecord) = $language->ParseFile($sourceFile, \@parsedFile);
    my $hasAutoTopics = defined $autoTopics;

    $self->AddToClassHierarchy();
    $self->BreakLists();

    if ($hasAutoTopics) {
        $self->RepairPackages($autoTopics, $scopeRecord) if defined $scopeRecord;
        $self->MergeAutoTopics($language, $autoTopics);
    }

    $self->RemoveRemainingHeaderlessTopics();

    # We don't need to do this if there aren't any auto-topics because the only package changes would be implied by
    # the comments.
    $self->AddPackageDelineators() if $hasAutoTopics;

    $self->MakeAutoGroups($autoTopics) unless NaturalDocs::Settings->NoAutoGroup();

    # Set the menu title.  It defaults to the file itself.
    my $defaultMenuTitle = $sourceFile;

    if (scalar @parsedFile) {
        my $useTopicTitle;

        if (NaturalDocs::Settings->OnlyFileTitles()) {
            # We still want to use the title from the topics if the first one is a file.
            $useTopicTitle = ($parsedFile[0]->Type() eq ::TOPIC_FILE());
        }
        else {
            $useTopicTitle = (scalar @parsedFile == 1
                              || NaturalDocs::Topics->TypeInfo($parsedFile[0]->Type())->PageTitleIfFirst());
        }

        if ($useTopicTitle) {
            $defaultMenuTitle = $parsedFile[0]->Title();
        }
        else {
            # If the title ended up being the file name, add a leading section for it.
            my $fileName = (NaturalDocs::File->SplitPath($sourceFile))[2];

            unshift @parsedFile,
                NaturalDocs::Parser::ParsedTopic->New(::TOPIC_FILE(), $fileName,
                                                      undef, undef, undef, undef, undef, 1, undef);
        }
    }

    NaturalDocs::Error->OnEndParsing($sourceFile);

    return $defaultMenuTitle;
}


#
#   Function: CleanComment
#
#   Removes any extraneous formatting and whitespace from the comment.  Eliminates comment boxes, horizontal lines,
#   trailing whitespace from lines, and expands all tab characters.
#   It keeps leading whitespace, though, since it may be needed for example code, and blank lines, since the original
#   line numbers are needed.
#
#   Parameters:
#
#       commentLines - An arrayref of the comment lines to clean.  *The original memory will be changed.*  Lines should
#                      have the language's comment symbols replaced by spaces and not have a trailing line break.
#
sub CleanComment {    #(commentLines)
    my ($self, $commentLines) = @_;

    # States of the vertical (box side) line detection for each side of the comment.
    use constant DONT_KNOW            => 0;
    use constant IS_UNIFORM           => 1;
    use constant IS_UNIFORM_IF_AT_END => 2;
    use constant IS_NOT_UNIFORM       => 3;

    my $leftSide  = DONT_KNOW;
    my $rightSide = DONT_KNOW;
    my $leftSideChar;
    my $rightSideChar;

    my $tabLength = NaturalDocs::Settings->TabLength();

    # First pass: normalize whitespace and work out whether the comment has uniform vertical lines on either side.
    for my $index (0 .. $#{$commentLines}) {
        # Strip trailing whitespace from the original.
        $commentLines->[$index] =~ s/[ \t]+$//;

        # Expand tabs in the original.  This method is almost six times faster than Text::Tabs' method.
        my $tabIndex = index($commentLines->[$index], "\t");

        while ($tabIndex != -1) {
            substr($commentLines->[$index], $tabIndex, 1, ' ' x ($tabLength - ($tabIndex % $tabLength)));
            $tabIndex = index($commentLines->[$index], "\t", $tabIndex);
        }

        # Make a working copy and strip leading whitespace as well.  This has to be done after tabs are expanded
        # because stripping indentation could change how far tabs are expanded.
        my $line = $commentLines->[$index];
        $line =~ s/^ +//;

        # If the line is blank...
        if (!length $line) {
            # If we have a potential vertical line, this is only acceptable if it's at the end of the comment.
            if ($leftSide == IS_UNIFORM)  { $leftSide  = IS_UNIFORM_IF_AT_END; }
            if ($rightSide == IS_UNIFORM) { $rightSide = IS_UNIFORM_IF_AT_END; }
        }

        # If there's at least four symbols in a row, it's a horizontal line.  The second regex supports differing edge
        # characters.  It doesn't matter if any of this matches the left and right side symbols.  The length < 256 is
        # a sanity check, because that regexp has caused the perl regexp engine to choke on an insane line from an
        # automatically generated file.  It had over 10k characters on the first line, and most of them were 0x00.
        elsif ($line =~ /^([^a-zA-Z0-9 ])\1{3,}$/
               || (length($line) < 256
                   && $line =~ /^([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/)) {
            # Ignore it.  This has no effect on the vertical line detection.  We want to keep it in the output though
            # in case it was in a code section.
        }

        # If the line is not blank or a horizontal line...
        else {
            # More content means any previous blank lines are no longer tolerated in vertical line detection.  They
            # are only acceptable at the end of the comment.
            if ($leftSide == IS_UNIFORM_IF_AT_END)  { $leftSide  = IS_NOT_UNIFORM; }
            if ($rightSide == IS_UNIFORM_IF_AT_END) { $rightSide = IS_NOT_UNIFORM; }

            # Detect vertical lines.  Lines are only lines if they are followed by whitespace or a connected
            # horizontal line.  Otherwise we may accidentally detect lines from short comments that just happen to
            # have every first or last character the same.
            if ($leftSide != IS_NOT_UNIFORM) {
                if ($line =~ /^([^a-zA-Z0-9])\1*(?: |$)/) {
                    if ($leftSide == DONT_KNOW) {
                        $leftSide     = IS_UNIFORM;
                        $leftSideChar = $1;
                    }
                    elsif ($leftSideChar ne $1) {    # ($leftSide == IS_UNIFORM)  Other choices already ruled out.
                        $leftSide = IS_NOT_UNIFORM;
                    }
                }
                # We'll tolerate the lack of symbols on the left on the first line, because it may be a
                # /* Function: Whatever
                #  * Description.
                #  */
                # comment which would have the leading /* blanked out.
                elsif ($index != 0) {
                    $leftSide = IS_NOT_UNIFORM;
                }
            }

            if ($rightSide != IS_NOT_UNIFORM) {
                if ($line =~ / ([^a-zA-Z0-9])\1*$/) {
                    if ($rightSide == DONT_KNOW) {
                        $rightSide     = IS_UNIFORM;
                        $rightSideChar = $1;
                    }
                    elsif ($rightSideChar ne $1) {    # ($rightSide == IS_UNIFORM)  Other choices already ruled out.
                        $rightSide = IS_NOT_UNIFORM;
                    }
                }
                else {
                    $rightSide = IS_NOT_UNIFORM;
                }
            }

            # We'll remove vertical lines later if they're uniform throughout the entire comment.
        }
    }

    # Trailing blank lines are fine, so a pending "if at end" state is confirmed here.
    if ($leftSide == IS_UNIFORM_IF_AT_END)  { $leftSide  = IS_UNIFORM; }
    if ($rightSide == IS_UNIFORM_IF_AT_END) { $rightSide = IS_UNIFORM; }

    # Second pass: actually remove the horizontal and vertical lines.
    my $inCodeSection = 0;

    for my $index (0 .. $#{$commentLines}) {
        # Clear horizontal lines only if we're not in a code section.
        if ($commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1{3,}$/
            || (length($commentLines->[$index]) < 256
                && $commentLines->[$index] =~ /^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$/)) {
            if (!$inCodeSection) {
                $commentLines->[$index] = '';
            }
        }
        else {
            # Clear vertical lines.
            if ($leftSide == IS_UNIFORM) {
                # This works because every line should either start this way, be blank, or be the first line that
                # doesn't start with a symbol.
                $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*//;
            }

            if ($rightSide == IS_UNIFORM) {
                $commentLines->[$index] =~ s/ *([^a-zA-Z0-9 ])\1*$//;
            }

            # Clear horizontal lines again if there were vertical lines.  This catches lines that were separated from
            # the verticals by whitespace.
            if (($leftSide == IS_UNIFORM || $rightSide == IS_UNIFORM) && !$inCodeSection) {
                $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1{3,}$//;
                $commentLines->[$index] =~ s/^ *([^a-zA-Z0-9 ])\1*([^a-zA-Z0-9 ])\2{3,}([^a-zA-Z0-9 ])\3*$//;
            }

            # Check for the start and end of code sections.  Note that this doesn't affect vertical line removal.
            if (!$inCodeSection
                && $commentLines->[$index] =~ /^ *\( *(?:(?:start|begin)? +)?(?:table|code|example|diagram) *\)$/i) {
                $inCodeSection = 1;
            }
            elsif ($inCodeSection
                   && $commentLines->[$index] =~ /^ *\( *(?:end|finish|done)(?: +(?:table|code|example|diagram))? *\)$/i) {
                $inCodeSection = 0;
            }
        }
    }
}


###############################################################################
# Group: Processing Functions


#
#   Function: RepairPackages
#
#   Recalculates the packages for all comment topics using the auto-topics and the scope record.  Call this *before*
#   calling MergeAutoTopics().
#
#   Parameters:
#
#       autoTopics  - A reference to the list of automatically generated topics.
#       scopeRecord - A reference to an array of scope change records.  Each is read via LineNumber() and Scope().
#
sub RepairPackages {    #(autoTopics, scopeRecord)
    my ($self, $autoTopics, $scopeRecord) = @_;

    my $topicIndex     = 0;
    my $autoTopicIndex = 0;
    my $scopeIndex     = 0;

    my $topic       = $parsedFile[0];
    my $autoTopic   = $autoTopics->[0];
    my $scopeChange = $scopeRecord->[0];

    my $currentPackage;
    my $inFakePackage;

    # Walk the three line-ordered sequences together, always handling whichever comes first in the file.
    while (defined $topic) {
        # First update the scope via the record if it's defined and has the lowest line number.
        if (defined $scopeChange
            && $scopeChange->LineNumber() <= $topic->LineNumber()
            && (!defined $autoTopic || $scopeChange->LineNumber() <= $autoTopic->LineNumber())) {
            $currentPackage = $scopeChange->Scope();
            $scopeIndex++;
            $scopeChange = $scopeRecord->[$scopeIndex];    # Will be undef when past end.
            $inFakePackage = undef;
        }

        # Next try to end a fake scope with an auto topic if it's defined and has the lowest line number.
        elsif (defined $autoTopic && $autoTopic->LineNumber() <= $topic->LineNumber()) {
            if ($inFakePackage) {
                $currentPackage = $autoTopic->Package();
                $inFakePackage  = undef;
            }

            $autoTopicIndex++;
            $autoTopic = $autoTopics->[$autoTopicIndex];    # Will be undef when past end.
        }

        # Finally try to handle the topic, since it has the lowest line number.  Check for Type() because headerless
        # topics won't have one.
        else {
            my $scope;

            if ($topic->Type()) {
                $scope = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope();
            }
            else {
                $scope = ::SCOPE_NORMAL();
            }

            if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END()) {
                # They should already have the correct class and scope.
                $currentPackage = $topic->Package();
                $inFakePackage  = 1;
            }
            else {
                # Fix the package of everything else.

                # Note that the first function or variable topic to appear in a fake package will assume that package
                # even if it turns out to be incorrect in the actual code, since the topic will come before the
                # auto-topic.  This will be corrected in MergeAutoTopics().
                $topic->SetPackage($currentPackage);
            }

            $topicIndex++;
            $topic = $parsedFile[$topicIndex];    # Will be undef when past end.
        }
    }
}


#
#   Function: MergeAutoTopics
#
#   Merges the automatically generated topics into the file.  If an auto-topic matches an existing topic, it will have
#   its prototype and package transferred.  If it doesn't, the auto-topic will be inserted into the list unless
#   NaturalDocs::Settings->DocumentedOnly() is set.  If an existing topic doesn't have a title, it's assumed to be a
#   headerless comment and will be merged with the next auto-topic or discarded.
#
#   Parameters:
#
#       language   - The language object for the file.
#       autoTopics - A reference to the list of automatically generated topics.
#
sub MergeAutoTopics {    #(language, autoTopics)
    my ($self, $language, $autoTopics) = @_;

    my $topicIndex     = 0;
    my $autoTopicIndex = 0;

    # Keys are topic types, values are existence hashrefs of titles.
    my %topicsInLists;

    while ($topicIndex < scalar @parsedFile && $autoTopicIndex < scalar @$autoTopics) {
        my $topic     = $parsedFile[$topicIndex];
        my $autoTopic = $autoTopics->[$autoTopicIndex];

        # Drop a trailing parenthetical, such as a parameter list, from the title before comparing.
        my $cleanTitle = $topic->Title();
        $cleanTitle =~ s/[\t ]*\([^\(]*$//;

        # Add the auto-topic if it's higher in the file than the current topic.
        if ($autoTopic->LineNumber() < $topic->LineNumber()) {
            if (exists $topicsInLists{$autoTopic->Type()}
                && exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()}) {
                # Remove it from the list so a second one with the same name will be added.
                delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
            }
            elsif (!NaturalDocs::Settings->DocumentedOnly()) {
                splice(@parsedFile, $topicIndex, 0, $autoTopic);
                $topicIndex++;
            }

            $autoTopicIndex++;
        }

        # Remove a headerless topic if there's another topic between it and the next auto-topic.
        elsif (!$topic->Title()
               && $topicIndex + 1 < scalar @parsedFile
               && $parsedFile[$topicIndex + 1]->LineNumber() < $autoTopic->LineNumber()) {
            splice(@parsedFile, $topicIndex, 1);
        }

        # Transfer information if we have a match or a headerless topic.
        # NOTE(review): the types are compared numerically here while the rest of the file compares topic types with
        # eq/ne.  If Type() returns non-numeric strings this test is always true and only the title decides the match.
        # Left as-is because callers may rely on it; confirm the intent.
        elsif (!$topic->Title()
               || ($topic->Type() == $autoTopic->Type() && index($autoTopic->Title(), $cleanTitle) != -1)) {
            $topic->SetType($autoTopic->Type());
            $topic->SetPrototype($autoTopic->Prototype());
            $topic->SetUsing($autoTopic->Using());

            if (!$topic->Title()) {
                $topic->SetTitle($autoTopic->Title());
            }

            if (NaturalDocs::Topics->TypeInfo($topic->Type())->Scope() != ::SCOPE_START()) {
                $topic->SetPackage($autoTopic->Package());
            }
            elsif ($autoTopic->Package() ne $topic->Package()) {
                # For scope-starting topics, strip the identifiers both packages share at the end and keep whatever
                # is left of the auto-topic's package.
                my @autoPackageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($autoTopic->Package());
                my @packageIdentifiers     = NaturalDocs::SymbolString->IdentifiersOf($topic->Package());

                while (scalar @autoPackageIdentifiers && $autoPackageIdentifiers[-1] eq $packageIdentifiers[-1]) {
                    pop @autoPackageIdentifiers;
                    pop @packageIdentifiers;
                }

                if (scalar @autoPackageIdentifiers) {
                    $topic->SetPackage(NaturalDocs::SymbolString->Join(@autoPackageIdentifiers));
                }
            }

            $topicIndex++;
            $autoTopicIndex++;
        }

        # Extract topics in lists.
        elsif ($topic->IsList()) {
            if (!exists $topicsInLists{$topic->Type()}) {
                $topicsInLists{$topic->Type()} = { };
            }

            my $body = $topic->Body();

            # Each list entry's symbol is wrapped in <ds>...</ds>.
            while ($body =~ /<ds>([^<]+)<\/ds>/g) {
                $topicsInLists{$topic->Type()}->{NaturalDocs::NDMarkup->RestoreAmpChars($1)} = 1;
            }

            $topicIndex++;
        }

        # Otherwise there's no match.  Skip the topic.  The auto-topic will be added later.
        else {
            $topicIndex++;
        }
    }

    # Add any auto-topics remaining.
    if (!NaturalDocs::Settings->DocumentedOnly()) {
        while ($autoTopicIndex < scalar @$autoTopics) {
            my $autoTopic = $autoTopics->[$autoTopicIndex];

            if (exists $topicsInLists{$autoTopic->Type()}
                && exists $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()}) {
                # Remove it from the list so a second one with the same name will be added.
                delete $topicsInLists{$autoTopic->Type()}->{$autoTopic->Title()};
            }
            else {
                push(@parsedFile, $autoTopic);
            }

            $autoTopicIndex++;
        }
    }
}


#
#   Function: RemoveRemainingHeaderlessTopics
#
#   After MergeAutoTopics() is done, this function removes any remaining headerless topics from the file.  If they
#   don't merge into anything, they're not valid topics.
#
sub RemoveRemainingHeaderlessTopics {
    my ($self) = @_;

    # Keep only the topics that ended up with a title.
    @parsedFile = grep { $_->Title() } @parsedFile;
}


#
#   Function: MakeAutoGroups
#
#   Creates group topics for files that do not have them.
#
sub MakeAutoGroups {
    my ($self) = @_;

    # No groups if there's only one topic.
    if (scalar @parsedFile < 2) {
        return;
    }

    my $index        = 0;
    my $startStretch = 0;

    # Skip the first entry if it's the page title.
    if (NaturalDocs::Topics->TypeInfo($parsedFile[0]->Type())->PageTitleIfFirst()) {
        $index        = 1;
        $startStretch = 1;
    }

    # Make auto-groups for each stretch between scope-altering topics.
    while ($index < scalar @parsedFile) {
        my $scope = NaturalDocs::Topics->TypeInfo($parsedFile[$index]->Type())->Scope();

        if ($scope == ::SCOPE_START() || $scope == ::SCOPE_END()) {
            if ($index > $startStretch) {
                # Inserted group topics shift the scope topic forward, so advance by the number added.
                $index += $self->MakeAutoGroupsFor($startStretch, $index);
            }

            $startStretch = $index + 1;
        }

        $index++;
    }

    if ($index > $startStretch) {
        $self->MakeAutoGroupsFor($startStretch, $index);
    }
}


#
#   Function: MakeAutoGroupsFor
#
#   Creates group topics for sections of files that do not have them.  A support function for MakeAutoGroups().
#
#   Parameters:
#
#       startIndex - The index to start at.
#       endIndex   - The index to end at.  Not inclusive.
#
#   Returns:
#
#       The number of group topics added.
#
sub MakeAutoGroupsFor {    #(startIndex, endIndex)
    my ($self, $startIndex, $endIndex) = @_;

    # No groups if any are defined already.
    for (my $i = $startIndex; $i < $endIndex; $i++) {
        if ($parsedFile[$i]->Type() eq ::TOPIC_GROUP()) {
            return 0;
        }
    }

    # Offsets into each record of the flat @groups array, and the record size.
    use constant COUNT       => 0;
    use constant TYPE        => 1;
    use constant SECOND_TYPE => 2;
    use constant SIZE        => 3;

    # This is an array of ( count, type, secondType ) triples.  Count and Type will always be filled in; count is the
    # number of consecutive topics of type.  On the second pass, if small groups are combined secondType will be
    # filled in.  There will not be more than two types per group.
    my @groups;
    my $groupIndex = 0;


    # First pass: Determine all the groups.

    my $i = $startIndex;
    my $currentType;

    while ($i < $endIndex) {
        my $topicType = $parsedFile[$i]->Type();

        # Generic topics never start a new group; they're counted with whatever group they appear in.
        if (!defined $currentType || ($topicType ne $currentType && $topicType ne ::TOPIC_GENERIC())) {
            if (defined $currentType) {
                $groupIndex += SIZE;
            }

            $currentType = $topicType;

            $groups[$groupIndex + COUNT] = 1;
            $groups[$groupIndex + TYPE]  = $currentType;
        }
        else {
            $groups[$groupIndex + COUNT]++;
        }

        $i++;
    }


    # Second pass: Combine groups based on "noise".  Noise means types go from A to B to A at least once, and there
    # are at least two groups in a row with three or less, and at least one of those groups is two or less.  So
    # 3, 3, 3 doesn't count as noise, but 3, 2, 3 does.

    $groupIndex = 0;

    # While there are at least three groups left...
    while ($groupIndex < scalar @groups - (2 * SIZE)) {
        # If the group two places in front of this one has the same type...
        if ($groups[$groupIndex + (2 * SIZE) + TYPE] eq $groups[$groupIndex + TYPE]) {
            # It means we went from A to B to A, which partially qualifies as noise.
            my $firstType  = $groups[$groupIndex + TYPE];
            my $secondType = $groups[$groupIndex + SIZE + TYPE];

            if (NaturalDocs::Topics->TypeInfo($firstType)->CanGroupWith($secondType)
                || NaturalDocs::Topics->TypeInfo($secondType)->CanGroupWith($firstType)) {
                my $hasNoise;
                my $hasThrees;
                my $hasTwosOrOnes;

                # Find the end of the run of groups alternating between the two types.
                my $runEnd = $groupIndex;

                while ($runEnd < scalar @groups
                       && ($groups[$runEnd + TYPE] eq $firstType || $groups[$runEnd + TYPE] eq $secondType)) {
                    if ($groups[$runEnd + COUNT] > 3) {
                        # They must be consecutive to count.
                        $hasThrees     = 0;
                        $hasTwosOrOnes = 0;
                    }
                    elsif ($groups[$runEnd + COUNT] == 3) {
                        $hasThrees = 1;

                        if ($hasTwosOrOnes) {
                            $hasNoise = 1;
                        }
                    }
                    else {    # < 3
                        if ($hasThrees || $hasTwosOrOnes) {
                            $hasNoise = 1;
                        }

                        $hasTwosOrOnes = 1;
                    }

                    $runEnd += SIZE;
                }

                if (!$hasNoise) {
                    $groupIndex = $runEnd - SIZE;
                }
                else {    # hasNoise
                    # Fold the whole run into the first group and record the second type for its title.
                    $groups[$groupIndex + SECOND_TYPE] = $secondType;

                    for (my $noiseIndex = $groupIndex + SIZE; $noiseIndex < $runEnd; $noiseIndex += SIZE) {
                        $groups[$groupIndex + COUNT] += $groups[$noiseIndex + COUNT];
                    }

                    splice(@groups, $groupIndex + SIZE, $runEnd - $groupIndex - SIZE);

                    $groupIndex += SIZE;
                }
            }
            else {    # They can't group together
                $groupIndex += SIZE;
            }
        }
        else {
            $groupIndex += SIZE;
        }
    }


    # Finally, create group topics for the parsed file.

    $groupIndex = 0;
    $i = $startIndex;

    # Count the topics actually inserted.  The length of @groups can't be used for this: the last record is usually
    # shorter than SIZE, and a leading generic group gets no group topic at all.
    my $groupsAdded = 0;

    while ($groupIndex < scalar @groups) {
        if ($groups[$groupIndex + TYPE] ne ::TOPIC_GENERIC()) {
            my $topic = $parsedFile[$i];
            my $title = NaturalDocs::Topics->NameOfType($groups[$groupIndex + TYPE], 1);

            if (defined $groups[$groupIndex + SECOND_TYPE]) {
                $title .= ' and ' . NaturalDocs::Topics->NameOfType($groups[$groupIndex + SECOND_TYPE], 1);
            }

            splice(@parsedFile, $i, 0,
                   NaturalDocs::Parser::ParsedTopic->New(::TOPIC_GROUP(), $title, $topic->Package(), $topic->Using(),
                                                         undef, undef, undef, $topic->LineNumber()));

            $groupsAdded++;
            $i++;
        }

        $i += $groups[$groupIndex + COUNT];
        $groupIndex += SIZE;
    }

    return $groupsAdded;
}


#
#   Function: AddToClassHierarchy
#
#   Adds any class topics to the class hierarchy, since they may not have been passed to OnClass() if they didn't
#   match up to an auto-topic.
#
sub AddToClassHierarchy {
    my ($self) = @_;

    foreach my $topic (@parsedFile) {
        # Headerless topics have no type, so check for one before looking up its info.
        next unless $topic->Type() && NaturalDocs::Topics->TypeInfo($topic->Type())->ClassHierarchy();

        if ($topic->IsList()) {
            my $body = $topic->Body();

            # Each list entry's symbol is wrapped in <ds>...</ds>.
            while ($body =~ /<ds>([^<]+)<\/ds>/g) {
                $self->OnClass(
                    NaturalDocs::SymbolString->FromText( NaturalDocs::NDMarkup->RestoreAmpChars($1) ) );
            }
        }
        else {
            $self->OnClass($topic->Package());
        }
    }
}


#
#   Function: AddPackageDelineators
#
#   Adds section and class topics to make sure the package is correctly represented in the documentation.  Should be
#   called last in this process.
#
sub AddPackageDelineators {
    my ($self) = @_;

    my $index = 0;
    my $currentPackage;

    # Values are the arrayref [ title, type ];
    my %usedPackages;

    while ($index < scalar @parsedFile) {
        my $topic = $parsedFile[$index];

        if ($topic->Package() ne $currentPackage) {
            $currentPackage = $topic->Package();
            my $scopeType = NaturalDocs::Topics->TypeInfo($topic->Type())->Scope();

            if ($scopeType == ::SCOPE_START()) {
                # The topic itself opens the package, so just remember how it was titled.
                $usedPackages{$currentPackage} = [ $topic->Title(), $topic->Type() ];
            }

            # A scope-ending topic is its own delineator.  Anything else whose package changed needs one inserted in
            # front of it.
            # NOTE(review): the source tested "== SCOPE_END" here, which would only ever insert a delineator in front
            # of a topic that already ends the scope; changed to "!=".  Confirm against the upstream source.
            elsif ($scopeType != ::SCOPE_END()) {
                my $newTopic;

                if (!defined $currentPackage) {
                    $newTopic = NaturalDocs::Parser::ParsedTopic->New(::TOPIC_SECTION(), 'Global',
                                                                      undef, undef, undef, undef, undef,
                                                                      $topic->LineNumber(), undef);
                }
                else {
                    my ($title, $body, $summary, $type);
                    my @packageIdentifiers = NaturalDocs::SymbolString->IdentifiersOf($currentPackage);

                    if (exists $usedPackages{$currentPackage}) {
                        # We've been in this package before, so this is a continuation of it.
                        $title   = $usedPackages{$currentPackage}->[0];
                        $type    = $usedPackages{$currentPackage}->[1];
                        $body    = '<p>(continued)</p>';
                        $summary = '(continued)';
                    }
                    else {
                        $title = join($language->PackageSeparator(), @packageIdentifiers);
                        $type  = ::TOPIC_CLASS();

                        # Body and summary stay undef.

                        # Store the same [ title, type ] shape the branch above reads back.  Storing the bare title
                        # would die under strict refs the next time this package is continued.
                        $usedPackages{$currentPackage} = [ $title, $type ];
                    }

                    # The new topic's package is the current package minus the identifiers its title supplies.
                    my @titleIdentifiers =
                        NaturalDocs::SymbolString->IdentifiersOf( NaturalDocs::SymbolString->FromText($title) );

                    for (my $i = 0; $i < scalar @titleIdentifiers; $i++) {
                        pop @packageIdentifiers;
                    }

                    $newTopic = NaturalDocs::Parser::ParsedTopic->New($type, $title,
                                                                      NaturalDocs::SymbolString->Join(@packageIdentifiers),
                                                                      undef, undef, $summary, $body,
                                                                      $topic->LineNumber(), undef);
                }

                splice(@parsedFile, $index, 0, $newTopic);
                $index++;
            }
        }

        $index++;
    }
}


#
#   Function: BreakLists
#
#   Breaks list topics into individual topics.
#
sub BreakLists {
    my $self = shift;

    my $index = 0;

    while ($index < scalar @parsedFile) {
        my $topic = $parsedFile[$index];

        if ($topic->IsList() && NaturalDocs::Topics->TypeInfo($topic->Type())->BreakLists()) {
            my $body = $topic->Body();

            my @newTopics;
            my $newBody;

            my $bodyIndex = 0;

            # Pull each <dl>...</dl> description list out of the body, turning its entries into topics and keeping
            # everything outside the lists in $newBody.
            for (;;) {
                my $startList = index($body, '<dl>', $bodyIndex);

                if ($startList == -1) {
                    last;
                }

                $newBody .= substr($body, $bodyIndex, $startList - $bodyIndex);

                my $endList  = index($body, '</dl>', $startList);
                my $listBody = substr($body, $startList, $endList - $startList);

                while ($listBody =~ /<ds>([^<]+)<\/ds><dd>(.*?)<\/dd>/g) {
                    my ($symbol, $description) = ($1, $2);

                    push @newTopics,
                        NaturalDocs::Parser::ParsedTopic->New($topic->Type(), $symbol, $topic->Package(),
                                                              $topic->Using(), undef,
                                                              $self->GetSummaryFromDescriptionList($description),
                                                              '<p>' . $description . '</p>',
                                                              $topic->LineNumber(), undef);
                }

                # Skip past the closing tag; 5 is the length of '</dl>'.
                $bodyIndex = $endList + 5;
            }

            $newBody .= substr($body, $bodyIndex);

            # Remove trailing headings.
            $newBody =~ s/(?:<h>[^<]+<\/h>)+$//;

            # Remove empty headings.
            $newBody =~ s/(?:<h>[^<]+<\/h>)+(<h>[^<]+<\/h>)/$1/g;

            # Whatever text was outside the lists becomes a group topic in front of the new topics.
            if ($newBody) {
                unshift @newTopics,
                    NaturalDocs::Parser::ParsedTopic->New(::TOPIC_GROUP(), $topic->Title(), $topic->Package(),
                                                          $topic->Using(), undef,
                                                          $self->GetSummaryFromBody($newBody), $newBody,
                                                          $topic->LineNumber(), undef);
            }

            splice(@parsedFile, $index, 1, @newTopics);

            $index += scalar @newTopics;
        }
        else {    # not a list
            $index++;
        }
    }
}


#
#   Function: GetSummaryFromBody
#
#   Returns the summary text from the topic body.
#
#   Parameters:
#
#       body - The complete topic body, in NDMarkup.
#
#   Returns:
#
#       The topic summary, or undef if none.
#
sub GetSummaryFromBody {    #(body)
    my ($self, $body) = @_;

    my $summary;

    # Extract the first sentence from the leading paragraph, if any.  We'll tolerate a single header beforehand, but
    # nothing else.  A sentence ends at the paragraph end, or at . ! ? followed by a space, closing bracket or quote.
    # NOTE(review): the &quot; and &gt; alternatives are written as entities because the body is amp-encoded (see the
    # RestoreAmpChars() calls elsewhere in this file); confirm against NaturalDocs::NDMarkup.
    if ($body =~ /^(?:<h>[^<]*<\/h>)?<p>(.*?)(<\/p>|[\.\!\?](?:[\)\}\'\ ]|&quot;|&gt;))/x) {
        $summary = $1;

        # Keep the sentence terminator, but never the closing paragraph tag.
        if ($2 ne '</p>') {
            $summary .= $2;
        }
    }

    return $summary;
}


#
#   Function: GetSummaryFromDescriptionList
#
#   Returns the summary text from a description list entry.
#
#   Parameters:
#
#       description - The description in NDMarkup.  Should be the content between the
#                     <dd></dd> tags only.
#
#   Returns:
#
#       The description summary, or undef if none.
#
sub GetSummaryFromDescriptionList {    #(description)
    my ($self, $description) = @_;

    my $summary;

    # The summary is the first sentence: everything up to the end of the description, or up to . ! ? followed by a
    # space, closing bracket or quote.  The terminator is kept as part of the summary.
    # NOTE(review): the &quot; and &gt; alternatives are written as entities on the assumption that the description
    # is amp-encoded NDMarkup; a raw " or > would instead match inside tag attributes.  Confirm against
    # NaturalDocs::NDMarkup.
    if ($description =~ /^(.*?)($|[\.\!\?](?:[\)\}\'\ ]|&quot;|&gt;))/) {
        $summary = $1 . $2;
    }

    return $summary;
}


1;