ddnet/docs/tool/Modules/NaturalDocs/ConfigFile.pm
2008-08-02 08:21:29 +00:00

498 lines
13 KiB
Perl

###############################################################################
#
# Package: NaturalDocs::ConfigFile
#
###############################################################################
#
# A package to manage Natural Docs' configuration files.
#
# Usage:
#
# - Only one configuration file can be managed with this package at a time. You must close the file before opening another
# one.
#
###############################################################################
# This file is part of Natural Docs, which is Copyright (C) 2003-2008 Greg Valure
# Natural Docs is licensed under the GPL
use strict;
use integer;
package NaturalDocs::ConfigFile;
#
# Topic: Format
#
# All configuration files are text files.
#
# > # [comment]
#
# Comments start with the # character.
#
# > Format: [version]
#
# All configuration files *must* have a format line as its first line containing content. Whitespace and comments are permitted
# ahead of it.
#
# > [keyword]: [value]
#
# Keywords can only contain <CFChars>. Keywords are not case sensitive. Values can be anything and run until the end of
# the line or a comment.
#
# > [value]
#
# Lines that don't start with a valid keyword format are considered to be all value.
#
# > [line] { [line] } [line]
#
# Files supporting brace groups (specified in <Open()>) may also have braces that can appear anywhere. It allows more than
# one thing to appear per line, which isn't supported otherwise. Consequently, values may not have braces.
#
#
# Type: CFChars
#
# The characters that can appear in configuration file keywords and user-defined element names: letters, numbers, spaces,
# dashes, slashes, apostrophes, and periods.
#
# Although the list above is exhaustive, it should be noted that you especially can *not* use colons (messes up keyword: value
# sequences) commas (messes up item, item, item list sequences) and hashes (messes up comment detection.)
#
# You can search the source code for [CFChars] to find all the instances where this definition is used.
#
###############################################################################
# Group: Variables
#
# handle: CONFIG_FILEHANDLE
#
# The file handle used for the configuration file.
#
#
# string: file
#
# The <FileName> for the current configuration file being parsed.
#
my $file;
#
# array: errors
#
# An array of errors added by <AddError()>. Every odd entry is the line number, and every even entry following is the
# error message.
#
my @errors;
#
# var: lineNumber
#
# The current line number for the configuration file.
#
my $lineNumber;
#
# bool: hasBraceGroups
#
# Whether the file has brace groups or not.
#
my $hasBraceGroups;
#
# array: virtualLines
#
# An array of virtual lines if a line from the file contained more than one.
#
# Files with brace groups may have more than one virtual line per actual file line, such as "Group: A { Group: B". When that
# happens, any extra virtual lines are put into here so they can be returned on the next call.
#
my @virtualLines;
###############################################################################
# Group: Functions
#
# Function: Open
#
# Opens a configuration file for parsing and returns the format <VersionInt>.
#
# Parameters:
#
# file - The <FileName> to parse.
# hasBraceGroups - Whether the file supports brace groups or not. If so, lines with braces will be split apart behind the
# scenes.
#
# Returns:
#
# The <VersionInt> of the file, or undef if the file doesn't exist.
#
sub Open #(file, hasBraceGroups)
{
my $self;
($self, $file, $hasBraceGroups) = @_;
@errors = ( );
# It will be incremented to one when the first line is read from the file.
$lineNumber = 0;
open(CONFIG_FILEHANDLE, '<' . $file) or return undef;
# Get the format line.
my ($keyword, $value, $comment) = $self->GetLine();
if ($keyword eq 'format')
{ return NaturalDocs::Version->FromString($value); }
else
{ die "The first content line in " . $file . " must be the Format: line.\n"; };
};
#
# Function: Close
#
# Closes the current configuration file.
#
sub Close
{
my $self = shift;
close(CONFIG_FILEHANDLE);
};
#
# Function: GetLine
#
# Returns the next line containing content, or an empty array if none.
#
# Returns:
#
# Returns the array ( keyword, value, comment ), or an empty array if none. All tabs will be converted to spaces, and all
# whitespace will be condensed into a single space.
#
# keyword - The keyword part of the line, if any. Is converted to lowercase and doesn't include the colon. If the file supports
# brace groups, opening and closing braces will be returned as keywords.
# value - The value part of the line, minus any whitespace. Keeps its original case.
# comment - The comment following the line, if any. This includes the # symbol and a leading space if there was
# any whitespace, since it may be significant. Otherwise undef. Used for lines where the # character needs to be
# accepted as part of the value.
#
sub GetLine
{
my $self = shift;
my ($line, $comment);
# Get the next line with content.
do
{
# Get the next line.
my $isFileLine;
if (scalar @virtualLines)
{
$line = shift @virtualLines;
$isFileLine = 0;
}
else
{
$line = <CONFIG_FILEHANDLE>;
$lineNumber++;
if (!defined $line)
{ return ( ); };
::XChomp(\$line);
# Condense spaces and tabs into a single space.
$line =~ tr/\t / /s;
$isFileLine = 1;
};
# Split off the comment.
if ($line =~ /^(.*?)( ?#.*)$/)
{ ($line, $comment) = ($1, $2); }
else
{ $comment = undef; };
# Split any brace groups.
if ($isFileLine && $hasBraceGroups && $line =~ /[\{\}]/)
{
($line, @virtualLines) = split(/([\{\}])/, $line);
$virtualLines[-1] .= $comment;
$comment = undef;
};
# Remove whitespace.
$line =~ s/^ //;
$line =~ s/ $//;
$comment =~ s/ $//;
# We want to keep the leading space on a comment.
}
while (!$line);
# Process the line.
if ($hasBraceGroups && ($line eq '{' || $line eq '}'))
{
return ($line, undef, undef);
};
if ($line =~ /^([a-z0-9\ \'\/\.\-]+?) ?: ?(.*)$/i) # [CFChars]
{
my ($keyword, $value) = ($1, $2);
return (lc($keyword), $value, $comment);
}
else
{
return (undef, $line, $comment);
};
};
#
# Function: LineNumber
#
# Returns the line number for the line last returned by <GetLine()>.
#
sub LineNumber
{ return $lineNumber; };
###############################################################################
# Group: Error Functions
#
# Function: AddError
#
# Stores an error for the current configuration file. Will be attached to the last line read by <GetLine()>.
#
# Parameters:
#
# message - The error message.
# lineNumber - The line number to use. If not specified, it will use the line number from the last call to <GetLine()>.
#
sub AddError #(message, lineNumber)
{
my ($self, $message, $messageLineNumber) = @_;
if (!defined $messageLineNumber)
{ $messageLineNumber = $lineNumber; };
push @errors, $messageLineNumber, $message;
};
#
# Function: ErrorCount
#
# Returns how many errors the configuration file has.
#
sub ErrorCount
{
return (scalar @errors) / 2;
};
#
# Function: PrintErrorsAndAnnotateFile
#
# Prints the errors to STDERR in the standard GNU format and annotates the configuration file with them. It does *not* end
# execution. <Close()> *must* be called before this function.
#
sub PrintErrorsAndAnnotateFile
{
my ($self) = @_;
if (scalar @errors)
{
open(CONFIG_FILEHANDLE, '<' . $file);
my @lines = <CONFIG_FILEHANDLE>;
close(CONFIG_FILEHANDLE);
# We need to keep track of both the real and the original line numbers. The original line numbers are for matching errors in
# the errors array, and don't include any comment lines added or deleted. Line number is the current line number including
# those comment lines for sending to the display.
my $lineNumber = 1;
my $originalLineNumber = 1;
open(CONFIG_FILEHANDLE, '>' . $file);
# We don't want to keep the old error header, if present.
if ($lines[0] =~ /^\# There (?:is an error|are \d+ errors) in this file\./)
{
shift @lines;
$originalLineNumber++;
# We want to drop the blank line after it as well.
if ($lines[0] eq "\n")
{
shift @lines;
$originalLineNumber++;
};
};
if ($self->ErrorCount() == 1)
{
print CONFIG_FILEHANDLE
"# There is an error in this file. Search for ERROR to find it.\n\n";
}
else
{
print CONFIG_FILEHANDLE
"# There are " . $self->ErrorCount() . " errors in this file. Search for ERROR to find them.\n\n";
};
$lineNumber += 2;
foreach my $line (@lines)
{
while (scalar @errors && $originalLineNumber == $errors[0])
{
my $errorLine = shift @errors;
my $errorMessage = shift @errors;
print CONFIG_FILEHANDLE "# ERROR: " . $errorMessage . "\n";
# Use the GNU error format, which should make it easier to handle errors when Natural Docs is part of a build process.
# See http://www.gnu.org/prep/standards_15.html
$errorMessage = lcfirst($errorMessage);
$errorMessage =~ s/\.$//;
print STDERR 'NaturalDocs:' . $file . ':' . $lineNumber . ': ' . $errorMessage . "\n";
$lineNumber++;
};
# We want to remove error lines from previous runs.
if (substr($line, 0, 9) ne '# ERROR: ')
{
print CONFIG_FILEHANDLE $line;
$lineNumber++;
};
$originalLineNumber++;
};
# Clean up any remaining errors.
while (scalar @errors)
{
my $errorLine = shift @errors;
my $errorMessage = shift @errors;
print CONFIG_FILEHANDLE "# ERROR: " . $errorMessage . "\n";
# Use the GNU error format, which should make it easier to handle errors when Natural Docs is part of a build process.
# See http://www.gnu.org/prep/standards_15.html
$errorMessage = lcfirst($errorMessage);
$errorMessage =~ s/\.$//;
print STDERR 'NaturalDocs:' . $file . ':' . $lineNumber . ': ' . $errorMessage . "\n";
};
close(CONFIG_FILEHANDLE);
};
};
###############################################################################
# Group: Misc Functions
#
# Function: HasOnlyCFChars
#
# Returns whether the passed string contains only <CFChars>.
#
sub HasOnlyCFChars #(string)
{
my ($self, $string) = @_;
return ($string =~ /^[a-z0-9\ \.\-\/\']*$/i); # [CFChars]
};
#
# Function: CFCharNames
#
# Returns a plain-english list of <CFChars> which can be embedded in a sentence. For example, "You can only use
# [CFCharsList()] in the name.
#
sub CFCharNames
{
# [CFChars]
return 'letters, numbers, spaces, periods, dashes, slashes, and apostrophes';
};
#
# Function: Obscure
#
# Obscures the passed text so that it is not user editable and returns it. The encoding method is not secure; it is just designed
# to be fast and to discourage user editing.
#
sub Obscure #(text)
{
my ($self, $text) = @_;
# ` is specifically chosen to encode to space because of its rarity. We don't want a trailing one to get cut off before decoding.
$text =~ tr{a-zA-Z0-9\ \\\/\.\:\_\-\`}
{pY9fGc\`R8lAoE\\uIdH6tN\/7sQjKx0B5mW\.vZ41PyFg\:CrLaO\_eUi2DhT\-nSqJkXb3MwVz\ };
return $text;
};
#
# Function: Unobscure
#
# Restores text encoded with <Obscure()> and returns it.
#
sub Unobscure #(text)
{
my ($self, $text) = @_;
$text =~ tr{pY9fGc\`R8lAoE\\uIdH6tN\/7sQjKx0B5mW\.vZ41PyFg\:CrLaO\_eUi2DhT\-nSqJkXb3MwVz\ }
{a-zA-Z0-9\ \\\/\.\:\_\-\`};
return $text;
};
1;