package AMP::Process; ###### Process class
### 	$Id: Process.pm,v 1.21 2002/10/22 15:48:07 steve Exp $
###

# === BUG: &content; doesn't expand properly -- shows as ARRAY...
#	   parser should make Node for entity refs in content text,
#	   then expansion would be trivial.  Actually may need <expand>
#	   (with <content /> = <expand><get name=content /></expand>)
# === BUG: linkifier needs to ignore non-links
# === BUG: need sitewide, dir-wide list of name=link pairs (links.cf)
# === BUG: need <get name=...>default</get>

###	This class implements file processing: template expansion, Wiki
###	parsing, and automatic cross-linking.  Process is basically an
###	stack frame for a parse tree interpretor; the parser is separate.  
###	Parse tree nodes know how to expand themselves, by calling Process
###	methods on their components.

###	A Process node is, by itself, a namespace for entities.  Processing
###	proceeds in the following conceptual stages:  
###
###	1. parsing (which constructs a parse tree)
###	2. active-tag expansion
###	3. entity expansion (in attribute values and text) 
###	4. crosslink recognition in text 
###	5. output formatting.

###	In theory, it's possible to pipeline these stages, only producing
###	fragments of a parse tree when they need to be saved in a variable
###	or grovelled-over repeatedly.  === Currently we don't.

use XML::Node;
use AMP::ParseXH;
use Site::Util;

# Note that we don't have to "use" the auxiliary parsers; instead we
# "require" them when we discover they're needed.  ParseXH is needed
# in any case to expand templates.

#use AMP::ParseWiki;

use Exporter();
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

BEGIN {
    $VERSION=1.00;
    @ISA = qw(Exporter);
    @EXPORT = qw(
		 );
}

# Set of tags within which to suppress crosslinking.
%noxlink = ( 'a' => 1, 'title' => 1,
	    );

#############################################################################
###
### Constructor:
###

### Process->new($context, $urlpath, $filepath)
#
sub new {
    my ($class, $context, $path, $filepath) = (@_);

    my $self = { "_filePath" => $filepath,
		 "_context" => $context,
		 "_parent" => $context,
		 # set &content; to null for benefit of dir-index
		 # and others that use Process directly on template files 
		 "content" => "",
	     };

    $path =~ m|^(.*)/[^/]+/?$|;
    $self->{"_dirPath"} = $1;
    $self->{"_crosslink"} = 1;
    $self = bless $self, $class;

    return $self;
}

### $p->subProcess($parent)
#	make a subprocess, i.e. a stack frame.
sub subProcess {
    my ($parent, $node, $content) = (@_);
    my $class = ref $parent;
    $content = "" unless $content;

    my $self = {'_parent' => $parent,
		'_context' => $parent->getContext,
		'_node' => $node,
		'content' => $content,
	    };    
    $self = bless $self, $class;
    return $self;
}


#############################################################################
###
### access functions:
###

### $p->get(name) -> value
#	get a value.  Typically these are also useable as entities, but names
#	starting with non-alphanumeric characters will not be recognized as
#	entities and so can be used internally with impunity.
#
#	Climbs up the "_parent" link (subprocess to superprocess) if it finds
#	nothing at this level.  Returns undefined if no binding is found.
#
sub get {
    my ($self, $n) = (@_);
    my $v = $self->{$n};
    return $v if defined $v;

    # Look at current node's attributes, then at parent.  
    # There really ought to be a prioritized list. 

    my $p = $self->{'_node'}; # === should really use tag as a prefix
    if (ref $p) {
	$v = $p->attr($n);
	return $v if defined $v;
    }
    $p = $self->{'_parent'};
    return (ref $p)? $p->get($n) : $v;
}

sub getFilePath {
    my ($self) = (@_);
    return $self->get("_filePath");
}

sub getFullPath {
    my ($self) = (@_);
    return $ENV{DOCUMENT_ROOT} . $self->get("_filePath");
}

sub getContext {
    my ($self) = (@_);
    return $self->{"_context"};
}

sub getTagset {
    my ($self) = (@_);
    my $ts = $self->{"_tagset"};
    if (!ref $ts) {
	$ts = $self->getContext->getTagset;
	$self->{"_tagset"} = $ts;
	# print STDERR "loaded Tagset $ts\n";
	print STDERR "failed to get tagset from context\n" unless ref $ts;
    }
    return $ts;
}

sub allowCrosslinks {
    my ($self, $v) = (@_);
    $self->{"_crosslink"} = $v if defined $v;
    return $self->get("_crosslink");
}

#############################################################################
###
### Processing Files:
###

### $p->processFile($filepath) -> processed?
#	Process a single file.  Dispatch on extension.
#	Return 0 if the file cannot be processed.
#
#	If this is a prototype, $filepath will point to the prototype
#	and $p->getFullPath will point to the requested document.
#	 
sub processFile {
    my ($self, $filepath) = (@_);

    $filepath = $self->getFullPath unless -f $filepath;
    # === worry about relative $filepath ===

    if (-d $filepath) { return $self->processDir($filepath); }
    if ($filepath =~ /\.ht$/) { return $self->processHT($filepath); }
    if ($filepath =~ /\.wt$/) { return $self->processWT($filepath); }

    return $self->processText($filepath);
}

### $p->processDir($name)
# 	Process (list) a directory.
#	This is done by expanding the prototype file dir-index.ht, if it
#	exists.
#
sub processDir {
    my ($self, $filepath) = (@_);
    my $fullPath = $filepath; # === $self->getFullPath;
    my $path = $self->{'path'};

    print STDERR "*** processDir $fullPath\n";

    opendir(DIR, $filepath);
    my @files = readdir(DIR);
    closedir DIR;

    # === here's where we expand the prototype file. ===

    print "<table>\n";
    foreach my $f (sort @files) {
	next if $f =~ /^\.|CVS|~$/&& $f ne '..';
	$f .= '/' if -d "$fullPath/$f";
	my $d = (-d "$fullPath/$f")? "?dir=1" : "";
	print "<td><a href='/Ccgi/preview.cgi$path$f'>[pre]</a></td>\n";
	print "<td><code><a href='$f$d'>$f</a></code></td></tr>\n";
    }
    print "</table>\n";

    return 1;
}


### $p->processHT($filepath)
#	Process a .ht (HTML with extensions) file.  
#	Returns true on success, false if the file does not exist.
#	 
sub processHT {
    my ($self, $filepath) = (@_);

    my $parser = AMP::ParseXH->new($self->getContext, $self);
    $parser->filter(0);
    my $result = $parser->parseFile($filepath);

    # === want to insert <html>, <head>, <title>, <body> if necessary.

    for $node (@{$parser->stack}) {
	$self->expandNode($node);
    }

    return 1;
}

### $p->processINC($filepath)
#	Process a marked-up (.ht) include file, i.e. body content without
#	the <html>, <head>, or <body> elements.
#	
sub processINC {
   my ($self, $filepath) = (@_);

    my $parser = AMP::ParseXH->new($self->getContext, $self);
    $parser->filter(0);

    $parser->start_tag("html");
    # === <head><title>title</title></head><body>
    $parser->start_tag("header");
    $parser->end_tag("header");
    my $result = $parser->parseFile($filepath);
    $parser->start_tag("footer");
    $parser->end_tag("footer");
    $parser->end_tag("html");
    $parser->parse(undef); #EOF

    for $node (@{$parser->stack}) {
	$self->expandNode($node);
    }

    return 1;
}

### $p->processWT($filepath)
#	Process a .wt (WikiText) file
#	Returns true on success, false if the file does not exist.
#	 
sub processWT {
    my ($self, $filepath) = (@_);
    my $c = $self->getContext;
    return 0 unless open(IN, $filepath);

    # Need to put out the wiki header, of course.
    # The way we do this is by preloading the parser with a suitable
    # collection of active tags. 

    require AMP::ParseWiki;
    my $parser = AMP::ParseWiki->new($self->getContext, $self);
    $parser->filter(0);
    $parser->start_tag("html");
    # === <head><title>title</title></head><body>
    $parser->start_tag("header");
    $parser->end_tag("header");
    my $result = $parser->parseFile($self->getFullPath);
    $parser->start_tag("footer");
    $parser->end_tag("footer");
    $parser->end_tag("html");
    $parser->parse(undef); #EOF

    for $node (@{$parser->stack}) {
	$self->expandNode($node);
    }

    return 1;
}

### $p->processText($name)
#	Process a text file.  The result is wrapped in a <pre> element.
#	Crosslinking and entity insertion are done, but only a limited
#	amount of markup is inserted; nothing that will change the format.
#	
#	Returns true on success, false if the file does not exist.
#	 
sub processText {
    my ($self, $name) = (@_);
    my $c = $self->getContext;
    return 0 unless open(IN, $self->getFullPath);

    # === really need header and footer as usual.

    print "<hr><pre>";
    while (<IN>) {
	print $self->crosslink($self->expandEntities(entityEncode($_))); 
    }
    print "</pre><hr>";

    return 1;
}


#############################################################################
###
### Processing Skeleton Directories:
###
###	A "skeleton directory" is a prototype directory that is copied
###	into place to create a new location in the website, for example
###	a member or contribution directory.  A substitution mechanism is
###	needed that is highly unlikely to clash with any other substitution
###	mechanism in use on the site; we use pseudo-entities with funny
###	prefixes.  After the prefix, any entity defined in the context can be
###	used.  

### $p->copySkeletonDir($src, $dst, $prefix)
#	Recursively copy skeleton directory $src to directory $dst.
#	Files and directories in $src are processed iff their name
#	starts with $prefix.
#
#	Files in $src are entity-expanded (regardless of type); entities have
#	the form "&$prefix$name;".  The prefix defaults to "skel." -- note
#	that any of the usual entities can be used; the prefix ensures that
#	ordinary entities are not expanded by mistake.  In some cases it would
#	be nice to be able to use a prefix and suffix that can't be parsed as
#	an entity; for example, if it's necessary to parse the skeleton file
#	with a strict XML parser.  That, however, would require a different
#	set of functions because we also use the prefix to select the files
#	to copy.
#
#	Files with names not starting with the prefix are *not* processed.
#	This allows special files, e.g. ..cf and index.inc, to have their 
#	usual meanings.  In particular, index.inc can contain the form for
#	processing the skeleton.
#
#	Returns the number of files and directories processed, or -1 if an
#	error occurs during processing.
#
sub copySkeletonDir {
    my ($self, $src, $dst, $prefix) = @_;
    return -1 unless -d $src;
    return -1 unless makeDirPath($dst);
    return -1 unless -d $dst && -w $dst;

    $src .= '/' unless $src =~ m|/$|;
    $dst .= '/' unless $dst =~ m|/$|;

    $prefix = 'skel.' unless defined $prefix;
    my $qp  = quotemeta($prefix);		# quote prefix for use in re

    opendir(DIR, $src);
    my @files = readdir(DIR);
    closedir DIR;

    my $n = 1;
    my $r;
    foreach my $f (@files) {
	# ignore ., .., CVS, backup files.  Arguably, all dotfiles, but
	# the prefix test gets those anyway.  This line is mostly a leftover.
	next if $f =~ /^\.\.?$|^@|CVS|~$|\.(log|bak)$/;
	# ignore files with names not starting with the prefix.
	next if $f ^~ /^$qp(.+)$/;
	my $g = $1;	

	# === worry about privileges for ..cf, .htaccess, etc.

	if (-d "$src$f") {
	    $r = $self->copySkeletonDir("$src$f", "$dst$g", $prefix);
	} else {
	    $r = $self->copySkeletonFile("$src$f", "$dst$g", $prefix);
	}
	return -1 if $r < 0;
	$n += $r;
    }
    return $n;
}


sub copySkeletonFile {
    my ($self, $src, $dst, $pref) = @_;
    my $qp = quotemeta($pref);			# backslashify $pref for re

    if (!open(IN, $src)) {
	return -1;
    }
    if (!open(OUT, ">$dst")) {
	return -1;
    }
    while (<IN>) {
	s/\&$qp([a-zA-Z0-9][-_.:a-zA-Z0-9]*);/$self->getEntityValue($1)/ges;
	print OUT $_;
    }
    close OUT;
    close IN;
    return 1;
}

sub makeDirPath {
    my ($loc) = (@_);

    $loc =~ s/^\/*//;
    @path = split(/\//, $loc);
    my $path = "/$path[0]";
    if (! -d $path) {
	print STDERR "  mkDirPath:  directory $path[0] does not exist";
	return 0;
    }
    shift @path;
    if (@path == 0) { 
	print STDERR "  mkDirPath:  No path specified";
	return 0;
    }
    foreach $f (@path) {
	$path .= "/$f";
	next if (-d $path);
	if (! mkdir($path, 02777)) {
	    print STDERR "  mkDirPath:  Cannot create directory $path";
	    return 0;
	}
    }
    return 1;
}


#############################################################################
###
### Entities:
###

### $p->expandEntities($s)
#	Expand entities in $s.  
# === actually need separate ops for use in attributes and content ===
# === expandEntities and expandEntityNodes or something ===
sub expandEntities {
    my ($self, $s) = @_;

    $s =~ s/\&([a-zA-Z0-9][-_.:a-zA-Z0-9]*);/$self->getEntityValue($1)/ges;
    return $s;
}

### $p->getEntityValue($n)
#	return the value of entity "&$n;" as a string; 
#	return "&$n;" if $n is undefined and contains no "." characters,
#	the null string if undefined and "." is present -- this ensures
#	that locally-defined entities can default sensibly to "" while
#	HTML entities are passed along to the browser.
#
sub getEntityValue {
    my ($self, $n) = (@_);
    my $v = $self->get($n);
    return (defined $v)? $self->expandQuoted($v) 
	:  ($n =~ /\./)? '' : "\&$n;";
}

sub expandQuoted {
    my ($self, $v) = @_;
    return $v unless ref $v;
    if (ref $v eq ARRAY) {
	my $s = "";
	foreach $e (@$v) {
	    $s .= $self->expandQuoted($e);
	}
	return $s;
    } else {
	return $v->toString();
    }
}

### $p->getEntityBinding($n)
#	return the value of entity "&$n;"; 
#	return undefined if $n is undefined.
sub getEntityBinding {
    my ($self, $n) = (@_);
    return $self->get($n);
}


#############################################################################
###
### Crosslinking:
###

### $p->crosslink($string)
#	Insert <a> tags into $string, crosslinking it to other pages in the
#	current directory and elsewhere in the site.
sub crosslink {
    my ($self, $s) = (@_);
    if (! $self->allowCrosslinks) { return $s; }

    my $dirPath = $self->get("_dirPath");

    #=== worry about tags resulting from entity expansion ===
    #=== really need to go through and pull off tokens ===
    my $urlchar = '[-._/\+\@\?\#\=\~\%\&\;a-zA-Z0-9]';
    $s =~ s@(http\://[-._a-zA-Z0-9]+(:[0-9]+)?(/$urlchar*)?)@
	    $self->ylink($1)@geso;
    $s =~ s@([-_a-zA-Z0-9]*([/.][a-zA-Z0-9]+|/|[a-z][A-Z])[-._/a-zA-Z0-9]*)@
	    $self->xlink($1)@geso;

    return $s;
}

### === probably need something different for crosslinking in code ===

### $p->xlink($s, $url) -> string
#	Make $s a link to $url (default $s).
#	Returns a string, not a Node.
sub xlink {
    my ($self, $s, $url) = @_;
    return $s if $s=~ /^\./;
    if (!$url && $s =~ /^(.*)(\.)$/) {
	# ignore trailing "."
	return $self->xlink($1) . $2;
    }

    if (!$url && $s !~ m|/|) {			# if no slashes in $s, 
	# check for a global definition of the link (e.g. PenguinSong)
	# This includes a null definition for things like i.e[.].

	$url = $self->get("link.$s");
	if (defined($url) && $url eq '') { return $s; }

	# === check for link in standard places if !defined($url)
	# === We also need to worry about /m//$s, etc.  (URK!)
	# === check all areas?
    }
    if (!$url) {
	my $c = $self->getContext;
	$root = $c->getDocRoot;
	$path = $c->getRealDirPath;
	# === in wikis, we will need special handling for undefined links
	return $s if ($s =~ m|^/| && ! -e "$root$s");
	return $s if ($s !~ m|^/| && ! -e "$root$path/$s");
    }
    $url = $s unless $url;
    return "<a href='$url'>$s</a>";
}

sub ylink {
    my ($self, $s) = @_;
    if ($s =~ /^(.*)(\.)$/) { $s = $1; }
    return "<a href='$s'>$s</a>";
   
}

#############################################################################
###
### Tag Expansion:
###

sub expandNode {
    my ($self, $node) = (@_);
    if (ref $node) { $node->expand($self); }
    else 	   { $self->text($node); }
}

### $p->put($s)
#	Output a string $s.  This is the only method that has to be 
#	overridden in order to change from output to a file to output
#	to a string.
sub put {
    my ($self, $s) = (@_);
    print $s;
}

#############################################################################
###
### Node expansion callbacks
###
###	These routines are called by $node->expand($p)
###
###		declaration($text)
###		comment($text)
###		pi($text)
###		start_tag($tag, [attr => value, ...])
###		end_tag($tag)
###		text($text)
###
###	The default bindings build a parse tree out of XML::Node objects.
###	Overriding these in a subclass allows SAX-like on-the-fly processing.

### $self->text($text)		Text
##	The parser doesn't recognize entities, so it's up to the 
##	application to process them if necessary.
sub text {
    my ($self, $text) = @_;
    $text = $self->expandEntities($text);
    if ($text =~ /\<[a-zA-Z]/) {
	# Don't crosslink if the expansion text contains markup
	# That's crude -- really want to reparse.
	$self->put($text);
    } else {
	$self->put($self->crosslink($text)); 
    }
}

### $self->declaration($text)	HTML declaration, e.g. doctype.
##	initial "<!" and ending ">" stripped off.
sub declaration {
    my ($self, $text) = @_;
    if (ref $text) { $text = join('', @$text); }
    $self-> put("<!$text>");		# === really ought to get the tag...
}

### $self->comment($text)	Comment.
##	The leading and trailing "<!--" and "-->" have been stripped off.
sub comment {
    my ($self, $text) = @_;
    if (ref $text) { $text = join('', @$text); }
    $self->put("<!--$text-->");
}

### $self->pi($text)		Processing Instruction
sub pi {
    my ($self, $text) = @_;
    if (ref $text) { $text = join('', @$text); }
    $self->put("<?$text?>");
}

### $self->cdata($text)		Character Data Section
##	Most applications will treat this the same as text
sub cdata {
    my ($self, $text) = @_;
    if (ref $text) { $text = join('', @$text); }
    $self->put("<![CDATA[$text]]>");
}

### $self->nodelist([node...])
sub nodelist {
    my ($self, $nodes) = @_;
    for $node (@$nodes) { $self->expandNode($node) }
}

### $self->end_tag()		End tag
sub end_tag {
    my ($self, $tag) = @_;
    $self->put("</$tag>");
}

### $self->start_tag(tag, {attrs})	Start tag
##	The attributes are passed in a hash.
##	Called with additional attribute _empty=>'/' if it ended in '/>'.
sub start_tag {
    my ($self, $tag, $attrs) = @_;
    # === called only when expanding on the fly ===
}

### $self->element($tag, $node, $content)
#	$node->expand needs $tag and $content, so it's better to
#	pass them along than to extract them a second time.
#
#	If $tag has a definition in the tagset, we expand it.  Otherwise
#	we simply output the element as-is.
sub element {
    my ($self, $tag, $node, $content) = @_;

    my $c = $self->getContext;
    my $ts = $self->getTagset;
    my $root =  $c->getDocRoot;
    my $site = $c->getSiteDir;

    # See whether there's a handler (actually, any definition) for $tag
    my $h  = $ts? $ts->getHandler($tag) : '';
    if (! ref $h) {
	# There's no definition, so we simply output the element
	$self->outputElement($tag, $node, $content);
	
    } elsif (ref $h eq 'CODE') {
	# The definition is a reference to a subroutine (i.e. a primitive)
	# Pass it the same argument list we just got. 

	&$h($self, $tag, $node, $content);

    } elsif (ref $h eq 'HASH') {
	# we have a definition which is a hash, which we assume contains
	# default values for the node's attributes.
	$self->outputElement($tag, $node, $content, $h);

    } elsif (ref($h) eq 'ARRAY') { 
	# The definition is an array of Nodes.  Process them.
	my $p = $self->subProcess($node, $content);
	$p->nodelist($h);
    } else { 
	# Assume the definition is a node.

	my $p = $self->subProcess($node, $content);
	$h->expand($p);
    }
}

### $self->outputElement($tag, $node, $content, {defaultAttrs})
#	Output an element, expanding its content.
#	Each of the default attributes is output provided a corresponding
#	actual attribute was present.  This is mainly used for default
#	colors and fonts.
#
#	Normally called from Process::element for an undefined element,
#	but may also be called from handler functions. 
#
sub outputElement {
    my ($self, $tag, $node, $content, $defaults) = @_;

    print "<$tag";
    my @attrNames = @{$node->attr_names};
    for my $a (@attrNames) {
	my $v = $node->attr($a);
	my $q = ($v =~ /\"/)? "'" : '"';
	$self->put(" $a=$q" . $self->expandEntities($v) . $q);
    }
    if (ref $defaults) {
	for my $k (keys(%$defaults)) {
	    my $v = $defaults->{$k};
	    next if defined ($node->attr($k));
	    my $q = ($v =~ /\"/)? "'" : '"';
	    $self->put(" $k=$q" . $self->expandEntities($v) . $q);
	}
    }
    if ($node->is_empty) {
	$self->put(" />");
    } else {
	$self->put(">");
	for my $n (@$content) {
	    # if we're inside an <a> tag we'd better not crosslink!
	    # there are some others, too; <title> for example.
	    my $p = $self;
	    if ($noxlink{$tag}) {
		$p = $self->subProcess();
		$p->allowCrosslinks(0);
	    }
	    if (ref $n) { $n->expand($p); }
	    else 	{ $p->text($n); }
	}
	$self->put("</$tag>");
    }
}


#############################################################################
'Package loaded';

