, <body> if necessary. for $node (@{$parser->stack}) { $self->expandNode($node); } return 1; } ### $p->processINC($filepath) # Process a marked-up (.ht) include file, i.e. body content without # the <html>, <head>, or <body> elements. # sub processINC { my ($self, $filepath) = (@_); my $parser = AMP::ParseXH->new($self->getContext, $self); $parser->filter(0); $parser->start

package AMP::Process; ###### Process class ### $Id: Process.pm,v 1.21 2002/10/22 15:48:07 steve Exp $ ### # === BUG: &content; doesn't expand properly -- shows as ARRAY... # parser should make Node for entity refs in content text, # then expansion would be trivial. Actually may need # (with = ) # === BUG: linkifier needs to ignore non-links # === BUG: need sitewide, dir-wide list of name=link pairs (links.cf) # === BUG: need default ### This class implements file processing: template expansion, Wiki ### parsing, and automatic cross-linking. Process is basically an ### stack frame for a parse tree interpretor; the parser is separate. ### Parse tree nodes know how to expand themselves, by calling Process ### methods on their components. ### A Process node is, by itself, a namespace for entities. Processing ### proceeds in the following conceptual stages: ### ### 1. parsing (which constructs a parse tree) ### 2. active-tag expansion ### 3. entity expansion (in attribute values and text) ### 4. crosslink recognition in text ### 5. output formatting. ### In theory, it's possible to pipeline these stages, only producing ### fragments of a parse tree when they need to be saved in a variable ### or grovelled-over repeatedly. === Currently we don't. use XML::Node; use AMP::ParseXH; use Site::Util; # Note that we don't have to "use" the auxiliary parsers; instead we # "require" them when we discover they're needed. ParseXH is needed # in any case to expand templates. #use AMP::ParseWiki; use Exporter(); use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); BEGIN { $VERSION=1.00; @ISA = qw(Exporter); @EXPORT = qw( ); } # Set of tags within which to suppress crosslinking. %noxlink = ( 'a' => 1, 'title' => 1, ); ############################################################################# ### ### Constructor: ### ### Process->new($context, $urlpath, $filepath) # sub new { my ($class, $context, $path, $filepath) = (@_); my $self = { "_filePath" => $filepath, "_context" => $context, "_parent" => $context, # set &content; to null for benefit of dir-index # and others that use Process directly on template files "content" => "", }; $path =~ m|^(.*)/[^/]+/?$|; $self->{"_dirPath"} = $1; $self->{"_crosslink"} = 1; $self = bless $self, $class; return $self; } ### $p->subProcess($parent) # make a subprocess, i.e. a stack frame. sub subProcess { my ($parent, $node, $content) = (@_); my $class = ref $parent; $content = "" unless $content; my $self = {'_parent' => $parent, '_context' => $parent->getContext, '_node' => $node, 'content' => $content, }; $self = bless $self, $class; return $self; } ############################################################################# ### ### access functions: ### ### $p->get(name) -> value # get a value. Typically these are also useable as entities, but names # starting with non-alphanumeric characters will not be recognized as # entities and so can be used internally with impunity. # # Climbs up the "_parent" link (subprocess to superprocess) if it finds # nothing at this level. Returns undefined if no binding is found. # sub get { my ($self, $n) = (@_); my $v = $self->{$n}; return $v if defined $v; # Look at current node's attributes, then at parent. # There really ought to be a prioritized list. my $p = $self->{'_node'}; # === should really use tag as a prefix if (ref $p) { $v = $p->attr($n); return $v if defined $v; } $p = $self->{'_parent'}; return (ref $p)? $p->get($n) : $v; } sub getFilePath { my ($self) = (@_); return $self->get("_filePath"); } sub getFullPath { my ($self) = (@_); return $ENV{DOCUMENT_ROOT} . $self->get("_filePath"); } sub getContext { my ($self) = (@_); return $self->{"_context"}; } sub getTagset { my ($self) = (@_); my $ts = $self->{"_tagset"}; if (!ref $ts) { $ts = $self->getContext->getTagset; $self->{"_tagset"} = $ts; # print STDERR "loaded Tagset $ts\n"; print STDERR "failed to get tagset from context\n" unless ref $ts; } return $ts; } sub allowCrosslinks { my ($self, $v) = (@_); $self->{"_crosslink"} = $v if defined $v; return $self->get("_crosslink"); } ############################################################################# ### ### Processing Files: ### ### $p->processFile($filepath) -> processed? # Process a single file. Dispatch on extension. # Return 0 if the file cannot be processed. # # If this is a prototype, $filepath will point to the prototype # and $p->getFullPath will point to the requested document. # sub processFile { my ($self, $filepath) = (@_); $filepath = $self->getFullPath unless -f $filepath; # === worry about relative $filepath === if (-d $filepath) { return $self->processDir($filepath); } if ($filepath =~ /\.ht$/) { return $self->processHT($filepath); } if ($filepath =~ /\.wt$/) { return $self->processWT($filepath); } return $self->processText($filepath); } ### $p->processDir($name) # Process (list) a directory. # This is done by expanding the prototype file dir-index.ht, if it # exists. # sub processDir { my ($self, $filepath) = (@_); my $fullPath = $filepath; # === $self->getFullPath; my $path = $self->{'path'}; print STDERR "*** processDir $fullPath\n"; opendir(DIR, $filepath); my @files = readdir(DIR); closedir DIR; # === here's where we expand the prototype file. === print "\n"; foreach my $f (sort @files) { next if $f =~ /^\.|CVS|~$/&& $f ne '..'; $f .= '/' if -d "$fullPath/$f"; my $d = (-d "$fullPath/$f")? "?dir=1" : ""; print "\n"; print "\n"; } print "

[pre] $f

\n"; return 1; } ### $p->processHT($filepath) # Process a .ht (HTML with extensions) file. # Returns true on success, false if the file does not exist. # sub processHT { my ($self, $filepath) = (@_); my $parser = AMP::ParseXH->new($self->getContext, $self); $parser->filter(0); my $result = $parser->parseFile($filepath); # === want to insert , , , <body> if necessary. for $node (@{$parser->stack}) { $self->expandNode($node); } return 1; } ### $p->processINC($filepath) # Process a marked-up (.ht) include file, i.e. body content without # the <html>, <head>, or <body> elements. # sub processINC { my ($self, $filepath) = (@_); my $parser = AMP::ParseXH->new($self->getContext, $self); $parser->filter(0); $parser->start_tag("html"); # === <head><title>title $parser->start_tag("header"); $parser->end_tag("header"); my $result = $parser->parseFile($filepath); $parser->start_tag("footer"); $parser->end_tag("footer"); $parser->end_tag("html"); $parser->parse(undef); #EOF for $node (@{$parser->stack}) { $self->expandNode($node); } return 1; } ### $p->processWT($filepath) # Process a .wt (WikiText) file # Returns true on success, false if the file does not exist. # sub processWT { my ($self, $filepath) = (@_); my $c = $self->getContext; return 0 unless open(IN, $filepath); # Need to put out the wiki header, of course. # The way we do this is by preloading the parser with a suitable # collection of active tags. require AMP::ParseWiki; my $parser = AMP::ParseWiki->new($self->getContext, $self); $parser->filter(0); $parser->start_tag("html"); # === title $parser->start_tag("header"); $parser->end_tag("header"); my $result = $parser->parseFile($self->getFullPath); $parser->start_tag("footer"); $parser->end_tag("footer"); $parser->end_tag("html"); $parser->parse(undef); #EOF for $node (@{$parser->stack}) { $self->expandNode($node); } return 1; } ### $p->processText($name) # Process a text file. The result is wrapped in a

 element.
#	Crosslinking and entity insertion are done, but only a limited
#	amount of markup is inserted; nothing that will change the format.
#	
#	Returns true on success, false if the file does not exist.
#	 
sub processText {
    my ($self, $name) = (@_);
    my $c = $self->getContext;
    return 0 unless open(IN, $self->getFullPath);

    # === really need header and footer as usual.

    print "
";
    while () {
	print $self->crosslink($self->expandEntities(entityEncode($_))); 
    }
    print "
";

    return 1;
}


#############################################################################
###
### Processing Skeleton Directories:
###
###	A "skeleton directory" is a prototype directory that is copied
###	into place to create a new location in the website, for example
###	a member or contribution directory.  A substitution mechanism is
###	needed that is highly unlikely to clash with any other substitution
###	mechanism in use on the site; we use pseudo-entities with funny
###	prefixes.  After the prefix, any entity defined in the context can be
###	used.  

### $p->copySkeletonDir($src, $dst, $prefix)
#	Recursively copy skeleton directory $src to directory $dst.
#	Files and directories in $src are processed iff their name
#	starts with $prefix.
#
#	Files in $src are entity-expanded (regardless of type); entities have
#	the form "&$prefix$name;".  The prefix defaults to "skel." -- note
#	that any of the usual entities can be used; the prefix ensures that
#	ordinary entities are not expanded by mistake.  In some cases it would
#	be nice to be able to use a prefix and suffix that can't be parsed as
#	an entity; for example, if it's necessary to parse the skeleton file
#	with a strict XML parser.  That, however, would require a different
#	set of functions because we also use the prefix to select the files
#	to copy.
#
#	Files with names not starting with the prefix are *not* processed.
#	This allows special files, e.g. ..cf and index.inc, to have their 
#	usual meanings.  In particular, index.inc can contain the form for
#	processing the skeleton.
#
#	Returns the number of files and directories processed, or -1 if an
#	error occurs during processing.
#
sub copySkeletonDir {
    my ($self, $src, $dst, $prefix) = @_;
    return -1 unless -d $src;
    return -1 unless makeDirPath($dst);
    return -1 unless -d $dst && -w $dst;

    $src .= '/' unless $src =~ m|/$|;
    $dst .= '/' unless $dst =~ m|/$|;

    $prefix = 'skel.' unless defined $prefix;
    my $qp  = quotemeta($prefix);		# quote prefix for use in re

    opendir(DIR, $src);
    my @files = readdir(DIR);
    closedir DIR;

    my $n = 1;
    my $r;
    foreach my $f (@files) {
	# ignore ., .., CVS, backup files.  Arguably, all dotfiles, but
	# the prefix test gets those anyway.  This line is mostly a leftover.
	next if $f =~ /^\.\.?$|^@|CVS|~$|\.(log|bak)$/;
	# ignore files with names not starting with the prefix.
	next if $f ^~ /^$qp(.+)$/;
	my $g = $1;	

	# === worry about privileges for ..cf, .htaccess, etc.

	if (-d "$src$f") {
	    $r = $self->copySkeletonDir("$src$f", "$dst$g", $prefix);
	} else {
	    $r = $self->copySkeletonFile("$src$f", "$dst$g", $prefix);
	}
	return -1 if $r < 0;
	$n += $r;
    }
    return $n;
}


sub copySkeletonFile {
    my ($self, $src, $dst, $pref) = @_;
    my $qp = quotemeta($pref);			# backslashify $pref for re

    if (!open(IN, $src)) {
	return -1;
    }
    if (!open(OUT, ">$dst")) {
	return -1;
    }
    while () {
	s/\&$qp([a-zA-Z0-9][-_.:a-zA-Z0-9]*);/$self->getEntityValue($1)/ges;
	print OUT $_;
    }
    close OUT;
    close IN;
    return 1;
}

sub makeDirPath {
    my ($loc) = (@_);

    $loc =~ s/^\/*//;
    @path = split(/\//, $loc);
    my $path = "/$path[0]";
    if (! -d $path) {
	print STDERR "  mkDirPath:  directory $path[0] does not exist";
	return 0;
    }
    shift @path;
    if (@path == 0) { 
	print STDERR "  mkDirPath:  No path specified";
	return 0;
    }
    foreach $f (@path) {
	$path .= "/$f";
	next if (-d $path);
	if (! mkdir($path, 02777)) {
	    print STDERR "  mkDirPath:  Cannot create directory $path";
	    return 0;
	}
    }
    return 1;
}


#############################################################################
###
### Entities:
###

### $p->expandEntities($s)
#	Expand entities in $s.  
# === actually need separate ops for use in attributes and content ===
# === expandEntities and expandEntityNodes or something ===
sub expandEntities {
    my ($self, $s) = @_;

    $s =~ s/\&([a-zA-Z0-9][-_.:a-zA-Z0-9]*);/$self->getEntityValue($1)/ges;
    return $s;
}

### $p->getEntityValue($n)
#	return the value of entity "&$n;" as a string; 
#	return "&$n;" if $n is undefined and contains no "." characters,
#	the null string if undefined and "." is present -- this ensures
#	that locally-defined entities can default sensibly to "" while
#	HTML entities are passed along to the browser.
#
sub getEntityValue {
    my ($self, $n) = (@_);
    my $v = $self->get($n);
    return (defined $v)? $self->expandQuoted($v) 
	:  ($n =~ /\./)? '' : "\&$n;";
}

sub expandQuoted {
    my ($self, $v) = @_;
    return $v unless ref $v;
    if (ref $v eq ARRAY) {
	my $s = "";
	foreach $e (@$v) {
	    $s .= $self->expandQuoted($e);
	}
	return $s;
    } else {
	return $v->toString();
    }
}

### $p->getEntityBinding($n)
#	return the value of entity "&$n;"; 
#	return undefined if $n is undefined.
sub getEntityBinding {
    my ($self, $n) = (@_);
    return $self->get($n);
}


#############################################################################
###
### Crosslinking:
###

### $p->crosslink($string)
#	Insert  tags into $string, crosslinking it to other pages in the
#	current directory and elsewhere in the site.
sub crosslink {
    my ($self, $s) = (@_);
    if (! $self->allowCrosslinks) { return $s; }

    my $dirPath = $self->get("_dirPath");

    #=== worry about tags resulting from entity expansion ===
    #=== really need to go through and pull off tokens ===
    my $urlchar = '[-._/\+\@\?\#\=\~\%\&\;a-zA-Z0-9]';
    $s =~ s@(http\://[-._a-zA-Z0-9]+(:[0-9]+)?(/$urlchar*)?)@
	    $self->ylink($1)@geso;
    $s =~ s@([-_a-zA-Z0-9]*([/.][a-zA-Z0-9]+|/|[a-z][A-Z])[-._/a-zA-Z0-9]*)@
	    $self->xlink($1)@geso;

    return $s;
}

### === probably need something different for crosslinking in code ===

### $p->xlink($s, $url) -> string
#	Make $s a link to $url (default $s).
#	Returns a string, not a Node.
sub xlink {
    my ($self, $s, $url) = @_;
    return $s if $s=~ /^\./;
    if (!$url && $s =~ /^(.*)(\.)$/) {
	# ignore trailing "."
	return $self->xlink($1) . $2;
    }

    if (!$url && $s !~ m|/|) {			# if no slashes in $s, 
	# check for a global definition of the link (e.g. PenguinSong)
	# This includes a null definition for things like i.e[.].

	$url = $self->get("link.$s");
	if (defined($url) && $url eq '') { return $s; }

	# === check for link in standard places if !defined($url)
	# === We also need to worry about /m//$s, etc.  (URK!)
	# === check all areas?
    }
    if (!$url) {
	my $c = $self->getContext;
	$root = $c->getDocRoot;
	$path = $c->getRealDirPath;
	# === in wikis, we will need special handling for undefined links
	return $s if ($s =~ m|^/| && ! -e "$root$s");
	return $s if ($s !~ m|^/| && ! -e "$root$path/$s");
    }
    $url = $s unless $url;
    return "$s";
}

sub ylink {
    my ($self, $s) = @_;
    if ($s =~ /^(.*)(\.)$/) { $s = $1; }
    return "$s";
   
}

#############################################################################
###
### Tag Expansion:
###

sub expandNode {
    my ($self, $node) = (@_);
    if (ref $node) { $node->expand($self); }
    else 	   { $self->text($node); }
}

### $p->put($s)
#	Output a string $s.  This is the only method that has to be 
#	overridden in order to change from output to a file to output
#	to a string.
sub put {
    my ($self, $s) = (@_);
    print $s;
}

#############################################################################
###
### Node expansion callbacks
###
###	These routines are called by $node->expand($p)
###
###		declaration($text)
###		comment($text)
###		pi($text)
###		start_tag($tag, [attr => value, ...])
###		end_tag($tag)
###		text($text)
###
###	The default bindings build a parse tree out of XML::Node objects.
###	Overriding these in a subclass allows SAX-like on-the-fly processing.

### $self->text($text)		Text
##	The parser doesn't recognize entities, so it's up to the 
##	application to process them if necessary.
sub text {
    my ($self, $text) = @_;
    $text = $self->expandEntities($text);
    if ($text =~ /\<[a-zA-Z]/) {
	# Don't crosslink if the expansion text contains markup
	# That's crude -- really want to reparse.
	$self->put($text);
    } else {
	$self->put($self->crosslink($text)); 
    }
}

### $self->declaration($text)	HTML declaration, e.g. doctype.
##	initial "" stripped off.
sub declaration {
    my ($self, $text) = @_;
    if (ref $text) { $text = join('', @$text); }
    $self-> put("");		# === really ought to get the tag...
}

### $self->comment($text)	Comment.
##	The leading and trailing "" have been stripped off.
sub comment {
    my ($self, $text) = @_;
    if (ref $text) { $text = join('', @$text); }
    $self->put("");
}

### $self->pi($text)		Processing Instruction
sub pi {
    my ($self, $text) = @_;
    if (ref $text) { $text = join('', @$text); }
    $self->put("");
}

### $self->cdata($text)		Character Data Section
##	Most applications will treat this the same as text
sub cdata {
    my ($self, $text) = @_;
    if (ref $text) { $text = join('', @$text); }
    $self->put("");
}

### $self->nodelist([node...])
sub nodelist {
    my ($self, $nodes) = @_;
    for $node (@$nodes) { $self->expandNode($node) }
}

### $self->end_tag()		End tag
sub end_tag {
    my ($self, $tag) = @_;
    $self->put("");
}

### $self->start_tag(tag, {attrs})	Start tag
##	The attributes are passed in a hash.
##	Called with additional attribute _empty=>'/' if it ended in '/>'.
sub start_tag {
    my ($self, $tag, $attrs) = @_;
    # === called only when expanding on the fly ===
}

### $self->element($tag, $node, $content)
#	$node->expand needs $tag and $content, so it's better to
#	pass them along than to extract them a second time.
#
#	If $tag has a definition in the tagset, we expand it.  Otherwise
#	we simply output the element as-is.
sub element {
    my ($self, $tag, $node, $content) = @_;

    my $c = $self->getContext;
    my $ts = $self->getTagset;
    my $root =  $c->getDocRoot;
    my $site = $c->getSiteDir;

    # See whether there's a handler (actually, any definition) for $tag
    my $h  = $ts? $ts->getHandler($tag) : '';
    if (! ref $h) {
	# There's no definition, so we simply output the element
	$self->outputElement($tag, $node, $content);
	
    } elsif (ref $h eq 'CODE') {
	# The definition is a reference to a subroutine (i.e. a primitive)
	# Pass it the same argument list we just got. 

	&$h($self, $tag, $node, $content);

    } elsif (ref $h eq 'HASH') {
	# we have a definition which is a hash, which we assume contains
	# default values for the node's attributes.
	$self->outputElement($tag, $node, $content, $h);

    } elsif (ref($h) eq 'ARRAY') { 
	# The definition is an array of Nodes.  Process them.
	my $p = $self->subProcess($node, $content);
	$p->nodelist($h);
    } else { 
	# Assume the definition is a node.

	my $p = $self->subProcess($node, $content);
	$h->expand($p);
    }
}

### $self->outputElement($tag, $node, $content, {defaultAttrs})
#	Output an element, expanding its content.
#	Each of the default attributes is output provided a corresponding
#	actual attribute was present.  This is mainly used for default
#	colors and fonts.
#
#	Normally called from Process::element for an undefined element,
#	but may also be called from handler functions. 
#
sub outputElement {
    my ($self, $tag, $node, $content, $defaults) = @_;

    print "<$tag";
    my @attrNames = @{$node->attr_names};
    for my $a (@attrNames) {
	my $v = $node->attr($a);
	my $q = ($v =~ /\"/)? "'" : '"';
	$self->put(" $a=$q" . $self->expandEntities($v) . $q);
    }
    if (ref $defaults) {
	for my $k (keys(%$defaults)) {
	    my $v = $defaults->{$k};
	    next if defined ($node->attr($k));
	    my $q = ($v =~ /\"/)? "'" : '"';
	    $self->put(" $k=$q" . $self->expandEntities($v) . $q);
	}
    }
    if ($node->is_empty) {
	$self->put(" />");
    } else {
	$self->put(">");
	for my $n (@$content) {
	    # if we're inside an  tag we'd better not crosslink!
	    # there are some others, too;  for example.
	    my $p = $self;
	    if ($noxlink{$tag}) {
		$p = $self->subProcess();
		$p->allowCrosslinks(0);
	    }
	    if (ref $n) { $n->expand($p); }
	    else 	{ $p->text($n); }
	}
	$self->put("</$tag>");
    }
}


#############################################################################
'Package loaded';