#!/bin/ksh
######################################################################
# $Id: buildindex,v 1.23 2008/02/20 14:42:51 perette Exp $
# Author:       Perette Barella
# Purpose:      This script generates index.web or delta.web which can then
#               be run through the M4 macros to generate a corresponding
#               HTML file.
# Version:      @(#) $Id: buildindex,v 1.23 2008/02/20 14:42:51 perette Exp $
# Copyright:    Copyright 1997-2003 Perette Barella.
#               All rights reserved.
######################################################################

# html_escape - filter stdin to stdout, replacing the three characters
# that are special in HTML text.  '&' must be handled first so the
# entities produced for '<' and '>' are not escaped a second time.
function html_escape {
    sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# get_index_entries FILE
#   Runs ${M4:-m4} with ${INCLUDE}/readindex.m4 from FILE's directory
#   (FILE_SOURCE=<base>.web, FILE_DEST=<base>.html) and rewrites any
#   "http:...<dir>/<base>.html" prefix in the output to the relative
#   "<dir>/<base>.html" form, with or without a "#fragment".
#   Returns 1 if FILE is unreadable or its path cannot be split;
#   otherwise the status of the final sed.
function get_index_entries {
    typeset file="$1" dirname basename

    # A leading "./" would defeat the path match in the sed expressions.
    [ "${file:0:2}" = "./" ] && file="${file:2}"
    [ -r "$file" ] || return 1
    dirname=$(dirname "$file") || return 1
    basename=$(basename "$file" .web) || return 1

    # ${M4:-m4} is deliberately left unquoted so M4 may carry options.
    (cd "$dirname" &&
        ${M4:-m4} -P -DFILE_SOURCE="$basename.web" \
            -DFILE_DEST="$basename.html" "${INCLUDE}/readindex.m4") |
    sed -e "s&http:.*$dirname/$basename\.html &$dirname/$basename.html &" \
        -e "s&http:.*$dirname/$basename\.html#\([^ ]*\) &$dirname/$basename.html#\1 &"
}

# get_page_title FILE
#   Prints an HTML-escaped title for FILE, chosen by file name suffix:
#     *.web            output of m4 with ${INCLUDE}/readtitle.m4
#     *.txt.gz *.txt.Z first line that is neither blank nor a PGP banner
#     *.txt *.do       Subject: header from the first 15 lines, else the
#                      first non-blank line, else the first two lines
#     *.pdf            text of the dc:title metadata line
#   Returns 1 (printing nothing on stdout) when FILE is unreadable, the
#   suffix is unknown, or no title text could be found.
#   Reads the global $arg0 for the error message prefix.
function get_page_title {
    typeset file="$1" dirname basename name

    [ -r "$file" ] || return 1
    dirname=$(dirname "$file") || return 1
    basename=$(basename "$file") || return 1

    case "$basename" in
        *.web)
            # cd inside the substitution so the caller's working
            # directory is never changed.
            name=$(cd "$dirname" &&
                ${M4:-m4} -P -DFILE_SOURCE="$basename" \
                    -DFILE_DEST="$(basename "$basename" .web).html" \
                    "${INCLUDE}/readtitle.m4") || return 1
            ;;
        *.txt.gz|*.txt.Z)
            name=$(gzip -dc -- "$file" |
                grep -E -v '^[[:space:]]*$|BEGIN PGP' | head -1)
            [ -n "$name" ] && name="$basename: $name"
            ;;
        *.txt|*.do)
            name=$(head -15 "$file" | grep '^Subject:' |
                sed -e 's/^Subject: *//' -e 's/[rR]e: *//g')
            [ -z "$name" ] &&
                name=$(grep -E -v '^[[:space:]]*$|BEGIN PGP' "$file" | head -1)
            # A "#!" line says nothing about the content; fall back to
            # the first two lines.  Escaping is applied once, below.
            if [[ -z "$name" || "${name:0:2}" == "#!" ]]
            then
                name=$(head -2 "$file")
            fi
            [ -n "$name" ] && name="$basename: $name"
            ;;
        *.pdf)
            # NOTE(review): the element names in the first two
            # expressions were absent from the text under review
            # (it read 's/.*]*>//' and 's&]*>.*&&'); they are
            # reconstructed here on the assumption that they bracket
            # the dc:title element -- confirm against revision control.
            name=$(strings "$file" | grep 'dc:title' | sed \
                -e 's/.*<dc:title[^>]*>//' \
                -e 's&</dc:title[^>]*>.*&&' \
                -e 's/<[^>]*>//g' \
                -e 's/^[[:space:]]*//' \
                -e 's/[[:space:]]*$//')
            [ -n "$name" ] && name="$name (PDF)"
            ;;
        *)
            echo "$arg0: Don't know file format for $basename." 1>&2
            return 1
            ;;
    esac

    [ -n "$name" ] || return 1
    printf '%s\n' "$name" | html_escape
    return 0
}

# get_page_location FILE
#   Prints the published location of FILE: "<dir>/<base>.html" for a
#   .web source, FILE itself for anything else.
function get_page_location {
    typeset file="$1"

    case "$file" in
        *.web)
            echo "$(dirname "$file")/$(basename "$file" .web).html"
            ;;
        *)
            echo "$file"
            ;;
    esac
}

# islocked - return lock status of a file
# return 0 for locked, 255 for confused (didn't find lock data)
# other >0 value for unlocked.
# FILE ($1) is scanned line by line for the first line starting with
# "locks"; a line starting with "text" ends the search.
function islocked {
    # Local, so a caller's own $file is not overwritten.
    typeset file="$1" aline

    while read -r aline
    do
        case "$aline" in
            locks\;*)   # no locks
                return 1
                ;;
            locks*)
                return 0
                ;;
            text*)      # at text section, no locks found
                echo "$file: Can not find lock data" 1>&2
                return 255
                ;;
        esac
    done < "$file"
    echo "$file: Can not find lock data: EOF" 1>&2
    return 255
}

# read_rcs_last_change FILE
#   RCS stores stuff in reverse chronological order in the file.  So
#   just locate the first log, and return that.
#   The log is the @-delimited string following the first "log" line.
#   Its lines are joined with single spaces, the delimiters are
#   removed and the "@@" escape is turned back into "@".
#   Prints the log text and returns 0; returns 1 with a message on
#   stderr if a "text" line or end of file is reached first.
function read_rcs_last_change {
    typeset file="$1" aline rest change="" collect=false first=true last

    while read -r aline
    do
        if [[ "$collect" != "true" ]]
        then
            # Keywords only count outside the log string; a log line
            # that merely reads "text" or "log" is ordinary content.
            case "$aline" in
                log)
                    collect=true
                    ;;
                text)
                    echo "$file: Got to text line." 1>&2
                    return 1
                    ;;
            esac
            continue
        fi

        # Only the first line carries the opening delimiter.
        if [[ "$first" == "true" ]]
        then
            aline=${aline#@}
            first=false
        fi

        # With the "@@" pairs set aside, a remaining trailing "@" is
        # the closing delimiter.
        last=false
        rest=${aline//@@/}
        if [[ "$rest" == *@ ]]
        then
            last=true
            aline=${aline%@}
        fi
        aline=${aline//@@/@}

        if [[ -n "$aline" ]]
        then
            if [[ -z "$change" ]]
            then
                change=$aline
            else
                change="$change $aline"
            fi
        fi

        if [[ "$last" == "true" ]]
        then
            printf '%s\n' "$change"
            return 0
        fi
    done < "$file"

    echo "$file: Can't read change log." 1>&2
    return 1
}

# add_update_info FILE
#   Prints a "Last updated" line built from fields 6-8 of `ls -l FILE`.
#   If <dir>/RCS/<base>,v exists, also prints a "Changes" line: a fixed
#   notice while the RCS file is locked, otherwise the HTML-escaped
#   text of its most recent log entry.  Always returns 0.
#   NOTE(review): the two echo strings that begin with a newline are
#   reproduced exactly as found; markup may have been lost from them --
#   confirm against revision control.
function add_update_info {
    typeset file="$1" rcsfile updated change=""

    rcsfile="$(dirname "$file")/RCS/$(basename "$file"),v"
    updated=$(ls -l -- "$file" | awk '{ print $6 " " $7 " " $8 }')
    if [ -f "$rcsfile" ]
    then
        if islocked "$rcsfile"
        then
            change="Changes are available, but not yet marked complete."
        else
            # updated=$(grep "^date " "$rcsfile" | head -1 |
            #   awk '{print $2}' | sed 's/;//g')
            change=$(read_rcs_last_change "$rcsfile")
        fi
    fi

    echo "
Last updated: $updated."
    if [ -n "$change" ]
    then
        echo "
"
        printf '%s\n' "Changes: \`\`$change''" | html_escape
    fi
    return 0
}

# KORN SHELL DEPENDENT FUNCTION
# (Fork order of pipe.)
#
# NOTE(review): format_content_index is NOT defined below.  Its text
# in the source under review is missing a large span between the first
# echo and the "let lastlevel--" loop, and more after the final echo,
# so it cannot be parsed or safely reconstructed.  The surviving text
# is kept verbatim for reference; restore the function from revision
# control.
#
#   function format_content_index {
#       typeset file url title index lastgroup lastlevel=0 currentlevel
#       echo '" let lastlevel-- lastgroup[$lastlevel]="" done
#       # Output new/different heirarchy levels if necessary
#       while [ $lastlevel -lt $currentlevel ]
#       do
#           echo "
#     • ${index[$lastlevel]}"
#           echo ""
#       done
#   }

# is_in_index FILE
#   Returns 0 if FILE's base name appears as a whole word in the
#   index.html beside it, or if there is no such index page (then
#   everything is listed); non-zero otherwise.
function is_in_index {
    typeset file="$1" dirname basename

    dirname=$(dirname "$file")
    basename=$(basename "$file")

    # if no index page, everything is listed.
    [ -f "$dirname/index.html" ] || return 0

    # -F: the name is literal text, so "." must not match any character.
    grep -q -w -F -- "$basename" "$dirname/index.html"
}

# NOTE(review): format_list is NOT defined below.  Everything between
# the final echo and the closing brace (the list output and the end of
# the for loop) is missing from the source under review.  The
# surviving text is kept verbatim for reference; restore the function
# from revision control.
#
#   function format_list {
#       typeset class="" pagetitle location any=false suppress=false
#       [ "$mode" = "deltas" ] && class=" CLASS=padded"
#       [ "$1" = "-s" ] && suppress=true && shift
#       for file in "$@"
#       do
#           [ ! -r "$file" ] && continue
#           [ "$(dirname "$file")" = "$(dirname "$destfile")" -a \
#             "$(basename "$file")" = "$(basename "$destfile")" ] && continue
#           if ! pagetitle=$(get_page_title "$file")
#           then
#               echo "$file: can't determine page title." 1>&2
#               pagetitle=$(basename "$file")
#           fi
#           if ! location=$(get_page_location "$file")
#           then
#               echo "$file: can't determine location." 1>&2
#               continue
#           fi
#           if [ "$mode" = "deltas" -o "$mode" = "map" ] && ! is_in_index "$location"
#           then
#               echo "$file: Not published, skipping." 1>&2
#               continue
#           fi
#           [ "$any" = "false" ] && ! $suppress && echo ""
#   }

# NOTE(review): recurse_map continues beyond the end of the section
# under review, so it is left unedited.  Its visible beginning is kept
# verbatim below and must be rejoined with the remainder of the
# definition.
#
#   function recurse_map {
#       typeset file here filelist empty=false dirname="${1:-.}" base title
#       if [ "$dirname" = "." ]
#       then
#           filelist=$(ls -1 "$dirname"/*.web "$dirname"/*.txt "$dirname"/*.do "$dirname"/*.pdf "$dirname"/*.txt.gz 2>/dev/null)
#       else
#           filelist=$(ls -1 "$dirname"/*.web "$dirname"/*.txt "$dirname"/*.do "$dirname"/*.pdf "$dirname"/*.txt.gz 2>/dev/null | egrep -v '/index.web$|/index.html$|/RCS$|/SCCS$|/Thumbnails$')
#       fi
#       [ "$filelist" = "" ] && empty=true
#       if ! $empty
#       then
#           echo "