#!/usr/local/bin/perl
#
# @(#) Perl -- Rip Package's or script's doc to Technical text format
# @(#) $Id: ripdoc.pl,v 1.19 1999/02/08 19:38:58 jaalto Exp $
#
# {{{ Documentation
#
#   File id
#
#       Copyright (C) 1997-98 Jari Aalto
#       Created:      1997-02
#       Keywords:     Perl text conversion $
#       PerlVer:      5.004 $
#
#       This program is free software; you can redistribute it and/or
#       modify it under the terms of the GNU General Public License as
#       published by the Free Software Foundation; either version 2 of
#       the License, or (at your option) any later version.
#
#       This program is distributed in the hope that it will be useful, but
#       WITHOUT ANY WARRANTY; without even the implied warranty of
#       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#       General Public License for more details.
#
#       You should have received a copy of the GNU General Public License along
#       with this program; if not, write to the Free Software Foundation,
#       Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#   About program layout
#
#       The {{ }}} marks you see in this file are part of the file "fold"
#       control package called folding.el (Unix Emacs lisp package).
#       See ftp://ftp.csd.uu.se/pub/users/andersl/beta/ to get the latest.
#
#       There are also lines that look like
#       # ....... &tag ... and they
#       are generated by the Emacs Lisp package tinybm.el, which is also
#       a document structure tool. You can jump between the blocks with
#       Ctrl-up and Ctrl-down keys and create those "bookmarks" with
#       Emacs M-x tibm-insert. See mentioned URL cs.uta.fi.
#
#   Funny identifiers at the top of file
#
#       The GNU RCS ident(1) program can print useful information out
#       of all variables that are in format $ IDENTIFIER: text $
#       See also Unix man pages for command what(1) which outputs all lines
#       matching @( # ).
#       Try commands:
#
#       % what  PRGNAME
#       % ident PRGNAME
#
#   Change Log:
# }}}

BEGIN { require 5.004 }

use autouse 'Pod::Text' => qw( pod2text );

use strict;
use integer;
use English;
use Getopt::Long;

use vars qw ( $VERSION );

#   This is for use of Makefile.PL and ExtUtils::MakeMaker
#   So that it puts the tardist number in format YYYY.MMDD
#   The REAL version number is defined later

#   The following variable is updated by my Emacs setup whenever
#   this file is saved

$VERSION = '1999.0208';

# {{{ Initial setup

# ****************************************************************************
#
#   DESCRIPTION
#
#       Set global variables for the program: program name, library
#       prefix used in messages, RCS file id, version, contact and URL.
#
#       $VERSION is replaced with the RCS revision number taken from
#       $FILE_ID. If the RCS keyword has not been expanded (no revision
#       field available), the YYYY.MMDD value set at the top of file is
#       kept instead of being overwritten with an undefined value.
#
#   INPUT PARAMETERS
#
#       none
#
#   RETURN VALUES
#
#       none
#
# ****************************************************************************

sub Initialize ()
{
    use vars qw
    (
        $PROGNAME
        $LIB
        $FILE_ID
        $VERSION
        $CONTACT
        $URL
    );

    $PROGNAME   = "ripdoc.pl";
    $LIB        = $PROGNAME;

    $FILE_ID    = q$Id: ripdoc.pl,v 1.19 1999/02/08 19:38:58 jaalto Exp $;

    #   Fields of the id string are: "Id:" FILE,v REVISION DATE ...
    #   The third field is the revision number.

    my $revision = (split ' ', $FILE_ID)[2];

    if ( defined $revision  and  $revision ne "" )
    {
        $VERSION = $revision;
    }

    $CONTACT    = "";
    $URL        = "ftp://cs.uta.fi/pub/ssjaaa/";
}

# }}}
# {{{ help, args

# ***************************************************************** &help ****
#
#   DESCRIPTION
#
#       Print help and exit.
#
#   INPUT PARAMETERS
#
#       $msg    [optional] Reason why function was called.-
#
#   RETURN VALUES
#
#       none
#
# ****************************************************************************

=pod

=head1 NAME

ripdoc.pl - Rip documentation from the beginning of file

=head1 SYNOPSIS

    ripdoc.pl FILE FILE ..

=head1 OPTIONS

=head2 General options

=over 4

=item B<--doc>

Use default search start: 'Documentation:|Commentary:'

=item B<--begin-regexp REGEXP> B<-bre REGEXP>

Search any beginning regexp matching RE instead of default
'File id|Preface'

=item B<--end-regexp REGEXP> B<-ere REGEXP>

Search any ending regexp matching RE instead of default
'Change Log|History'

=item B<--ignore-regexp> B<-ire REGEXP>

Ignore lines matching RE. The default value ignores shell I<#!> lines.

=back


=head2 Miscellaneous options

=over 4

=item B<--debug> B<-d>

Turn on debug.

=item B<--help> B<-h>

Print help page.

=item B<--verbose> B<-v>

Turn on verbose messages.

=item B<--Version> B<-V>

Print program version and contact info.

=back

=head1 README

=head2 General

Perl and Java are exceptions among the programming languages, because
they include a way to embed documentation inside the program. The Perl
interpreter knows POD and it can ignore those lines. In other
programming languages, like Emacs lisp and Shell programs, Procmail
code, you don't have anything out of the box. The choices are:

=over 4

=item *

Separate document for the program is maintained

=item *

Documentation is put to the beginning of the file and distributed
with the file

=back

This is the tool for the latter, keeping documentation with the
distributed program. It extracts the documentation which follows TF
(Technical format: ftp://cs.uta.fi/pub/ssjaaa/t2html.html ) guidelines.
The idea is that you can generate html docs similarly to what
pod2html does. The conversion goes like this:

    % ripdoc.pl code.sh | t2html.pl > code.html

=head2 How to write documentation

In order to use this program, you must write the documentation to the
beginning of file in the following format:

    #!/bin/sh
    #
    # @(#) file.extension -- proper first line description
    #
    #   Preface starts at column 4
    #
    #       txt txt txt at column 8
    #       txt txt txt at column 8
    #
    #           Further example code at column 12
    #           More code examples at column 12
    #
    #   Next heading
    #
    #       txt txt txt at column 8
    #       txt txt txt at column 8
    #       txt txt txt at column 8

=head2 Finer specifications for the documentation format

Program reads documentation from the beginning of file. The very first
line determines the comment string in the file. The documentation starts
when header I<Preface> or I<File id> is found

    #   Preface             # or "File id"

and the documentation ends at the header

    #   Change Log:         # or "History"

The I<Preface> should explain how the package sprang into existence
and the rest of the documentation follows after that.

Most important is the first line, or a line near the first if the file
is a shell script: it must be exactly like the following. You _must_ not
use double dashes in any other heading. This gives the name of the file
and description string. Use the what(1) marker at the beginning of the
sentence.

    # @(#) file.extension -- proper first line description

=head2 Notes on documentation format

The very first line determines what is the comment string that is ripped
away from the beginning of lines. You must not use multiple comment
markers like the I<#######> shown below; this will handicap ripdoc.pl.
Remember to start writing headings at column four and write text at
column 8. The following is not in TF format.

    #!/bin/sh
    #
    #######################################################
    #
    # @(#) file.extension -- proper first line description
    #
    #######################################################
    #
    # Preface starts at column 1
    #
    #       txt txt txt at column 8
    #       txt txt txt at column 8
    #
    #           Further example code at column 12
    #           More code examples at column 12
    #
    # Next heading is here
    #
    #       txt txt txt at column 8
    #       txt txt txt at column 8
    #       txt txt txt at column 8

=head1 SUITABILITY

You can run this program to rip out documentation from any file that
follows the 4 character indentation rule, which is the basis of TF
(technical format). The only requirement is that the comment markers
are single lined. The C-styled I</*> I<*/> combination cannot be
handled, because the comment marker is determined from the beginning
of line.

=head1 EXAMPLES

You can usually feed the produced clear text output to a text to html
filter to generate html documentation out of the comments.

=pod

    % ripdoc.pl file-sh.sh | t2html.pl > file-sh.html
    % ripdoc.pl file-cc.cc | t2html.pl > file-cc.html

=head1 SEE ALSO

t2html(1), weblint(1), html2ps(1), ps2ascii(1)

=head1 AVAILABILITY

CPAN entry is http://www.perl.com/CPAN-local//scripts/

Reach author at jari.aalto@poboxes.com or
http://www.netforward.com/poboxes/?jari.aalto

=head1 SCRIPT CATEGORIES

CPAN/Administrative
html

=head1 PREREQUISITES

No additional CPAN modules required.

=head1 COREQUISITES

No optional CPAN modules needed.

=head1 OSNAMES

C<any>

=head1 VERSION

$Id: ripdoc.pl,v 1.19 1999/02/08 19:38:58 jaalto Exp $

=head1 AUTHOR

Copyright 1998 Jari Aalto. All rights reserved. This program is free
software; you can redistribute it and/or modify it under the same terms
as Perl itself or in terms of Gnu General Public licence v2 or later.

=cut

#   Print the POD manual of this program and exit with status 1.
#   If the optional $msg is given, it is written to STDERR after the
#   manual so that the reason is the last thing the user sees.

sub Help (;$)
{
    my $id  = "$LIB.Help";
    my $msg = shift;            # optional arg, why are we here...

    pod2text( $PROGRAM_NAME );

    if ( defined $msg  and  $msg ne "" )
    {
        warn "$id: $msg\n";
    }

    exit 1;
}

# ************************************************************** &args *******
#
#   DESCRIPTION
#
#       Read and interpret command line arguments ARGV. Sets global
#       variables. Option names are case sensitive, so that the
#       documented short options -v (verbose) and -V (Version) are
#       distinct, and -d means --debug instead of being an ambiguous
#       abbreviation of --debug and --doc.
#
#       Prints version and dies if --Version was given. Calls Help()
#       if --help was given. Dies if the command line contains
#       invalid options.
#
#   INPUT PARAMETERS
#
#       none
#
#   RETURN VALUES
#
#       none
#
# ****************************************************************************

sub HandleCommandLineArgs ()
{
    my $id = "$LIB.HandleCommandLineArgs";

    use vars qw
    (
        $DOCUMENT_OPTION
        $VERSION_OPTION
        $HELP
        $BEGIN_REGEXP
        $END_REGEXP
        $IGNORE_REGEXP
        $QUIET
        $debug
        $verb
    );

    # ............................................... default values ...

    #   RCS Revision "Log" ends the description.

    $BEGIN_REGEXP   = 'File id|Preface';
    $END_REGEXP     = 'Change\s+Log|History|[$]Log: ';

    #   Ignore shebang lines

    $IGNORE_REGEXP  = '^.![/].*[/]';

    $debug = 0;

    # .................................................... read args ...

    #   no_ignore_case: "v" and "V" must be different options.
    #   The old name config() is used on purpose: it is what the
    #   Getopt::Long shipped with Perl 5.004 provides.

    Getopt::Long::config( qw
    (
        no_ignore_case
        require_order
    ));

    my $status = GetOptions      # Getopt::Long
    (
          "h|help"                  => \$HELP
        , "v|verbose"               => \$verb
        , "V|Version|version"       => \$VERSION_OPTION
        , "d|debug"                 => \$debug
        , "doc"                     => \$DOCUMENT_OPTION
        , "bre|begin-regexp=s"      => \$BEGIN_REGEXP
        , "ere|end-regexp=s"        => \$END_REGEXP
        , "ire|ignore-regexp=s"     => \$IGNORE_REGEXP
    );

    #   GetOptions has already printed what was wrong.

    $status  or  die "$id: Invalid command line options. Try --help\n";

    $VERSION_OPTION  and  die "$VERSION $PROGNAME $CONTACT $URL\n";
    $HELP            and  Help();

    $verb = 1   if $debug;

    if ( $DOCUMENT_OPTION )
    {
        $BEGIN_REGEXP = 'Documentation:|Commentary:';
    }
}

# }}}
# {{{ Main

# ############################################################ &main ###
#
#   Read the files given on the command line (or STDIN) and print the
#   documentation found between the BEGIN and END headings with the
#   comment markers removed. The comment marker is the first
#   non-whitespace token of the first non-ignored line of each file.
#   All per file state is reset at the end of each input file, so that
#   several files with different comment syntaxes can be given.

Initialize();
HandleCommandLineArgs();

my $BODY                = 0;    # 0 = outside doc, 1 = just entered, 2.. inside
my $COMMENT             = "";   # comment marker of current file
my $COMMENT_RE          = "";   # $COMMENT quoted for use in regexps
my $PADDING             = "";   # spaces that replace the comment marker
my $BEGIN_MATCH_REGEXP  = "";   # heading that starts the documentation
my $END_MATCH_REGEXP    = "";   # heading that ends the documentation

#   file.ext -- description

my $BODY_MATCH_REGEXP   = "([-a-zA-Z0-9.])([-a-zA-Z0-9.]+"
                          . "\\s+--+\\s+.*)"
                          ;

my $name = "";                  # title line, printed when body starts
my( $ch1, $rest );

while ( <> )
{
    next if /$IGNORE_REGEXP/o;

    if ( $COMMENT eq "" )
    {
        #   Find out what commenting syntax is for this file.
        #   An empty line cannot tell it; wait for the next line.

        next unless /(\S+)/;

        $COMMENT    = $1;
        $COMMENT_RE = quotemeta $COMMENT;   # marker may be e.g. "/*"

        #   We must preserve indentation when removing comments.

        $PADDING    = " " x length $COMMENT;

        #   No /o modifier with these: they change from file to file.

        $BEGIN_MATCH_REGEXP = "^$COMMENT_RE+\\s+(?:$BEGIN_REGEXP)";
        $END_MATCH_REGEXP   = "^$COMMENT_RE+\\s+(?:$END_REGEXP)";

        $debug and print "INIT: [$COMMENT] [$ARG]";
    }

    if ( $debug  and  /$BODY_MATCH_REGEXP/o )
    {
        #   print, not printf: input line may contain "%"

        print "!!$BODY ", length($`), " [$1] [$2] $ARG";
    }

    # ..................................... first line documentation ...
    #   Get first line name

    if ( not $BODY
         and /$BODY_MATCH_REGEXP/o
         and length($`) < 20    # the match to the left size must be short
       )
    {
        $debug and print "BODY: $ARG";

        #   convert first character to uppercase.

        ($ch1, $rest) = ($1, $2);

        if ( $ch1 !~ /[a-zA-Z]/ )
        {
            $verb and
              warn "$LIB: First line does not begin with letter. [$ARG] \n";

            $name = "$ch1$rest";
        }
        else
        {
            $name = uc($ch1) . $rest;
        }

        $debug and print "BODY: $name\n";
    }

    # ....................................................... bounds ...

    if ( /$BEGIN_MATCH_REGEXP/i )
    {
        $BODY = 1;
        $debug and print "BEG:[$&] [$ARG]";
    }

    if ( /$END_MATCH_REGEXP/i )
    {
        $BODY = 0;
        $debug and print "END:[$&] [$ARG]";
        next;
    }

    if ( $BODY )
    {
        $BODY == 1  and  print "$name\n\n";
        $BODY++;

        # .................................................... &lisp ...
        #   Delete lisp comment and tag lines by tinybm.el
        #   ;; ............ &thisTag ...
        #
        #   Also delete 3 comment markers, because they are not
        #   text sections
        #   ;;;Commentary:
        #
        #   Also delete folding.el tags ';; }}}' and ';; {{{'
        #   Braces are escaped: unescaped literal "{" in a regexp is
        #   not accepted by all Perl versions.
        #
        #   NOTE(review): '^ ' skips every line that begins with a
        #   space before the comment is removed -- confirm this is
        #   the intended character.

        next if /;;;|;; \.\.|^ |\}\}\}|\{\{\{/;

        s/^;;+[*] _//;
        s/^;;+[*]//;
        s/^;;+/ /;

        # ................................................. &general ...

        s/^$COMMENT_RE/$PADDING/;

        #   Make sure that:
        #
        #   #  Header 1 is here
        #    ^^^ 3 spaces is converted to 4 spaces.
        #
        #   Heading lines (3 or 4 leading spaces) are also capitalized.
        #   Text at column 8 and beyond is left untouched.

        if ( /^ {3,4}(\w)(.*)/ )
        {
            $ARG = "    " . uc($1) . $2 . "\n";
        }

        print "$ARG";
    }
}
continue
{
    #   End of current input file: forget everything learned from it,
    #   next file may use different comment syntax. This block is run
    #   also when the loop body was left with 'next'.

    if ( eof )
    {
        $BODY       = 0;
        $COMMENT    = "";
        $COMMENT_RE = "";
        $PADDING    = "";
        $name       = "";
    }
}

# }}}

0;
__END__
# end of ripdoc.pl