#!/usr/bin/perl -w #============================================================================== # Script: desift.pl # Author: James Shimada # Purpose: desift is a data-driven, template-based text formatter. #============================================================================== use strict; use Getopt::Std; # variables for command-line options my $template = 'T'; my $tmplfile = 't'; my $delim = 'd'; my $skiprows = 's'; my $tagbegin = 'B'; my $tagend = 'E'; # set the default values for options my %opts = ( $tmplfile => '', $delim => '\t', $skiprows => '^$', $tagbegin => '%', $tagend => '', $template => '%0' ); my $err = ""; my %flds; #------------------------------------------------------------------------------ # Function: showUsage # Purpose: Prints usage info if --help option was supplied. # Input: None. # Output: Prints to stdout #------------------------------------------------------------------------------ sub showUsage() { print < ) { $tmpl .= $_; } close( TMPL ); } elsif( $opts{$template} ) { # automatically add a new line to the supplied template $tmpl = "$opts{$template}\n"; } # remember what fields are specified in the template my $tagre = $opts{$tagbegin} . '(\d+)' . $opts{$tagend}; while ( $tmpl =~ /$tagre/g ) { $flds{$1}++ if( $1 ); } return $tmpl; } #------------------------------------------------------------------------------ # Function: batchit # Purpose: Central text-formatting function. # Input: stdin, script-level %opts hash # Returns: prints formatted text to stdout # Sets script-level $err variable if error occured. #------------------------------------------------------------------------------ sub batchit { my $tmpl; my $tmpl2; my $fld; my @cols; my $tag; $tmpl = getTemplate(); return if( $err ); # open the data file to do work on it while ( <> ) { # trim it, chomp it, and skip blank lines chomp; # skip this line if necessary next if (/$opts{$skiprows}/); # get a fresh template for this line $tmpl2 = $tmpl; # expand the "whole line" tag $tag = $opts{$tagbegin} . 0 . $opts{$tagend}; $tmpl2 =~ s/$tag/$_/g; # split the line (default variable) on delimiter @cols = split( $opts{$delim} ); # for all fields specified in the template, expand the # field with the corresponding element in the data for $fld ( keys %flds ) { $tag = $opts{$tagbegin} . $fld . $opts{$tagend}; # if the field specified in the template does not exist # in the data, silently expand the field to null if( defined( $cols[$fld-1] )) { $tmpl2 =~ s/$tag/$cols[$fld-1]/g; } else { $tmpl2 =~ s/$tag//g; } } # and out we go print $tmpl2; } } #------------------------------------------------------------------------------ # Function: isValidOpt # Purpose: Validate the value for an option. # Input: $optkey, $optval # Returns: true if valid # Sets script-level $err variable if error occured. #------------------------------------------------------------------------------ sub isValidOpt { my ( $optkey, $optval ) = @_; #print "Is $optval a valid value for $optkey?\n"; # validate template file if( $optkey eq $tmplfile ) { if( -f $optval ) { return 1; } else { $err .= "Invalid template '$optval'. No such file.\n"; return 0; } } return 1; } #------------------------------------------------------------------------------ # Function: getArgs # Purpose: Retrieve command-line options # Input: command-line options # Returns: populates the script-level %opts hash with the options # Sets script-level $err variable if error occured. #------------------------------------------------------------------------------ sub getArgs { my( %args, $arg ); getopt( join( "", keys %opts ), \%args ); # make sure each arg submitted is recognized in a generic fashion for $arg ( keys %args ) { # print "$arg --> $args{$arg} \n"; if( !inArray( $arg, keys %opts )) { $err .= "Unrecognized option '$arg'\n"; } else { if( isValidOpt( $arg, $args{$arg} )) { $opts{$arg} = $args{$arg}; } } } # make sure the tag parts are regex-safe $opts{$tagbegin} = quotemeta( $opts{$tagbegin} ); $opts{$tagend} = quotemeta( $opts{$tagend} ); # now check any input files for errors for $arg ( @ARGV ) { if( !-f $arg ) { $err .= "Invalid input file '$arg'. No such file.\n"; } } } #------------------------------------------------------------------------------ # Function: inArray # Purpose: Test if a value is an element of an array # Input: $val, @ary # Returns: true if value is found in array #------------------------------------------------------------------------------ sub inArray { my $val = shift; for my $elem ( @_ ) { if( $val eq $elem ) { return 1; } } return 0; } #------------------------------------------------------------------------------ # Function: printErr # Purpose: Print error information and exit # Input: script-level $err variable. # Returns: prints to sterr #------------------------------------------------------------------------------ sub printErr { print STDERR $err; exit(1); } #------------------------------------------------------------------------------ #------------------------------------------------------------------------------ if( $#ARGV > -1 && $ARGV[0] eq '--help' ) { showUsage(); exit(0); } getArgs(); batchit() unless( $err ); printErr() if( $err ); exit(0); =head1 README I is a data-driven, temlate-oriented text filter. It occupies a niche somewhere between spartan shell utilities such as 'cut' and full-featured template engines. =head1 SYNOPSIS Command-line usage: perl desift.pl [options] [input-file ...] options: -d REGEX Field delimiter in input-file(s). Default is "\t" (tab). -t FILE Format data using template contained in FILE. -T TEMPLATE Format using TEMPLATE. By default, no filter applied ("%0"). -s REGEX Skip rows in input-file(s) matching REGEX. -B TAGSTART Part of template tag before field index. Default is "%". -E TAGEND Part of template tag after field index. Default is null. If no C then desift users standard input. =head1 DESCRIPTION Like cut, printf, and other multi-purpose shell scripting favorites, desift provides generic text formatting and filtering functions accessible from the shell. It uses standard input and output channels to maximize how it can be leveraged. Desift is particularly effective when combined in a pipeline with programs that query a data source or perform non-interactive text editing. It's lightweight and easy to use for impromptu tasks. While you can create separate template files to use for repeated or rigorous formatting tasks, you can also simply specify a template as a command-line option, in the same vein as sed allows you to provide a simple search and replace function as an argument to perform edits (as opposed to an external sed script file). =head1 SCRIPT CATEGORIES Unix/System_administration CPAN/Administrative CPAN =head1 AUTHOR James Shimada jshimada ( a t ) netjs ( d o t ) com www.netjs.com =cut