NAME Tag::Reader::Perl - Parse SGML/HTML/XML by each "tag". SYNOPSIS use Tag::Reader::Perl; my $obj = Tag::Reader::Perl->new; my @tokens = $obj->gettoken; $obj->set_file($file, $force); $obj->set_text($text, $force); METHODS "new()" my $obj = Tag::Reader::Perl->new; Constructor. Returns instance of object. "gettoken" my @tokens = $obj->gettoken; Get parsed token. Returns structure defining parsed token in array context. See TOKEN STRUCTURE. e.g. <xml> → ('<xml>', 'xml', 1, 1) Returns parsed token in scalar mode. e.g. <xml> → '<xml>' "set_file" $obj->set_file($file, $force); Set file for parsing. If $force is present, reset the file for parsing when a previous text or file exists. Returns undef. "set_text" $obj->set_text($text, $force); Set text for parsing. If $force is present, reset the text for parsing when a previous text or file exists. Returns undef. TOKEN STRUCTURE Structure contains 4 fields in array: - parsed data - tag type - number of line - number of column in line Tag types are: - '[\w:]+' - element name. - '/[\w:]+' - end of element name. - '!data' - data - '![cdata[' - cdata - '!--' - comment - '?\w+' - instruction - '![\w+' - conditional - '!attlist' - DTD attlist - '!element' - DTD element - '!entity' - DTD entity - '!notation' - DTD notation ERRORS new(): From Class::Utils::set_params(): Unknown parameter '%s'. set_text(): Bad tag. Bad text. Cannot set new data if exists data. set_file(): Bad tag. Bad file. Cannot set new data if exists data. Cannot open file '%s'. EXAMPLE1 use strict; use warnings; use Encode qw(decode_utf8 encode_utf8); use Tag::Reader::Perl; # Object. my $obj = Tag::Reader::Perl->new; # Example data. my $sgml = <<'END'; <DOKUMENT> <adresa stát="cs"> <město> <ulice>Nová</ulice> <číslo>5</číslo> </adresa> </DOKUMENT> END # Set data to object. $obj->set_text(decode_utf8($sgml)); # Tokenize. 
while (my @tag = $obj->gettoken) { print "[\n"; print "\t[0]: '".encode_utf8($tag[0])."'\n"; print "\t[1]: ".encode_utf8($tag[1])."\n"; print "\t[2]: $tag[2]\n"; print "\t[3]: $tag[3]\n"; print "]\n"; } # Output: # [ # [0]: '<DOKUMENT>' # [1]: dokument # [2]: 1 # [3]: 1 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 1 # [3]: 11 # ] # [ # [0]: '<adresa stát="cs">' # [1]: adresa # [2]: 2 # [3]: 3 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 2 # [3]: 21 # ] # [ # [0]: '<město>' # [1]: město # [2]: 3 # [3]: 5 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 3 # [3]: 12 # ] # [ # [0]: '<ulice>' # [1]: ulice # [2]: 4 # [3]: 5 # ] # [ # [0]: 'Nová' # [1]: !data # [2]: 4 # [3]: 12 # ] # [ # [0]: '</ulice>' # [1]: /ulice # [2]: 4 # [3]: 16 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 4 # [3]: 24 # ] # [ # [0]: '<číslo>' # [1]: číslo # [2]: 5 # [3]: 5 # ] # [ # [0]: '5' # [1]: !data # [2]: 5 # [3]: 12 # ] # [ # [0]: '</číslo>' # [1]: /číslo # [2]: 5 # [3]: 13 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 5 # [3]: 21 # ] # [ # [0]: '</adresa>' # [1]: /adresa # [2]: 6 # [3]: 3 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 6 # [3]: 12 # ] # [ # [0]: '</DOKUMENT>' # [1]: /dokument # [2]: 7 # [3]: 1 # ] # [ # [0]: ' # ' # [1]: !data # [2]: 7 # [3]: 12 # ] DEPENDENCIES Class::Utils, Error::Pure, Readonly. SEE ALSO Tag::Reader Parse SGML/HTML/XML by each "tag". HTML::TagReader Perl extension module for reading html/sgml/xml files by tags. REPOSITORY <https://github.com/michal-josef-spacek/Tag-Reader-Perl> AUTHOR Michal Josef Špaček <mailto:skim@cpan.org> <http://skim.cz> LICENSE AND COPYRIGHT © Michal Josef Špaček 2005-2021 BSD 2-Clause License VERSION 0.02