MsOffice-Word-Surgeon-2.10000755000000000000 015004477605 16017 5ustar00unknownunknown000000000000MsOffice-Word-Surgeon-2.10/Build.PL000444000000000000 255515004414175 17450 0ustar00unknownunknown000000000000
# Build script for the MsOffice-Word-Surgeon distribution (Module::Build).
use strict;
use warnings;
use Module::Build;
Module::Build->VERSION('0.4004');

my $builder = Module::Build->new(
  module_name        => 'MsOffice::Word::Surgeon',
  license            => 'artistic_2',
  dist_author        => q{DAMI <dami@cpan.org>},
  dist_version_from  => 'lib/MsOffice/Word/Surgeon.pm',
  release_status     => 'stable',
  configure_requires => {
    'Module::Build' => '0.4004',
  },
  test_requires => {
    'Test::More' => '0',
  },

  # every module loaded by the distribution is listed, core modules included
  requires => {
    perl                           => '5.24.0',
    'Archive::Zip'                 => undef,
    'Carp::Object'                 => undef,
    'Encode'                       => undef,
    'List::Util'                   => undef,
    'Scalar::Util'                 => undef,   # openhandle() is used by MsOffice::Word::Surgeon
    'Moose'                        => undef,
    'MooseX::StrictConstructor'    => undef,
    'Moose::Util::TypeConstraints' => undef,
    'XML::LibXML'                  => undef,
    'namespace::clean'             => undef,
    'POSIX'                        => undef,
    'Exporter'                     => undef,
    'match::simple'                => undef,
  },
  add_to_cleanup => [ 'MsOffice-Word-Surgeon-*' ],
  meta_merge     => {
    resources => {
      repository => 'https://github.com/damil/MsOffice-Word-Surgeon',
    }
  },
);

# also install the '.docx' files found under lib/
$builder->add_build_element('docx');
$builder->create_build_script();
MsOffice-Word-Surgeon-2.10/Changes000444000000000000 505715004417500 17442 0ustar00unknownunknown000000000000Revision history for MsOffice-Word-Surgeon 2.10 30.04.2025 - new() and save_as() can take a filehandle as docx argument 2.09 21.12.2024 - plain_text() should decode XML entities 2.08 28.09.2024 - unification of licensing terms 2.07 28.06.2024 - additional test for replace() with xml_before - unlink_fields : better algorithm for ASK fields 2.06 03.06.2024 - refactoring for fields and bookmark boundaries - new methods replace_fields() and reveal_fields() - internals : Carp::Clan replaced by Carp::Object 2.05 02.03.2024 - fix missing XML decode/encode operations on &<> 2.04 13.02.2024 - new method reveal_bookmarks() 2.03 10.05.2023 - fix bug introduced in 2.02: ->document->contents($new_content) must erase the 'was_cleaned_up' 
flag 2.02 08.05.2023 - ->replace(..., keep_xml_as_is => ) is deprecated; use (cleanup_XML => ...) instead - new option 'no_caps' for automatic upcase of runs with the 'caps' property 2.01 25.01.2023 - index images by either title or description field in alternative text (because recent versions of MsWord no longer have a 'title' field) - avoid making assumptions about attribute order in XML nodes 2.0 01.05.2022 - major refactoring - support for multiple package parts (main document, headers and footers) - support for replacing or adding an image - renamed "change" ==> "revision" 1.08 21.03.2022 - additional noise reduction : soft hyphens 1.07 27.10.2021 - new method suppress_bookmarks() - fix bug : runs containing '0' should not be treated as empty - unlink_fields() must suppress the content of ASK fields - to_uppercase() must not change HTML entities like & 1.06 16.01.2021 - fix bug in ->plain_text() : translation of soft line breaks into newlines did not work 1.05 13.12.2020 - new behaviour for the replace() method : by default it automatically cleans up the XML and automatically overwrites $surgeon->contents - additional noise reduction : remove the _goBack bookmark added automatically by MsWord 1.04 10.11.2020 - the synopsis was incorrect in its example of the replace() method 1.03 13.10.2020 - better parameter checking for new(); cleaner handling of revision ids; readability of source code improved 1.02 12.10.2020 - fix bug in ->replace(): tabs or other contents outside of was lost during replacement 1.01 03.07.2020 - fix bug in "unlink_fields" : field boundaries ("begin" / "separate" / "end" ) may contain child nodes 1.0 24.11.2019 - First public release MsOffice-Word-Surgeon-2.10/MANIFEST000444000000000000 104315004420774 17276 0ustar00unknownunknown000000000000Build.PL Changes lib/MsOffice/Word/Surgeon.pm lib/MsOffice/Word/Surgeon/BookmarkBoundary.pm lib/MsOffice/Word/Surgeon/Carp.pm lib/MsOffice/Word/Surgeon/Field.pm 
lib/MsOffice/Word/Surgeon/PackagePart.pm lib/MsOffice/Word/Surgeon/Revision.pm lib/MsOffice/Word/Surgeon/Run.pm lib/MsOffice/Word/Surgeon/Text.pm lib/MsOffice/Word/Surgeon/Utils.pm MANIFEST This list of files README.md t/etc/MsOffice-Word-Surgeon.docx t/msoffice-word-surgeon.t t/replace_fields.t t/reveal_bookmarks.t t/zip_from_filehandle.t xt/manifest.t xt/pod.t META.yml META.json MsOffice-Word-Surgeon-2.10/META.json000444000000000000 501715004477605 17600 0ustar00unknownunknown000000000000{ "abstract" : "tamper with the guts of Microsoft docx documents, with regexes", "author" : [ "DAMI " ], "dynamic_config" : 1, "generated_by" : "Module::Build version 0.4234", "license" : [ "artistic_2" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "MsOffice-Word-Surgeon", "prereqs" : { "configure" : { "requires" : { "Module::Build" : "0.4004" } }, "runtime" : { "requires" : { "Archive::Zip" : "0", "Carp::Object" : "0", "Encode" : "0", "Exporter" : "0", "List::Util" : "0", "Moose" : "0", "Moose::Util::TypeConstraints" : "0", "MooseX::StrictConstructor" : "0", "POSIX" : "0", "XML::LibXML" : "0", "match::simple" : "0", "namespace::clean" : "0", "perl" : "v5.24.0" } }, "test" : { "requires" : { "Test::More" : "0" } } }, "provides" : { "MsOffice::Word::Surgeon" : { "file" : "lib/MsOffice/Word/Surgeon.pm", "version" : "2.10" }, "MsOffice::Word::Surgeon::BookmarkBoundary" : { "file" : "lib/MsOffice/Word/Surgeon/BookmarkBoundary.pm" }, "MsOffice::Word::Surgeon::Carp" : { "file" : "lib/MsOffice/Word/Surgeon/Carp.pm" }, "MsOffice::Word::Surgeon::Field" : { "file" : "lib/MsOffice/Word/Surgeon/Field.pm" }, "MsOffice::Word::Surgeon::PackagePart" : { "file" : "lib/MsOffice/Word/Surgeon/PackagePart.pm" }, "MsOffice::Word::Surgeon::Revision" : { "file" : "lib/MsOffice/Word/Surgeon/Revision.pm" }, "MsOffice::Word::Surgeon::Run" : { "file" : "lib/MsOffice/Word/Surgeon/Run.pm" }, "MsOffice::Word::Surgeon::Text" : { "file" : 
"lib/MsOffice/Word/Surgeon/Text.pm" }, "MsOffice::Word::Surgeon::Utils" : { "file" : "lib/MsOffice/Word/Surgeon/Utils.pm" } }, "release_status" : "stable", "resources" : { "license" : [ "http://www.perlfoundation.org/artistic_license_2_0" ], "repository" : { "url" : "https://github.com/damil/MsOffice-Word-Surgeon" } }, "version" : "2.10", "x_serialization_backend" : "JSON::PP version 4.16" } MsOffice-Word-Surgeon-2.10/META.yml000444000000000000 330315004477605 17424 0ustar00unknownunknown000000000000--- abstract: 'tamper with the guts of Microsoft docx documents, with regexes' author: - 'DAMI ' build_requires: Test::More: '0' configure_requires: Module::Build: '0.4004' dynamic_config: 1 generated_by: 'Module::Build version 0.4234, CPAN::Meta::Converter version 2.150010' license: artistic_2 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: MsOffice-Word-Surgeon provides: MsOffice::Word::Surgeon: file: lib/MsOffice/Word/Surgeon.pm version: '2.10' MsOffice::Word::Surgeon::BookmarkBoundary: file: lib/MsOffice/Word/Surgeon/BookmarkBoundary.pm MsOffice::Word::Surgeon::Carp: file: lib/MsOffice/Word/Surgeon/Carp.pm MsOffice::Word::Surgeon::Field: file: lib/MsOffice/Word/Surgeon/Field.pm MsOffice::Word::Surgeon::PackagePart: file: lib/MsOffice/Word/Surgeon/PackagePart.pm MsOffice::Word::Surgeon::Revision: file: lib/MsOffice/Word/Surgeon/Revision.pm MsOffice::Word::Surgeon::Run: file: lib/MsOffice/Word/Surgeon/Run.pm MsOffice::Word::Surgeon::Text: file: lib/MsOffice/Word/Surgeon/Text.pm MsOffice::Word::Surgeon::Utils: file: lib/MsOffice/Word/Surgeon/Utils.pm requires: Archive::Zip: '0' Carp::Object: '0' Encode: '0' Exporter: '0' List::Util: '0' Moose: '0' Moose::Util::TypeConstraints: '0' MooseX::StrictConstructor: '0' POSIX: '0' XML::LibXML: '0' match::simple: '0' namespace::clean: '0' perl: v5.24.0 resources: license: http://www.perlfoundation.org/artistic_license_2_0 repository: https://github.com/damil/MsOffice-Word-Surgeon 
version: '2.10' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' MsOffice-Word-Surgeon-2.10/README.md000444000000000000 105014233423346 17423 0ustar00unknownunknown000000000000# MsOffice-Word-Surgeon Tamper with the guts of Microsoft docx documents, with regexes This module supports a few operations for modifying or extracting text from Microsoft Word documents in '.docx' format -- therefore the name 'surgeon'. Since a surgeon does not give life, there is no support for creating fresh documents; if you have such needs, use one of the other packages listed in the L section. To my knowledge, this is the only solution (even in other languages) for applying regular expressions to the contents of Word documents. MsOffice-Word-Surgeon-2.10/lib000755000000000000 015004477605 16565 5ustar00unknownunknown000000000000MsOffice-Word-Surgeon-2.10/lib/MsOffice000755000000000000 015004477605 20260 5ustar00unknownunknown000000000000MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word000755000000000000 015004477605 21173 5ustar00unknownunknown000000000000MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon.pm000444000000000000 4207715004420532 23325 0ustar00unknownunknown000000000000
package MsOffice::Word::Surgeon;
use 5.24.0;
use Moose;
use MooseX::StrictConstructor;
use Moose::Util::TypeConstraints  qw(union);
use Archive::Zip                  qw(AZ_OK);
use Encode                        qw(encode_utf8 decode_utf8);
use Scalar::Util                  qw(openhandle);
use MsOffice::Word::Surgeon::Carp;
use MsOffice::Word::Surgeon::Revision;
use MsOffice::Word::Surgeon::PackagePart;

# syntactic sugar for attributes : both helpers declare an attribute whose value
# is computed on demand by a builder method named after the attribute ("_$attr").
# 'has_inner' additionally forbids passing the attribute to the constructor.
# The ($@) prototypes are kept on purpose so that the paren-less declarations
# below keep parsing exactly as before.
sub has_lazy  ($@) {my $attr = shift; has($attr => @_, lazy => 1, builder => "_$attr")}
sub has_inner ($@) {my $attr = shift; has_lazy($attr => @_, init_arg => undef)}

# remove imported functions and the helpers above from the method namespace
use namespace::clean -except => 'meta';

our $VERSION = '2.10';

#======================================================================
# TYPES AND ATTRIBUTES
#======================================================================

my $DocxSource = union([qw/Str FileHandle/]);

# how to access the document
has      'docx' => (is => 'ro', isa => $DocxSource);     # filename or filehandle, or ..
has_lazy 'zip'  => (is => 'ro', isa => 'Archive::Zip');  # .. an already opened zip archive

# syntax to show embedded fields -- used by PackagePart::replace_field
has 'show_embedded_field' => (is => 'ro', isa => 'Str', default => '{%s}');

# inner attributes lazily constructed by the module
has_inner 'parts'    => (is      => 'ro',
                         isa     => 'HashRef[MsOffice::Word::Surgeon::PackagePart]',
                         traits  => ['Hash'],
                         handles => {part => 'get'});

has_inner 'document' => (is      => 'ro',
                         isa     => 'MsOffice::Word::Surgeon::PackagePart',
                         handles => [qw/contents original_contents indented_contents
                                        plain_text replace/]);
   # Note: this attribute is equivalent to $self->part('document'); made into an attribute
   # for convenience and for automatic delegation of methods through the 'handles' declaration

# just a slot for internal storage
has 'next_rev_id' => (is => 'bare', isa => 'Num', default => 1, init_arg => undef);
   # used by the new_revision() method for creating *::Revision objects -- each instance
   # gets a fresh value


#======================================================================
# BUILDING INSTANCES
#======================================================================

# syntactic sugar for supporting ->new($path) instead of ->new(docx => $path) :
# with an odd number of arguments, a leading filename or filehandle is taken
# to be the 'docx' attribute
around BUILDARGS => sub {
  my $orig  = shift;
  my $class = shift;

  unshift @_, 'docx' if scalar(@_) % 2 and $DocxSource->check($_[0]);

  $class->$orig(@_);
};


# make sure that the constructor got either a 'docx' or a 'zip' attribute.
# Slots are read directly from the hash so that the lazy 'zip' builder is
# not triggered by this check.
sub BUILD {
  my $self = shift;

  my $class = ref $self;

  $self->{docx} || $self->{zip}
    or croak "$class->new() : need either 'docx' or 'zip' attribute";
  not ($self->{docx} && $self->{zip})
    or croak "$class->new() : can't have both 'docx' and 'zip' attributes";
}


#======================================================================
# LAZY ATTRIBUTE CONSTRUCTORS
#======================================================================

# builder for 'zip' : reads the archive from the 'docx' filename or filehandle
sub _zip {
  my $self = shift;

  my $docx = $self->docx;
  my ($meth, $source_name) = openhandle($docx) ? (readFromFileHandle => 'filehandle')
                                               : (read               => $docx);
  my $zip    = Archive::Zip->new;
  my $result = $zip->$meth($docx);
  $result == AZ_OK or croak "cannot unzip from $source_name";

  return $zip;
}


# builder for 'parts' : a hashref of PackagePart objects, keyed by part name
sub _parts {
  my $self = shift;

  # first create a package part for the main document
  my $doc = MsOffice::Word::Surgeon::PackagePart->new(surgeon   => $self,
                                                      part_name => 'document');

  # gather names of headers and footers related to that document
  # (the list of relationships may contain empty slots, hence the '$_ &&' test)
  my @headers_footers = map  {$_->{Target} =~ s/\.xml$//r}
                        grep {$_ && $_->{short_type} =~ /^(header|footer)$/}
                        $doc->relationships->@*;

  # create package parts for headers and footers and assemble all parts into a hash
  my %parts = (document => $doc);
  $parts{$_} = MsOffice::Word::Surgeon::PackagePart->new(surgeon   => $self,
                                                         part_name => $_)
    for @headers_footers;

  return \%parts;
}


# builder for 'document' : the main document part
sub _document {shift->part('document')}


#======================================================================
# ACCESSING OR CHANGING THE INTERNAL STATE
#======================================================================

# Reads or writes a member of the zip archive.
#   $surgeon->xml_member($name)           returns the member contents, utf8-decoded;
#                                         croaks if the archive yields nothing for $name
#   $surgeon->xml_member($name, $new_xml) stores the utf8-encoded $new_xml and returns
#                                         whatever Archive::Zip::contents() returns
sub xml_member {
  my ($self, $member_name, $new_content) = @_;

  if (! defined $new_content) {  # used as a reader
    my $bytes = $self->zip->contents($member_name)
      or croak "no zip member for $member_name";
    return decode_utf8($bytes);
  }
  else {                         # used as a writer
    my $bytes = encode_utf8($new_content);
    return $self->zip->contents($member_name, $bytes);
  }
}


# rw accessor to the '[Content_Types].xml' member
sub _content_types {
  my ($self, $new_content_types) = @_;
  return $self->xml_member('[Content_Types].xml', $new_content_types);
}


# names of the parts starting with $kind ('header' or 'footer'),
# ordered by the number that follows that prefix
sub _sorted_part_names {
  my ($self, $kind) = @_;

  my $prefix_length = length $kind;
  return sort {substr($a, $prefix_length) <=> substr($b, $prefix_length)}
         grep {/^\Q$kind\E/}
         keys $self->parts->%*;
}

# ordered list of header part names
sub headers {
  my ($self) = @_;
  return $self->_sorted_part_names('header');
}

# ordered list of footer part names
sub footers {
  my ($self) = @_;
  return $self->_sorted_part_names('footer');
}


# returns the current revision id and increments the internal counter
sub new_rev_id {
  my ($self) = @_;
  return $self->{next_rev_id}++;
}


#======================================================================
# GENERIC PROPAGATION TO ALL PARTS
#======================================================================

# Calls $method_name(@args) on every package part; returns a hashref
# of results keyed by part name.
sub all_parts_do {
  my ($self, $method_name, @args) = @_;

  my $parts = $self->parts;

  # apply the method to each package part
  my %result;
  $result{$_} = $parts->{$_}->$method_name(@args) foreach keys %$parts;

  return \%result;
}


#======================================================================
# CLONING
#======================================================================

# Returns a new surgeon instance built on a new zip archive that holds
# the members of the current archive.
sub clone {
  my $self = shift;

  # create a new Zip archive and copy all members to it
  # NOTE(review): the member objects themselves are handed over to the new
  # archive -- confirm in Archive::Zip whether addMember() copies them or
  # whether both archives end up sharing the same member objects.
  my $new_zip = Archive::Zip->new;
  foreach my $member ($self->zip->members) {
    $new_zip->addMember($member);
  }

  # create a new instance of this class
  my $class = ref $self;
  my $clone = $class->new(zip => $new_zip);

  # other attributes will be recreated lazily within the clone .. not
  # the most efficient way, but it is easier and safer, otherwise there is
  # a risk of mixed references

  return $clone;
}


#======================================================================
# SAVING THE FILE
#======================================================================

# asks every package part to store its current contents into the zip archive
sub _update_contents_in_zip {
  my $self = shift;
  $_->_update_contents_in_zip foreach values $self->parts->%*;
}


# Writes the updated archive back into the file it was read from.
# Croaks if the document came from a filehandle (there is no file name to
# write to -- use save_as() instead) or if the archive cannot be written.
sub overwrite {
  my $self = shift;

  my $docx = $self->docx;   # undef when the instance was built from 'zip', e.g. a clone
  not (defined $docx && openhandle($docx))
    or croak "cannot overwrite a document that was given as a filehandle; use save_as() instead";

  $self->_update_contents_in_zip;

  # name used in the error message : avoid interpolating an undefined 'docx'
  my $target = $docx // $self->zip->fileName // '';
  $self->zip->overwrite == AZ_OK
    or croak "error overwriting zip archive $target";
}


# Writes the updated archive into the given filename or open filehandle.
# Croaks if no target is given or if the archive cannot be written.
sub save_as {
  my ($self, $docx) = @_;

  defined $docx
    or croak "save_as() : need a filename or a filehandle";

  $self->_update_contents_in_zip;

  # same naming convention as in _zip() for error messages
  my ($method, $target_name) = openhandle($docx) ? (writeToFileHandle => 'filehandle')
                                                 : (writeToFileNamed  => $docx);
  $self->zip->$method($docx) == AZ_OK
    or croak "error writing zip archive to $target_name";
}


#======================================================================
# DELEGATION TO OTHER CLASSES
#======================================================================

# Builds a Revision object with a fresh revision id plus the given
# arguments, and returns its XML markup.
sub new_revision {
  my $self = shift;

  my $revision = MsOffice::Word::Surgeon::Revision->new(rev_id => $self->new_rev_id, @_);
  return $revision->as_xml;
}


1;

__END__

=encoding ISO-8859-1

=head1 NAME

MsOffice::Word::Surgeon - tamper with the guts of Microsoft docx documents, with regexes

=head1 SYNOPSIS

  my $surgeon = MsOffice::Word::Surgeon->new(docx => $filename);

  # extract plain text
  my $main_text    = $surgeon->document->plain_text;
  my @header_texts = map {$surgeon->part($_)->plain_text} $surgeon->headers;

  # unlink fields
  $surgeon->document->unlink_fields;

  # reveal bookmarks
  $surgeon->document->reveal_bookmarks(color => 'cyan');

  # anonymize
  my %alias = ('Claudio MONTEVERDI' => 'A_____', 'Heinrich SCHTZ' => 'B_____');
  my $pattern = join "|", keys %alias;
  my $replacement_callback = sub {
    my %args = @_;
    my $replacement = $surgeon->new_revision(to_delete  => $args{matched},
                                             to_insert  => $alias{$args{matched}},
                                             run        => $args{run},
                                             xml_before => $args{xml_before},
                                            );
    return $replacement;
  };
  $surgeon->all_parts_do(replace => qr[$pattern],
$replacement_callback); # save the result $surgeon->overwrite; # or ->save_as($new_filename); =head1 DESCRIPTION =head2 Purpose This module supports a few operations for inspecting or modifying contents in Microsoft Word documents in '.docx' format -- therefore the name 'surgeon'. Since a surgeon does not give life, there is no support for creating fresh documents; if you have such needs, use one of the other packages listed in the L section -- or use the companion module L. Some applications for this module are : =over =item * content extraction in plain text format; =item * unlinking fields (equivalent of performing Ctrl-Shift-F9 on the whole document) =item * adding markers at bookmark start and end positions =item * regex replacements within text, for example for : =over =item * anonymization, i.e. replacement of names or addresses by aliases; =item * templating, i.e. replacement of special markup by contents coming from a data tree (see also L). =back =item * insertion of generated images (for example barcodes) -- see L; =item * pretty-printing the internal XML structure. =back =head2 The C<.docx> format The format of Microsoft C<.docx> documents is described in L and L. An excellent introduction can be found at L. Another precious source of documentation is L. Internally, a document is a zipped archive, where the member named C stores the main document contents, in XML format. =head2 Operating mode The present module does not parse all details of the whole XML structure because it only focuses on I nodes (those that contain literal text) and I nodes (those that contain text formatting properties). All remaining XML information, for example for representing sections, paragraphs, tables, etc., is stored as opaque XML fragments; these fragments are re-inserted at proper places when reassembling the whole document after having modified some text nodes. 
=head1 METHODS =head2 Constructor =head3 new my $surgeon = MsOffice::Word::Surgeon->new(docx => $filename_or_filehandle); # or simply : ->new($filename); Builds a new surgeon instance, initialized with the contents of the given filename or filehandle. =head2 Accessors =head3 docx Path to the C<.docx> file, or filehandle opened on that file =head3 zip Instance of L<Archive::Zip> associated with this file =head3 parts Hashref to L<MsOffice::Word::Surgeon::PackagePart> objects, keyed by their part name in the ZIP file. There is always a C<'document'> part. Other parts are the headers and footers related to that document. =head3 document Shortcut to C<< $surgeon->part('document') >> -- the L<MsOffice::Word::Surgeon::PackagePart> object corresponding to the main document. See the C<PackagePart> documentation for operations on part objects. Besides, the following operations are supported directly as methods to the C<< $surgeon >> object and are automatically delegated to the C<< document >> part : C<contents>, C<original_contents>, C<indented_contents>, C<plain_text>, C<replace>. =head3 headers my @header_parts = $surgeon->headers; Returns the ordered list of names of header members stored in the ZIP file. =head3 footers my @footer_parts = $surgeon->footers; Returns the ordered list of names of footer members stored in the ZIP file. =head2 Other methods =head3 part my $part = $surgeon->part($part_name); Returns the L<MsOffice::Word::Surgeon::PackagePart> object corresponding to the given part name. =head3 all_parts_do my $result = $surgeon->all_parts_do($method_name => @args); Calls the given method on all part objects. Results are accumulated in a hash, with part names as keys to the results; a reference to that hash is returned. This is mostly used to invoke the L<MsOffice::Word::Surgeon::PackagePart/replace> method, i.e. $surgeon->all_parts_do(replace => qr[$pattern], $replacement_callback, %replacement_args); =head3 xml_member my $xml = $surgeon->xml_member($member_name); # reading # or $surgeon->xml_member($member_name, $new_xml); # writing Reads or writes the given member name in the ZIP file, with utf8 decoding or encoding. =head3 save_as $surgeon->save_as($docx_file_or_filehandle); Writes the ZIP archive into the given file or filehandle. 
=head3 overwrite $surgeon->overwrite; Writes the updated ZIP archive into the initial file. If the initial C was given as a filehandle, use the L method instead. =head3 new_revision my $xml = $surgeon->new_revision( to_delete => $text_to_delete, to_insert => $text_to_insert, author => $author_string, date => $date_string, run => $run_object, xml_before => $xml_string, ); This method is syntactic sugar for instantiating the L class and returning XML markup for MsWord revisions (a.k.a. "tracked changes") generated by that class. Users can then manually review those revisions within MsWord and accept or reject them. This is best used in collaboration with the L method : the replacement callback can call C<< $self->new_revision(...) >> to generate revision marks in the document. Either C or C (or both) must be present. Other parameters are optional. The parameters are : =over =item to_delete The string of text to delete (usually this will be the C argument passed to the replacement callback). =item to_insert The string of new text to insert. =item author A short string that will be displayed by MsWord as the "author" of this revision. =item date A date (and optional time) in ISO format that will be displayed by MsWord as the date of this revision. The current date and time will be used by default. =item run A reference to the L object surrounding this revision. The formatting properties of that run will be copied into the C<< >> nodes of the deleted and inserted text fragments. =item xml_before An optional XML fragment to be inserted before the C<< >> node of the inserted text =back =head2 Operations on parts See the L documentation for other operations on package parts, including operations on fields, bookmarks or images. =head1 SEE ALSO The L distribution on CPAN also manipulates C documents, but with another approach : internally it uses L and XPath expressions for manipulating XML nodes. 
The API has some intersections with the present module, but there are also some differences : C has more support for styling, while C has more flexible mechanisms for replacing text fragments. Other programming languages also have packages for dealing with C documents; here are some references : =over =item L The C# Open XML SDK from Microsoft =item L Additional functionalities built on top of the XML SDK. =item L An open source Java library from the Apache foundation. =item L Another open source Java library, competitor to Apache POI. =item L A PHP library dealing not only with Microsoft OOXML documents but also with OASIS and RTF formats. =item L A Python library, documented at L. =back As far as I can tell, most of these libraries provide objects and methods that closely reflect the complete XML structure : for example they have classes for paragraphs, styles, fonts, inline shapes, etc. The present module is much simpler but also much more limited : it was optimised for dealing with the text contents and offers no support for presentation or paging features. However, it has the rare advantage of providing an API for regex substitutions within Word documents. The L module relies on the present module, together with the L, to implement a templating system for Word documents. =head1 AUTHOR Laurent Dami, Edami AT cpan DOT org =head1 COPYRIGHT AND LICENSE Copyright 2019-2024 by Laurent Dami. This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0. 
MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon000755000000000000 015004477605 22615 5ustar00unknownunknown000000000000MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon/BookmarkBoundary.pm000444000000000000 422615004461731 26556 0ustar00unknownunknown000000000000
package MsOffice::Word::Surgeon::BookmarkBoundary;
use 5.24.0;
use Moose;
use Moose::Util::TypeConstraints qw(enum);
use MooseX::StrictConstructor;
use namespace::clean -except => 'meta';

#======================================================================
# ATTRIBUTES
#======================================================================

# which end of the bookmark this object stands for, and the bookmark id
has kind       => (is => 'ro', isa => enum(['Start', 'End']), required => 1);
has id         => (is => 'ro', isa => 'Str',                  required => 1);

# XML found before the boundary, and XML of the boundary node itself
has xml_before => (is => 'rw', isa => 'Str',                  default  => "");
has node_xml   => (is => 'rw', isa => 'Str',                  default  => "");

# bookmark name -- empty unless supplied to the constructor
has name       => (is => 'ro', isa => 'Str',                  default  => "");

#======================================================================
# METHODS
#======================================================================

# inserts an XML fragment at the very beginning of node_xml
sub prepend_xml {
  my $self     = shift;
  my $fragment = shift;
  substr($self->{node_xml}, 0, 0, $fragment);
}

# adds an XML fragment at the end of node_xml
sub append_xml {
  my $self     = shift;
  my $fragment = shift;
  $self->{node_xml} .= $fragment;
}

1;

__END__

=encoding ISO-8859-1

=head1 NAME

MsOffice::Word::Surgeon::BookmarkBoundary - internal representation for a MsWord bookmark

=head1 DESCRIPTION

This is used internally by L<MsOffice::Word::Surgeon::PackagePart> for storing bookmark fragments.

=head1 METHODS

=head2 new

  my $boundary = MsOffice::Word::Surgeon::BookmarkBoundary->new(%args);

Constructor for a new bookmark boundary object. Arguments are :

=over

=item kind

Either C<Start> or C<End>

=item id

Numerical identifier for the bookmark

=item name

The bookmark name. Only present in C<Start> boundaries.

=item xml_before

A string containing arbitrary XML preceding that bookmark in the complete document.

=item node_xml

The complete XML for this node.

=back

=head2 prepend_xml

Adds an XML fragment in front of the current node_xml.

=head2 append_xml Adds an XML fragment after the current node_xml. =head1 AUTHOR Laurent Dami, E<lt>dami AT cpan DOT orgE<gt> =head1 COPYRIGHT AND LICENSE Copyright 2024 by Laurent Dami. This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0. MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon/Carp.pm000444000000000000 124115004420532 24156 0ustar00unknownunknown000000000000package MsOffice::Word::Surgeon::Carp; use strict; use warnings; use Carp::Object -reexport => qw/carp croak/; our %CARP_OBJECT_CONSTRUCTOR = (clan => qw[^MsOffice::Word::Surgeon]); 1; __END__ =encoding ISO-8859-1 =head1 NAME MsOffice::Word::Surgeon::Carp - custom carping module for MsOffice::Word::Surgeon =head1 DESCRIPTION Used by all modules in MsOffice::Word::Surgeon for ignoring stack frames in MsOffice::Word::Surgeon while croaking or carping. See L<Carp::Object>. =head1 COPYRIGHT AND LICENSE Copyright 2024 by Laurent Dami. This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0. 
MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon/Field.pm000444000000000000 626015004462043 24325 0ustar00unknownunknown000000000000
package MsOffice::Word::Surgeon::Field;
use 5.24.0;
use Moose;
use Moose::Util::TypeConstraints qw(enum);
use MooseX::StrictConstructor;
use namespace::clean -except => 'meta';

#======================================================================
# ATTRIBUTES
#======================================================================

# XML preceding the field, field instruction, and field result
has xml_before => (is => 'ro', isa => 'Str', required => 1);
has code       => (is => 'rw', isa => 'Str', required => 1);
has result     => (is => 'rw', isa => 'Str', required => 1);

# parsing state of the field
has status     => (is => 'rw', isa => enum(['begin', 'separate', 'end']), default => "end");

# first word of the code, in uppercase -- computed on first access
has type       => (is => 'ro', isa => 'Str', builder => '_type', lazy => 1);

#======================================================================
# METHODS
#======================================================================

# builder for 'type' : uppercased first word of the field code,
# or an empty string if the code does not start with a word
sub _type {
  my $self = shift;

  my $first_word = $self->code =~ /^\s*(\w+)/ ? $1 : "";
  return uc $first_word;
}

# adds a fragment at the end of the field code
sub append_to_code {
  my $self     = shift;
  my $fragment = shift;
  $self->{code} .= $fragment;
}

# adds a fragment at the end of the field result
sub append_to_result {
  my $self     = shift;
  my $fragment = shift;
  $self->{result} .= $fragment;
}

1;

__END__

=encoding ISO-8859-1

=head1 NAME

MsOffice::Word::Surgeon::Field - internal representation for a MsWord field

=head1 DESCRIPTION

This is used internally by L<MsOffice::Word::Surgeon::PackagePart> for storing a MsWord field.

=head1 METHODS

=head2 new

  my $field = MsOffice::Word::Surgeon::Field->new(
    xml_before => $xml_string,
    code       => $code_instruction_string,
    result     => $xml_fragment,
    status     => 'begin',
  );

Constructor for a new field object. Arguments are :

=over

=item xml_before

A string containing arbitrary XML preceding that field in the complete document. The string may be empty but must be present.

=item code

A code containing the instruction string for that field.
If the instruction string contains embedded fields, these are represented through the L<MsOffice::Word::Surgeon/show_embedded_field> syntax -- by default, just a pair of curly braces. =item result An XML fragment corresponding to the last update of that field in MsWord. =item status One of C<begin>, C<separate>, or C<end>. Status C<begin> or C<separate> are intermediate, used internally during the parsing process. Normally all fields are in C<end> status. =back =head2 append_to_code While parsing fields, additional field instruction fragments are added through this method =head2 append_to_result While parsing fields, additional XML fragments belonging to the field result are added through this method =head2 type The first instruction in the C<code> part, eg C<ASK>, C<REF>, C<PAGE>, C<DATE>, etc. Note : in the Microsoft Word Object Model, the C<Type> property of a field is an integer value in an enumerated type. Here the attribute is just an uppercase string. Lists of valid field types can be found in the Word documentation. =head1 AUTHOR Laurent Dami, E<lt>dami AT cpan DOT orgE<gt> =head1 COPYRIGHT AND LICENSE Copyright 2024 by Laurent Dami. This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0. 
MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon/PackagePart.pm000444000000000000 12067015004462074 25532 0ustar00unknownunknown000000000000package MsOffice::Word::Surgeon::PackagePart; use 5.24.0; use Moose; use MooseX::StrictConstructor; use MsOffice::Word::Surgeon::Carp; use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces is_at_run_level parse_attrs decode_entities encode_entities); use MsOffice::Word::Surgeon::Run; use MsOffice::Word::Surgeon::Text; use MsOffice::Word::Surgeon::Field; use MsOffice::Word::Surgeon::BookmarkBoundary; use XML::LibXML ();; use List::Util qw(max); use match::simple qw(match); # syntactic sugar for attributes sub has_inner ($@) {my $attr = shift; has($attr => @_, lazy => 1, builder => "_$attr", init_arg => undef)} # constant integers to specify indentation modes -- see L use constant XML_NO_INDENT => 0; use constant XML_SIMPLE_INDENT => 1; use namespace::clean -except => 'meta'; #====================================================================== # ATTRIBUTES #====================================================================== # attributes passed to the constructor has 'surgeon' => (is => 'ro', isa => 'MsOffice::Word::Surgeon', required => 1, weak_ref => 1); has 'part_name' => (is => 'ro', isa => 'Str', required => 1); # attributes constructed by the module -- not received through the constructor has_inner 'contents' => (is => 'rw', isa => 'Str', trigger => \&_on_new_contents); has_inner 'runs' => (is => 'ro', isa => 'ArrayRef', clearer => 'clear_runs'); has_inner 'relationships' => (is => 'ro', isa => 'ArrayRef'); has_inner 'images' => (is => 'ro', isa => 'HashRef'); has 'contents_has_changed' => (is => 'bare', isa => 'Bool', default => 0); has 'was_cleaned_up' => (is => 'bare', isa => 'Bool', default => 0); #====================================================================== # GLOBAL VARIABLES #====================================================================== # Various regexes for removing uninteresting XML 
information my %noise_reduction_regexes = ( proof_checking => qr(]+|noProof/)>), revision_ids => qr(\sw:rsid\w+="[^"]+"), complex_script_bold => qr(), page_breaks => qr(), language => qr( # opening tag for the text contents (.*?) # text contents -- capture in $1 # closing tag for text ]x; # split XML content into run fragments my $contents = $self->contents; my @run_fragments = split m[$run_regex], $contents, -1; # -1 : don't strip trailing items my @runs; # build internal RUN objects RUN: while (my ($xml_before_run, $props, $run_contents) = splice @run_fragments, 0, 3) { $run_contents //= ''; # split XML of this run into text fragmentsn my @txt_fragments = split m[$txt_regex], $run_contents, -1; # -1 : don't strip trailing items my @texts; # build internal TEXT objects TXT: while (my ($xml_before_text, $txt_contents) = splice @txt_fragments, 0, 2) { next TXT if !$xml_before_text && !$txt_contents; $_ //= '' for $xml_before_text, $txt_contents; decode_entities($txt_contents); push @texts, MsOffice::Word::Surgeon::Text->new(xml_before => $xml_before_text, literal_text => $txt_contents); } # assemble TEXT objects into a RUN object next RUN if !$xml_before_run && !@texts; $_ //= '' for $xml_before_run, $props; push @runs, MsOffice::Word::Surgeon::Run->new(xml_before => $xml_before_run, props => $props, inner_texts => \@texts); } return \@runs; } sub _relationships { my $self = shift; # xml that describes the relationships for this package part my $rel_xml = $self->_rels_xml; # parse the relationships and assemble into a sparse array indexed by relationship ids my @relationships; while ($rel_xml =~ m[]g) { my %attrs = parse_attrs($1); $attrs{$_} or croak "missing attribute '$_' in node" for qw/Id Type Target/; ($attrs{num} = $attrs{Id}) =~ s[^\D+][]; ($attrs{short_type} = $attrs{Type}) =~ s[^.*/][]; $relationships[$attrs{num}] = \%attrs; } return \@relationships; } sub _images { my $self = shift; # get relationship ids associated with images my %rel_image = map 
{$_->{Id} => $_->{Target}} grep {$_ && $_->{short_type} eq 'image'} $self->relationships->@*; # get titles and relationship ids of images found within the part contents my %image; my @drawings = $self->contents =~ m[(.*?)]g; DRAWING: foreach my $drawing (@drawings) { if ($drawing =~ m[ .*? original_contents} sub _on_new_contents { my $self = shift; $self->clear_runs; $self->{contents_has_changed} = 1; $self->{was_cleaned_up} = 0; } #====================================================================== # GENERAL METHODS #====================================================================== sub _rels_xml { # rw accessor my ($self, $new_xml) = @_; my $rels_name = sprintf "word/_rels/%s.xml.rels", $self->part_name; return $self->surgeon->xml_member($rels_name, $new_xml); } sub zip_member_name { my $self = shift; return sprintf "word/%s.xml", $self->part_name; } sub original_contents { my $self = shift; return $self->surgeon->xml_member($self->zip_member_name); } #====================================================================== # CONTENTS RESTITUTION #====================================================================== sub indented_contents { my $self = shift; my $dom = XML::LibXML->load_xml(string => $self->contents); return $dom->toString(XML_SIMPLE_INDENT); # returned as bytes sequence, not a Perl string } sub plain_text { my $self = shift; # XML contents my $txt = $self->contents; # replace opening paragraph tags by newlines $txt =~ s/(])/\n$1/g; # replace break tags by newlines $txt =~ s[][\n]g; # replace tab nodes by ASCII tabs $txt =~ s/]*>/\t/g; # remove all remaining XML tags $txt =~ s/<[^>]+>//g; # decode entities decode_entities($txt); return $txt; } #====================================================================== # MODIFYING CONTENTS #====================================================================== sub cleanup_XML { my ($self, @merge_args) = @_; # avoid doing it twice return if $self->{was_cleaned_up}; # start the cleanup 
$self->reduce_all_noises; my $contents = $self->contents; # unlink fields, suppress bookmarks, merge runs $self->unlink_fields; $self->suppress_bookmarks; $self->merge_runs(@merge_args); # flag the fact that the cleanup was done $self->{was_cleaned_up} = 1; } sub reduce_noise { my ($self, @noises) = @_; # gather regexes to apply, given either directly as regex refs, or as names of builtin regexes my @regexes = map {ref $_ eq 'Regexp' ? $_ : $self->noise_reduction_regex($_)} @noises; # get contents, apply all regexes, put back the modified contents. my $contents = $self->contents; no warnings 'uninitialized'; # for regexes without capture groups, $1 will be undef $contents =~ s/$_/$1/g foreach @regexes; $self->contents($contents); } sub noise_reduction_regex { my ($self, $regex_name) = @_; my $regex = $noise_reduction_regexes{$regex_name} or croak "->noise_reduction_regex('$regex_name') : unknown regex name"; return $regex; } sub reduce_all_noises { my $self = shift; $self->reduce_noise(@noise_reduction_list); } sub merge_runs { my ($self, %args) = @_; # check validity of received args state $is_valid_arg = {no_caps => 1}; my @invalid_args = grep {!$is_valid_arg->{$_}} keys %args; croak "merge_runs(): invalid arg(s): " . 
join ", ", @invalid_args if @invalid_args; my @new_runs; # loop over internal "run" objects foreach my $run (@{$self->runs}) { $run->remove_caps_property if $args{no_caps}; # check if the current run can be merged with the previous one if ( !$run->xml_before # no other XML markup between the 2 runs && @new_runs # there was a previous run && $new_runs[-1]->props eq $run->props # both runs have the same properties ) { # conditions are OK, so merge this run with the previous one $new_runs[-1]->merge($run); } else { # conditions not OK, just push this run without merging push @new_runs, $run; } } # reassemble the whole stuff and inject it as new contents $self->contents(join "", map {$_->as_xml} @new_runs); } sub replace { my ($self, $pattern, $replacement_callback, %replacement_args) = @_; # shared initial string for error messages my $error_msg = '->replace($pattern, $callback, %args)'; # default value for arg 'cleanup_XML', possibly from deprecated arg 'keep_xml_as_is' if (delete $replacement_args{keep_xml_as_is}) { not exists $replacement_args{cleanup_XML} or croak "$error_msg: deprecated arg 'keep_xml_as_is' conflicts with arg 'cleanup_XML'"; carp "$error_msg: arg 'keep_xml_as_is' is deprecated, use 'cleanup_XML' instead"; $replacement_args{cleanup_XML} = 0; } else { $replacement_args{cleanup_XML} //= 1; # default } # cleanup the XML structure so that replacements work better if (my $cleanup_args = $replacement_args{cleanup_XML}) { $cleanup_args = {} if ! 
ref $cleanup_args; ref $cleanup_args eq 'HASH' or croak "$error_msg: arg 'cleanup_XML' should be a hashref"; $self->cleanup_XML(%$cleanup_args); } # check for presences of a special option to avoid modying contents my $dont_overwrite_contents = delete $replacement_args{dont_overwrite_contents}; # apply replacements and generate new XML my $xml = join "", map {$_->replace($pattern, $replacement_callback, %replacement_args)} $self->runs->@*; # overwrite previous contents $self->contents($xml) unless $dont_overwrite_contents; return $xml; } sub _update_contents_in_zip { # called for each part before saving the zip file my $self = shift; $self->surgeon->xml_member($self->zip_member_name, $self->contents) if $self->{contents_has_changed}; } #====================================================================== # OPERATIONS ON BOOKMARKS #====================================================================== sub bookmark_boundaries { my ($self) = @_; # regex to find bookmark tags state $bookmark_rx = qr{ ( # $1: the whole tag ]*?) # $3: node attributes /> # end of tag ) # end of capture 1 }sx; # split the whole xml according to the regex. Captured groups are also added to the list my @xml_chunks = split /$bookmark_rx/, $self->contents; my $final_xml = pop @xml_chunks; # walk through the list of fragments and build BookmarkBoundary objects my @bookmark_boundaries; while (my @chunk = splice @xml_chunks, 0, 4) { my %bkmk_args; @bkmk_args{qw/xml_before node_xml kind attrs/} = @chunk; my %attrs = parse_attrs(delete $bkmk_args{attrs} // ""); $bkmk_args{id} = $attrs{'w:id'}; $bkmk_args{name} = $attrs{'w:name'} if $attrs{'w:name'}; push @bookmark_boundaries, MsOffice::Word::Surgeon::BookmarkBoundary->new(%bkmk_args); } return wantarray ? 
(\@bookmark_boundaries, $final_xml) : \@bookmark_boundaries; } sub suppress_bookmarks { my ($self, %options) = @_; # check if options are valid and supply defaults my @invalid_opt = grep {!/^(full_range|markup_only)$/} keys %options; croak "suppress_bookmarks: invalid options: " . join(", ", @invalid_opt) if @invalid_opt; %options = (markup_only => qr/./) if ! keys %options; # parse bookmark boundaries my ($bookmark_boundaries, $final_xml) = $self->bookmark_boundaries; # loop on bookmark boundaries my %boundary_ix_by_id; while (my ($ix, $boundary) = each @$bookmark_boundaries) { # for starting boundaries, just remember the starting index if ($boundary->kind eq 'Start') { $boundary_ix_by_id{$boundary->id} = $ix; } # for ending boundaries, do the suppression elsif ($boundary->kind eq 'End') { # try to find the corresponding bookmarkStart node. my $start_ix = $boundary_ix_by_id{$boundary->id}; # if not found, this is because the start was within a field that has been erased. So just clear the bookmarkEnd if (!defined $start_ix) { $boundary->node_xml(""); } # if found, do the normal suppression else { my $bookmark_start = $bookmark_boundaries->[$start_ix]; my $bookmark_name = $bookmark_start->name; my $should_erase_markup = match($bookmark_name, $options{markup_only}); my $should_erase_range = match($bookmark_name, $options{full_range}); if ($should_erase_markup || $should_erase_range) { # erase markup (start and end bookmarks) $_->node_xml("") for $boundary, $bookmark_start; # if required, also erase inner range if ($should_erase_range) { for my $erase_ix ($start_ix+1 .. $ix) { my $inner_boundary = $bookmark_boundaries->[$erase_ix]; !$inner_boundary->node_xml or die "cannot erase contents of bookmark '$bookmark_name' " . "because it contains the start of bookmark '". $inner_boundary->name . 
"'"; $inner_boundary->xml_before(""); } } } } } } # re-build the whole XML from all remaining fragments, and inject it back my $new_contents = join "", (map {$_->xml_before, $_->node_xml} @$bookmark_boundaries), $final_xml; $self->contents($new_contents); } sub reveal_bookmarks { my ($self, @marking_args) = @_; # auxiliary objects my $marker = MsOffice::Word::Surgeon::PackagePart::_BookmarkMarker->new(@marking_args); my $paragraph_tracker = MsOffice::Word::Surgeon::PackagePart::_ParaTracker->new; # parse bookmark boundaries my ($bookmark_boundaries, $final_xml) = $self->bookmark_boundaries; # loop on bookmark boundaries my @bookmark_name_by_id; foreach my $boundary (@$bookmark_boundaries) { # count opening and closing paragraphs in xml before this node $paragraph_tracker->count_paragraphs($boundary->xml_before); # add visible runs before or after bookmark nodes if ($boundary->kind eq 'Start') { $bookmark_name_by_id[$boundary->id] = $boundary->name; $boundary->prepend_xml($paragraph_tracker->maybe_add_paragraph($marker->mark($boundary->name, 0))); } elsif ($boundary->kind eq 'End') { my $bookmark_name = $bookmark_name_by_id[$boundary->id]; $boundary->append_xml($paragraph_tracker->maybe_add_paragraph($marker->mark($bookmark_name, 1))); } } # re-build the whole XML and inject it back my $new_contents = join "", (map {$_->xml_before, $_->node_xml} @$bookmark_boundaries), $final_xml; $self->contents($new_contents); } #====================================================================== # OPERATIONS ON FIELDS #====================================================================== sub fields { my ($self) = @_; # regex to find field nodes state $field_rx = qr{ < w:fld # initial prefix for a field node (Simple|Char) # $1 : distinguish between simple fields and complex fields \h* ([^>]*?) # $2 : node attributes (?: # either .. /> # .. the end of an empty XML element | # or .. > # .. the end of the opening tag (.*?) # .. $3: some node content # .. 
the closing tag ) }sx; # split the whole xml according to the regex. Captured groups are also added to the list my @xml_chunks = split /$field_rx/, $self->contents; my $final_xml = pop @xml_chunks; # walk through the list of fragments and build a stack of field objects my @field_stack; NODE: while (my @chunk = splice @xml_chunks, 0, 4) { # initialize a node hash my %node; @node{qw/xml_before field_kind attrs node_content/} = @chunk; $node{$_} //= "" for qw/xml_before field_kind attrs node_content/; # node attributes my %attrs = parse_attrs($node{attrs}); if ($node{field_kind} eq 'Simple') { # for a simple field, all information is within the XML node push @field_stack, MsOffice::Word::Surgeon::Field->new( xml_before => $node{xml_before}, code => $attrs{'w:instr'}, result => $node{node_content}, ); } elsif ($node{field_kind} eq 'Char') { # for a complex field, we need an auxiliary subroutine to handle the begin/separate/end parts _handle_fldChar_node(\@field_stack, \%node, \%attrs); } $self->_maybe_embed_last_field(\@field_stack); } return wantarray ? (\@field_stack, $final_xml) : \@field_stack; } sub replace_fields { my ($self, $field_replacer) = @_; my ($fields, $final_xml) = $self->fields; my @xml_parts = map {$_->xml_before, $field_replacer->($_)} @$fields; $self->contents(join "", @xml_parts, $final_xml); } sub reveal_fields { my $self = shift; # replace all fields by a textual representatio of their "code" part my $revealer = sub {my $code = shift->code; encode_entities($code); return "{$code}"}; $self->replace_fields($revealer); } sub unlink_fields { my $self = shift; # replace all fields by just their "result" part (in other words, ignore the "code" part). # ASK fields return an empty string (because they have a special treatment in Word, where # their 'result' part is hidden, unlike all other fields. my $unlinker = sub { my $field = shift; return $field->type eq 'ASK' ? 
'' : $field->result; }; $self->replace_fields($unlinker); } # below: auxiliary methods or subroutines for field handling sub _decode_instr_text { my ($xml) = @_; my @instr_text = $xml =~ m{(.*?)}g; my $instr = join "", @instr_text; decode_entities($instr); return $instr; } sub _handle_fldChar_node { my ($field_stack, $node, $attrs) = @_; my $fldChar_type = $attrs->{"w:fldCharType"}; # if this is the beginning a of a field : push a new field object on top of the stack if ($fldChar_type eq 'begin') { push @$field_stack, MsOffice::Word::Surgeon::Field->new( xml_before => $node->{xml_before}, code => '', result => '', status => "begin", ); } # otherwise this is the continuation of the current field (eiter "separate" or "end") : update it else { my $current_field = $field_stack->[-1] or croak qq{met but there is no current field}; my $current_status = $current_field->status; if ($current_status eq "begin") { $current_field->append_to_code(_decode_instr_text($node->{xml_before})); } elsif ($current_status eq "separate") { $fldChar_type eq "end" or croak qq{after a "separate" node, w:fldCharType cannot be "$fldChar_type"}; $current_field->append_to_result($node->{xml_before}); } elsif ($current_status eq "end") { croak qq{met but last field is not open}; } $current_field->status($fldChar_type); } } sub _maybe_embed_last_field { my ($self, $field_stack) = @_; my $last_field = $field_stack->[-1]; my $prev_field = $field_stack->[-2]; if ($last_field && $prev_field && $last_field->status eq 'end') { my $prev_status = $prev_field->status; if ($prev_status eq 'begin') { # the last field is embedded within the "code" part of the previous field $prev_field->append_to_code(_decode_instr_text($last_field->xml_before) . sprintf $self->surgeon->show_embedded_field, $last_field->code); pop @$field_stack; } elsif ($prev_status eq 'separate') { # the last field is embedded within the "result" part of the previous field $prev_field->append_to_result($last_field->xml_before . 
$last_field->result); pop @$field_stack; } # elsif ($prev_status eq 'end') : $last_field is an independend field, just leave it on top of stack } } #====================================================================== # OPERATIONS ON IMAGES #====================================================================== sub replace_image { my ($self, $image_title, $image_PNG_content) = @_; my $member_name = $self->images->{$image_title} or die "could not find an image with title: $image_title"; $self->surgeon->zip->contents($member_name, $image_PNG_content); } sub add_image { my ($self, $image_PNG_content) = @_; # compute a fresh image number and a fresh relationship id my @image_members = $self->surgeon->zip->membersMatching(qr[^word/media/image]); my @image_nums = map {$_->fileName =~ /(\d+)/} @image_members; my $last_img_num = max @image_nums // 0; my $target = sprintf "media/image%d.png", $last_img_num + 1; my $last_rId_num = $self->relationships->$#*; my $rId = sprintf "rId%d", $last_rId_num + 1; # assemble XML for the new relationship my $type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"; my $new_rel_xml = qq{}; # update the rels member my $xml = $self->_rels_xml; $xml =~ s[][$new_rel_xml]; $self->_rels_xml($xml); # add the image as a new member into the archive my $member_name = "word/$target"; $self->surgeon->zip->addString(\$image_PNG_content, $member_name); # update the global content_types if it doesn't include PNG my $ct = $self->surgeon->_content_types; if ($ct !~ /Extension="png"/) { $ct =~ s[(]+>)][$1]; $self->surgeon->_content_types($ct); } # return the relationship id return $rId; } #====================================================================== # INTERNAL CLASS FOR TRACKING PARAGRAPHS #====================================================================== package # hide from PAUSE MsOffice::Word::Surgeon::PackagePart::_ParaTracker; use strict; use warnings; sub new { my $class = shift; my $nb_para = 0; bless 
\$nb_para, $class; } sub count_paragraphs { my ($self, $xml) = @_; # count opening and closing paragraph nodes while ($xml =~ m[<(/)?w:p.*?(/)?>]g) { next if $2; # self-ending node -- doesn't change the number of paragraphs $$self += $1 ? -1 : +1; } } sub maybe_add_paragraph { my ($self, $xml) = @_; # add paragraph nodes only if the ParaTracker is currently outside of any paragraph my $is_outside_para = !$$self; return $is_outside_para && $xml ? "$xml" : $xml; }; #====================================================================== # INTERNAL CLASS FOR INTRODUCING BOOKMARK MARKERS #====================================================================== package # hide from PAUSE MsOffice::Word::Surgeon::PackagePart::_BookmarkMarker; use strict; use warnings; use MsOffice::Word::Surgeon::Utils qw(encode_entities); use Carp qw(croak carp); sub new { my $class = shift; my %self = @_; $self{color} //= "yellow"; $self{props} //= qq{}; $self{start} //= "<%s>"; $self{end} //= ""; $self{ignore} = qr/^_/ if not exists $self{ignore}; $self{color} =~ m{ ^( black | blue | cyan | darkBlue | darkCyan | darkGray | darkGreen | darkMagenta | darkRed | darkYellow | green | lightGray | magenta | none | red | white | yellow )$}x or carp "invalid color : $self{color}"; bless \%self, $class; } sub mark { my ($self, $bookmark_name, $is_end_node) = @_; # some bookmarks are just ignored return "" if $self->{ignore} and $bookmark_name =~ $self->{ignore}; # build the visible text no warnings 'redundant'; # because sprintf templates may decide not to use their arguments my $sprintf_node = $is_end_node ? 
$self->{end} : $self->{start}; my $text = sprintf $sprintf_node, $bookmark_name; my $props = sprintf $self->{props}, $self->{color}; encode_entities($text); # full xml for a visible run before or after the boookmark node return "$props$text"; } 1; __END__ =encoding ISO-8859-1 =head1 NAME MsOffice::Word::Surgeon::PackagePart - Operations on a single part within the ZIP package of a docx document =head1 SYNOPSIS my $part = $surgeon->document; print $part->plain_text; $part->replace(qr[$pattern], $replacement_callback); $part->replace_image($image_alt_text, $image_PNG_content); $part->unlink_fields; $part->reveal_bookmarks; =head1 DESCRIPTION This class is part of L; it encapsulates operations for a single I within the ZIP package of a C<.docx> document. It is mostly used for the I part, that contains the XML representation of the main document body. However, other parts such as headers, footers, footnotes, etc. have the same internal representation and therefore the same operations can be invoked. =head1 METHODS =head2 new my $part = MsOffice::Word::Surgeon::PackagePart->new( surgeon => $surgeon, part_name => $name, ); Constructor for a new part object. This is called internally from L; it is not meant to be called directly by clients. =head3 Constructor arguments =over =item surgeon a weak reference to the main surgeon object =item part_name ZIP member name of this part =back =head3 Other attributes Other attributes, not passed through the constructor but generated lazily on demand, are : =over =item contents the XML contents of this part =item runs a decomposition of the XML contents into a collection of L objects. =item relationships an arrayref of Office relationships associated with this part. This information comes from a C<.rels> member in the ZIP archive, named after the name of the package part. Array indices correspond to relationship numbers. 
Array values are hashrefs with keys =over =item Id the full relationship id =item num the numeric part of C<Id> =item Type the full reference to the XML schema for this relationship =item short_type only the last word of the type, e.g. 'image', 'style', etc. =item Target designation of the target within the ZIP file. The prefix 'word/' must be added for having a complete Zip member name. =back =item images a hashref of images within this package part. Keys of the hash are image I<alternative texts>. If present, the alternative I<title> will be preferred; otherwise the alternative I<description> will be taken (note : the I<title> field was displayed in Office 2013 and 2016, but more recent versions only display the I<description> field -- see L<MsOffice documentation|https://support.microsoft.com/en-us/office/add-alternative-text-to-a-shape-picture-chart-smartart-graphic-or-other-object-44989b2a-903c-4d9a-b742-6a75b451c669>). Images without alternative text will not be accessible through the current Perl module. Values of the hash are zip member names for the corresponding image representations in C<.png> format. =back =head2 Contents restitution =head3 contents Returns a Perl string with the current internal XML representation of the part contents. =head3 original_contents Returns a Perl string with the XML representation of the part contents, as it was in the ZIP archive before any modification. =head3 indented_contents Returns an indented version of the XML contents, suitable for inspection in a text editor. This is produced by L<XML::LibXML::Document/toString> and therefore is returned as an encoded byte string, not a Perl string. =head3 plain_text Returns the text contents of the part, without any markup. Paragraphs and breaks are converted to newlines, all other formatting instructions are ignored. =head3 runs Returns an arrayref of L<MsOffice::Word::Surgeon::Run> objects. Each of these objects holds an XML fragment; joining all fragments restores the complete document. 
my $contents = join "", map {$_->as_xml} $part->runs->@*; =head2 Modifying contents =head3 cleanup_XML $part->cleanup_XML(%args); Apply several other methods for removing unnecessary nodes within the internal XML. This method successively calls L</reduce_all_noises>, L</unlink_fields>, L</suppress_bookmarks> and L</merge_runs>. Currently there is only one legal arg : =over =item C<no_caps> If true, the method L<MsOffice::Word::Surgeon::Run/remove_caps_property> is automatically called for each run object. As a result, all texts within runs with the C<caps> property are automatically converted to uppercase. =back =head3 reduce_noise $part->reduce_noise($regex1, $regex2, ...); This method is used for removing unnecessary information in the XML markup. It applies the given list of regexes to the whole document, suppressing matches. The final result is put back into C<< $self->contents >>. Regexes may be given either as C<< qr/.../ >> references, or as names of builtin regexes (described below). Regexes are applied to the whole XML contents, not only to run nodes. =head3 noise_reduction_regex my $regex = $part->noise_reduction_regex($regex_name); Returns the builtin regex corresponding to the given name. Known regexes are : proof_checking => qr(<w:(?:proofErr[^>]+|noProof/)>), revision_ids => qr(\sw:rsid\w+="[^"]+"), complex_script_bold => qr(<w:bCs/>), page_breaks => qr(<w:lastRenderedPageBreak/>), language => qr(<w:lang w:val="[^/>]+/>), empty_run_props => qr(<w:rPr></w:rPr>), soft_hyphens => qr(<w:softHyphen/>), =head3 reduce_all_noises $part->reduce_all_noises; Applies all regexes from the previous method. =head3 merge_runs $part->merge_runs(no_caps => 1); # optional arg Walks through all runs of text within the document, trying to merge adjacent runs when possible (i.e. when both runs have the same properties, and there is no other XML node in between). 
This operation is a prerequisite before performing replace operations, because documents edited in MsWord often have run boundaries across sentences or even in the middle of words; so regex searches can only be successful if those artificial boundaries have been removed. If the argument C<< no_caps => 1 >> is present, the merge operation will also convert runs with the C<w:caps> property, putting all letters into uppercase and removing the property; this makes more merges possible. =head3 replace $part->replace($pattern, $replacement, %replacement_args); Replaces all occurrences of C<$pattern> regex within the text nodes by the given C<$replacement>. This is not exactly like a search-replace operation performed within MsWord, because the search does not cross boundaries of text nodes. In order to maximize the chances of successful replacements, the L</cleanup_XML> method is automatically called before starting the operation. The argument C<$pattern> can be either a string or a reference to a regular expression. It should not contain any capturing parentheses, because that would perturb text splitting operations. The argument C<$replacement> can be either a fixed string, or a reference to a callback subroutine that will be called for each match. The C<< %replacement_args >> hash can be used to pass information to the callback subroutine. That hash will be enriched with three entries : =over =item matched The string that has been matched by C<$pattern>. =item run The run object in which this text resides. =item xml_before The XML fragment (possibly empty) found before the matched text . =back The callback subroutine may return either plain text or structured XML. See L<MsOffice::Word::Surgeon::Run/SYNOPSIS> for an example of a replacement callback. 
The following special keys within C<< %replacement_args >> are interpreted by the C<replace()> method itself, and therefore are not passed to the callback subroutine : =over =item keep_xml_as_is deprecated, use C<< cleanup_XML => 0 >> instead. If true, no call is made to the L</cleanup_XML> method before performing the replacements; a warning is issued, and it is an error to combine this key with C<cleanup_XML>. =item dont_overwrite_contents if true, the internal XML contents is not modified in place; the new XML after performing replacements is merely returned to the caller. =item cleanup_XML the argument should be either a boolean or a hashref. If false, no call is made to the L</cleanup_XML> method before performing the replacements. If true (the default), the L</cleanup_XML> method is called; if it is a hashref, its content is passed as arguments to that method. This is typically used as $part->replace($pattern, $replacement, cleanup_XML => {no_caps => 1}); =back =head2 Operations on bookmarks =head3 bookmark_boundaries my $boundaries = $part->bookmark_boundaries; my ($boundaries, $final_xml) = $part->bookmark_boundaries; Parses the XML content to discover bookmark boundaries. In scalar context, returns an arrayref of L<MsOffice::Word::Surgeon::BookmarkBoundary> objects. In list context, returns the arrayref followed by a plain string containing the final XML fragment. =head3 suppress_bookmarks $part->suppress_bookmarks(full_range => [qw/foo bar/], markup_only => qr/^_/); Suppresses bookmarks according to the specified options : =over =item full_range For bookmark names matching this option, the bookmark will be fully suppressed (not only the start and end markers, but also any content in between). =item markup_only For bookmark names matching this option, start and end markers are suppressed, but the inner content remains. =back Options may be specified as lists of strings, or regexes, or coderefs ... anything suitable to be compared through L<match::simple>. In absence of any options, the default is C<< markup_only => qr/./ >>, meaning that all bookmark markup is suppressed. Removing bookmarks is useful because MsWord may silently insert bookmarks in unexpected places; therefore some searches within the text may fail because of such bookmarks. 
The C<full_range> option is especially convenient for removing bookmarks associated with ASK fields. Such bookmarks contain ranges of text that are never displayed by MsWord. =head3 reveal_bookmarks $part->reveal_bookmarks(color => 'green'); Usually bookmarks boundaries in MsWord are not visible; the only way to have a visual clue is to turn on an option in L<Advanced / Show document content / Show bookmarks|https://support.microsoft.com/en-gb/office/troubleshoot-bookmarks-9cad566f-913d-49c6-8d37-c21e0e8d6db0> -- but this only displays where bookmarks start and end, without the names of the bookmarks. The C<reveal_bookmarks()> method will insert a visible run before each bookmark start and after each bookmark end, showing the bookmark name. This is an interesting tool for documenting where bookmarks are located in an existing document. Options to this method are : =over =item color The highlighting color for visible marks. This should be a valid highlighting color, i.e black, blue, cyan, darkBlue, darkCyan, darkGray, darkGreen, darkMagenta, darkRed, darkYellow, green, lightGray, magenta, none, red, white or yellow. Default is yellow. =item props A string in C<sprintf> format for building the XML to be inserted in C<< <w:rPr> >> node when displaying bookmarks marks, i.e. the style for displaying such marks. The default is just a highlighting property : C<< <w:highlight w:val="%s"/> >>. =item start A string in C<sprintf> format for generating text before a bookmark start. Default is C<< <%s> >>. =item end A string in C<sprintf> format for generating text after a bookmark end. Default is C<< </%s> >>. =item ignore A regexp for deciding which bookmarks will not be revealed. Default is C<< qr/^_/ >>, because bookmarks with an initial underscore are usually technical bookmarks inserted automatically by MsWord, such as C<_GoBack> or C<_Toc53196147>. 
=back =head2 Operations on fields =head3 fields my $fields = $part->fields; my ($fields, $final_xml) = $part->fields; Parses the XML content to discover MsWord fields. In scalar context, returns an arrayref of L<MsOffice::Word::Surgeon::Field> objects. In list context, returns the arrayref followed by a plain string containing the final XML fragment. =head3 replace_fields my $field_replacer = sub {my $field = shift; my ($code, $result) = ($field->code, $field->result); return "...";}; $part->replace_fields($field_replacer); Replaces MsWord fields by the product of the C<< $field_replacer >> callback. The callback receives a single argument, namely a L<MsOffice::Word::Surgeon::Field> object, which gives access to : =over =item C<< $field->code >> A plain string containing the field's full code instruction, i.e. a keyword followed by optional arguments and switches, including initial and final spaces. Embedded fields are represented in curly braces, like for example C<< IF { DOCPROPERTY foo } = "bar" "is bar" "is not bar" >>. =item C<< $field->result >> An XML fragment containing the current value for the field. =back The callback should return an XML fragment suitable to be inserted within an MsWord I<run>. =head3 reveal_fields $part->reveal_fields; Replaces each field with a textual representation of its code instruction, embedded in curly braces. =head3 unlink_fields $part->unlink_fields; Replaces each field with its current result, i.e. removing the code instruction (except for ASK fields, which are replaced by an empty string). This is the equivalent of performing Ctrl-Shift-F9 in MsWord on the whole document. =head2 Operations on images =head3 replace_image $part->replace_image($image_alt_text, $image_PNG_content); Replaces an existing PNG image by a new image. All features of the old image will be preserved (size, positioning, border, etc.) -- only the image itself will be replaced. The C<$image_alt_text> must correspond to the I<alternative text> set in Word for this image. This operation replaces a ZIP member within the C<.docx> file. If several XML nodes refer to the I<same> ZIP member, i.e. 
if the same image is displayed at several locations, the new image will appear at all locations, even if they do not have the same alternative text -- unfortunately this module currently has no facility for duplicating an existing image into separate instances. So if your intent is to only replace one instance of the image, your original document should contain several distinct copies of the C<.PNG> file. =head3 add_image my $rId = $part->add_image($image_PNG_content); Stores the given PNG image within the ZIP file, adds it as a relationship to the current part, and returns the relationship id. This operation is not sufficient to make the image visible in Word : it just stores the image, but you still have to insert a proper C<drawing> node in the contents XML, using the C<$rId>. Future versions of this module may offer helper methods for that purpose; currently it must be done by hand. =head1 AUTHOR Laurent Dami, E<lt>dami AT cpan DOT orgE<gt> =head1 COPYRIGHT AND LICENSE Copyright 2019-2024 by Laurent Dami. This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0. 
������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon/Revision.pm������������������������������������000444��000000��000000�� 6766�15004462235� 25116� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������package MsOffice::Word::Surgeon::Revision; use 5.24.0; use Moose; use MooseX::StrictConstructor; use Moose::Util::TypeConstraints; use POSIX qw(strftime); use MsOffice::Word::Surgeon::Carp; use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces encode_entities); use namespace::clean -except => 'meta'; subtype 'Date_ISO', as 'Str', where {/\d{4}-\d{2}-\d{2}(?:T\d{2}:\d{2})?Z?/}, message {"$_ is not a date in ISO format yyyy-mm-ddThh:mm:ss"}; #====================================================================== # ATTRIBUTES #====================================================================== has 'rev_id' => (is => 'ro', isa => 'Num', required => 1); has 'to_delete' => (is => 'ro', isa => 'Str'); has 'to_insert' => (is => 'ro', isa => 'Str'); has 'author' => (is => 'ro', isa => 'Str', default => 'Word::Surgeon'); has 'date' => (is => 'ro', isa => 'Date_ISO', default => sub {strftime "%Y-%m-%dT%H:%M:%SZ", localtime}); has 'run' => (is => 'ro', isa => 'MsOffice::Word::Surgeon::Run'); has 'xml_before' => (is => 'ro', isa => 'Str'); #====================================================================== # INSTANCE CONSTRUCTION #====================================================================== sub BUILD { my $self = shift; $self->to_delete || $self->to_insert or croak "attempt to create a Revision object without 'to_delete' nor 'to_insert' args"; } 
#======================================================================
# METHODS
#======================================================================

# as_xml() : returns the XML markup for this revision, as a plain string.
#
# The result is the concatenation of
#   - a <w:del> node wrapping a run with the 'to_delete' text, if any;
#   - a <w:ins> node wrapping a run with the 'to_insert' text, if any
#     (preceded, inside the run, by the raw 'xml_before' fragment).
# Both nodes carry the same w:id, w:author and w:date attributes.
# Author and texts are entity-encoded; 'xml_before' and the run
# properties are inserted verbatim because they are already XML.
sub as_xml {
  my ($self) = @_;

  my $rev_id = $self->rev_id;
  my $date   = $self->date;
  my $author = $self->author;
  encode_entities($author);               # in-place : goes into an XML attribute

  # formatting properties are copied from the run in which the revision occurs
  my $props  = $self->run && $self->run->props
             ? "<w:rPr>" . $self->run->props . "</w:rPr>"
             : "";
  my $xml    = "";

  # Presence of text is tested through length(), not through plain truth :
  # the string "0" is false in Perl but is perfectly legitimate text, and
  # would otherwise be silently left out of the revision.
  my $to_delete = $self->to_delete;
  if (defined $to_delete && length $to_delete) {
    # the space attribute must be computed before encoding
    my $space_attr = maybe_preserve_spaces($to_delete);
    encode_entities($to_delete);
    $xml .= qq{<w:del w:id="$rev_id" w:author="$author" w:date="$date">}
          .   qq{<w:r>$props}
          .     qq{<w:delText$space_attr>$to_delete</w:delText>}
          .   qq{</w:r>}
          . qq{</w:del>};
  }

  my $to_insert = $self->to_insert;
  if (defined $to_insert && length $to_insert) {
    my $space_attr = maybe_preserve_spaces($to_insert);
    encode_entities($to_insert);
    $xml .= qq{<w:ins w:id="$rev_id" w:author="$author" w:date="$date">}
          .   qq{<w:r>$props}
          .     ($self->xml_before // '')
          .     qq{<w:t$space_attr>$to_insert</w:t>}
          .   qq{</w:r>}
          . qq{</w:ins>};
  }

  return $xml;
}

1;

__END__

=encoding ISO-8859-1

=head1 NAME

MsOffice::Word::Surgeon::Revision - generate XML markup for MsWord revisions

=head1 DESCRIPTION

This class implements the XML markup generation algorithm
for the method L<MsOffice::Word::Surgeon/new_revision>.
See that method for a description of the API.

=head1 INTERNALS

The constructor requires an integer C<rev_id> argument.
The C<rev_id> is fed by the surgeon object which generates a fresh value at each call.
This is inserted as C<w:id> attribute to the
C<< <w:del> >> and C<< <w:ins> >> nodes -- but I don't really know why,
since it doesn't seem to be used for any purpose by MsWord.

=head1 COPYRIGHT AND LICENSE

Copyright 2019-2024 by Laurent Dami.

This program is free software, you can redistribute it and/or modify
it under the terms of the Artistic License version 2.0.
����������MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon/Run.pm�����������������������������������������000444��000000��000000�� 13163�15004462255� 24073� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������package MsOffice::Word::Surgeon::Run; use 5.24.0; use Moose; use MooseX::StrictConstructor; use MsOffice::Word::Surgeon::Carp; use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces is_at_run_level); use namespace::clean -except => 'meta'; #====================================================================== # ATTRIBUTES #====================================================================== has 'xml_before' => (is => 'ro', isa => 'Str', required => 1); has 'props' => (is => 'ro', isa => 'Str', required => 1); has 'inner_texts' => (is => 'ro', required => 1, isa => 'ArrayRef[MsOffice::Word::Surgeon::Text]'); #====================================================================== # METHODS #====================================================================== sub as_xml { my $self = shift; my $xml = $self->xml_before; if ($self->inner_texts->@*) { $xml .= "<w:r>"; $xml .= "<w:rPr>" . $self->props . "</w:rPr>" if $self->props; $xml .= $_->as_xml foreach $self->inner_texts->@*; $xml .= "</w:r>"; } return $xml; } sub merge { my ($self, $next_run) = @_; # sanity checks $next_run->isa(__PACKAGE__) or croak "argument to merge() should be a " . __PACKAGE__; $self->props eq $next_run->props or croak sprintf "runs have different properties: '%s' <> '%s'", $self->props, $next_run->props; !$next_run->xml_before or croak "cannot merge -- next run contains xml before the run : " . 
$next_run->xml_before;

  # loop over all text nodes of the next run
  foreach my $txt ($next_run->inner_texts->@*) {
    if ($self->{inner_texts}->@* && !$txt->xml_before) {
      # concatenate current literal text with the previous text node :
      # possible because this run already has a text node and no XML
      # sits in front of the incoming one
      $self->{inner_texts}[-1]->merge($txt);
    }
    else {
      # cannot merge, just add to the list of inner text nodes
      push $self->{inner_texts}->@*, $txt;
    }
  }
}


# replace($pattern, $replacement_callback, %replacement_args) :
# delegates the replacement to each inner text node, then assembles and
# returns the XML for the whole run (starting with this run's xml_before).
# The inner text nodes themselves are not modified here; only the returned
# string reflects the replacements.
sub replace {
  my ($self, $pattern, $replacement_callback, %replacement_args) = @_;

  # apply replacement to inner texts
  # (the current run is handed down as the 'run' argument)
  $replacement_args{run} = $self;
  my @inner_xmls
    = map {$_->replace($pattern, $replacement_callback, %replacement_args)}
          $self->inner_texts->@*;

  # a machinery of closures for assembling the new xml
  my $xml = $self->xml_before;
  my $is_run_open;

  # opens a <w:r> node (with its properties, if any) unless one is already open
  my $maybe_open_run = sub {if (!$is_run_open) {
                              $xml .= "<w:r>";
                              $xml .= "<w:rPr>" . $self->props . "</w:rPr>" if $self->props;
                              $is_run_open = 1;
                            }};

  # closes the <w:r> node if one is currently open
  my $maybe_close_run = sub {if ($is_run_open) {
                               $xml .= "</w:r>";
                               $is_run_open = undef;
                             }};

  # apply the machinery, loop over inner texts
  foreach my $inner_xml (@inner_xmls) {
    # a fragment reported by is_at_run_level() must not be nested within
    # the current <w:r>, so the run is closed first; any other fragment
    # needs an enclosing run, which is opened on demand
    is_at_run_level($inner_xml) ? $maybe_close_run->() : $maybe_open_run->();
    $xml .= $inner_xml;
  }

  # final cleanup
  $maybe_close_run->();

  return $xml;
}


# remove_caps_property() : removes the first literal <w:caps/> found in the
# run properties; if one was removed, every inner text node is asked to
# convert itself to uppercase. Does nothing when no <w:caps/> is present.
sub remove_caps_property {
  my $self = shift;

  if ($self->{props} =~ s[<w:caps/>][]) {
    $_->to_uppercase foreach @{$self->inner_texts};
  }
}

1;

__END__

=encoding ISO-8859-1

=head1 NAME

MsOffice::Word::Surgeon::Run - internal representation for a "run of text"

=head1 DESCRIPTION

This is used internally by L<MsOffice::Word::Surgeon> for storing
a "run of text" in a MsWord document. It loosely corresponds to a
C<< <w:r> >> node in OOXML, but may also contain an anonymous XML
fragment which is the part of the document just before the C<< <w:r> >>
node -- used for reconstructing the complete document after having changed the
contents of some runs.
=head1 METHODS =head2 new my $run = MsOffice::Word::Surgeon::Run->new( xml_before => $xml_string, props => $properties_string, inner_texts => [MsOffice::Word::Surgeon::Text->new(...), ...], ); Constructor for a new run object. Arguments are : =over =item xml_before A string containing arbitrary XML preceding that run in the complete document. The string may be empty but must be present. =item props A string containing XML for the properties of this run (for example instructions for bold, italic, font, etc.). The module does not parse this information; it just compares the string for equality with the next run. =item inner_texts An array of L<MsOffice::Word::Surgeon::Text> objects, corresponding to the XML C<< <w:t> >> nodes inside the run. =back =head2 as_xml my $xml = $run->as_xml; Returns the XML representation of that run. =head2 merge $run->merge($next_run); Merge the contents of C<$next_run> together with the current run. This is only possible if both runs have the same properties (same string returned by the C<props> method), and if the next run has an empty C<xml_before> attribute; if the conditions are not met, an exception is raised. =head2 replace my $xml = $run->replace($pattern, $replacement_callback, %replacement_args); Replaces all occurrences of C<$pattern> within all text nodes by a new string computed by C<$replacement_callback>, and returns a new xml string corresponding to the result of all these replacements. This is the internal implementation for public method L<MsOffice::Word::Surgeon::PackagePart/replace>. =head2 remove_caps_property Searches in the run properties for a C<< <w:caps/> >> property; if found, removes it, and replaces all inner texts by their uppercase equivalents. =head1 AUTHOR Laurent Dami, E<lt>dami AT cpan DOT orgE<gt> =head1 COPYRIGHT AND LICENSE Copyright 2019-2024 by Laurent Dami. This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0. 
�������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon/Text.pm����������������������������������������000444��000000��000000�� 15226�15004462273� 24255� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������package MsOffice::Word::Surgeon::Text; use 5.24.0; use Moose; use MooseX::StrictConstructor; use MsOffice::Word::Surgeon::Carp; use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces is_at_run_level encode_entities); use namespace::clean -except => 'meta'; #====================================================================== # ATTRIBUTES #====================================================================== has 'xml_before' => (is => 'ro', isa => 'Str'); has 'literal_text' => (is => 'ro', isa => 'Str', required => 1); #====================================================================== # METHODS #====================================================================== sub as_xml { my $self = shift; my $xml = $self->xml_before // ''; my $lit_txt = $self->literal_text; if (defined $lit_txt && $lit_txt ne '') { encode_entities($lit_txt); my $space_attr = maybe_preserve_spaces($lit_txt); $xml .= "<w:t$space_attr>$lit_txt</w:t>"; } return $xml; } sub merge { my ($self, $next_text) = @_; !$next_text->xml_before or croak "cannot merge -- next text contains xml before the text 
: " . $next_text->xml_before;

  $self->{literal_text} .= $next_text->literal_text;
}


# replace($pattern, $replacement, %args) :
# replaces every occurrence of $pattern within the literal text and returns
# the resulting XML fragment (the text node itself is left unchanged).
#
#   $pattern     : regex to look for
#   $replacement : either a plain string, or a callback invoked as
#                  $replacement->(matched => $str, [xml_before => $xml,] %args)
#   %args        : passed through to the callback; $args{run} must be the
#                  enclosing run, its props are needed when new runs are built
#
# A replacement starting with '<w:' is treated as XML and inserted as is;
# anything else is treated as literal text and accumulated into a text node.
# Croaks if the node's xml_before could be placed nowhere although some XML
# was produced.
sub replace {
  my ($self, $pattern, $replacement, %args) = @_;

  my $xml = "";
  my $current_text_node;
  my $xml_before = $self->xml_before;

  # closure to make sure that $xml_before is used only once
  my $maybe_xml_before = sub {
    my @r = $xml_before ? (xml_before => $xml_before) : ();
    $xml_before = undef;
    return @r;
  };

  # closure to create a new text node
  my $mk_new_text = sub {
    my ($literal_text) = @_;
    return MsOffice::Word::Surgeon::Text->new(
      $maybe_xml_before->(),
      literal_text => $literal_text,
    );
  };

  # closure to create a new run node for enclosing a text node
  my $add_new_run = sub {
    my ($text_node) = @_;
    my $run = MsOffice::Word::Surgeon::Run->new(
      xml_before  => '',
      props       => $args{run}->props,
      inner_texts => [$text_node],
    );
    $xml .= $run->as_xml;
  };

  # closure to add text to the current text node
  my $add_to_current_text_node = sub {
    my ($txt_to_add) = @_;
    $current_text_node //= $mk_new_text->('');
    $current_text_node->{literal_text} .= $txt_to_add;
  };

  # closure to clear the current text node
  my $maybe_clear_current_text_node = sub {
    if ($current_text_node) {
      if (is_at_run_level($xml)) {
        $add_new_run->($current_text_node);
      }
      else {
        $xml .= $current_text_node->as_xml;
      }
      $current_text_node = undef;
    }
  };

  # find pattern within $self, each match becomes a fragment to handle
  my @fragments            = split qr[($pattern)], $self->{literal_text}, -1;
  my $txt_after_last_match = pop @fragments;

  # loop to handle each match
  while (my ($txt_before, $matched) = splice (@fragments, 0, 2)) {

    # Presence of text is decided through length(), not through plain truth :
    # a fragment consisting of the single character "0" is false in Perl
    # but is real text that must not be lost.
    my $has_txt_before = defined $txt_before && length $txt_before;

    # new contents to replace the matched fragment
    my $replacement_contents
      = !ref $replacement ? $replacement
                          : $replacement->(matched => $matched,
                                           (!$has_txt_before ? $maybe_xml_before->() : ()),
                                           %args);

    my $replacement_is_xml = $replacement_contents =~ /^<w:/;
    if ($replacement_is_xml) {
      # if there was text before the match, add it as a new run
      if ($has_txt_before) {
        $maybe_clear_current_text_node->();
        $add_new_run->($mk_new_text->($txt_before));
      }

      # add the xml that replaces the match
      $xml .= $replacement_contents;
    }
    else {
      # $replacement_contents is not xml but just literal text
      $add_to_current_text_node->(($txt_before // '') . $replacement_contents);
    }
  }

  # handle remaining contents after the last match
  # (again through length(), so that a trailing or lone "0" is preserved)
  if (defined $txt_after_last_match && length $txt_after_last_match) {
    $add_to_current_text_node->($txt_after_last_match);
  }
  $maybe_clear_current_text_node->();

  # xml_before was not consumed by any text node or callback
  if ($xml_before) {
    !$xml
      or croak "internal error : Text::xml_before was ignored during replacements";
    $xml = $xml_before;
  }

  return $xml;
}


# to_uppercase() : converts the literal text to uppercase in place, leaving
# entity references of the form &name; untouched. Returns the new text.
sub to_uppercase {
  my $self = shift;

  # split text fragments around HTML entities. The negative limit keeps the
  # trailing empty field, so that when the text ends with an entity, that
  # entity is still handled as an entity and not upcased as ordinary text.
  my @fragments             = split /(&\w+?;)/, $self->{literal_text}, -1;
  my $txt_after_last_entity = pop(@fragments) // '';   # split yields nothing on empty text
  my $txt_upcase            = "";

  # assemble upcased text fragments
  while (my ($txt_before, $entity) = splice (@fragments, 0, 2)) {
    $txt_upcase .= uc($txt_before) . $entity;
  }
  $txt_upcase .= uc($txt_after_last_entity);

  # return the upcased text
  $self->{literal_text} = $txt_upcase;
}

1;

__END__

=encoding ISO-8859-1

=head1 NAME

MsOffice::Word::Surgeon::Text - internal representation for a node of literal text

=head1 DESCRIPTION

This is used internally by L<MsOffice::Word::Surgeon> for storing
a chunk of literal text in a MsWord document. It loosely corresponds to a
C<< <w:t> >> node in OOXML, but may also contain an anonymous XML
fragment which is the part of the document just before the C<< <w:t> >>
node -- used for reconstructing the complete document after having changed the
contents of some text nodes.

=head1 METHODS

=head2 new

  my $text_node = MsOffice::Word::Surgeon::Text->new(
    xml_before   => $xml_string,
    literal_text => $text_string,
  );

Constructor for a new text object.
Arguments are : =over =item xml_before A string containing arbitrary XML preceding that text node in the complete document. This argument is optional; when omitted, no XML is emitted before the text. =item literal_text A string of literal text. =back =head2 as_xml my $xml = $text_node->as_xml; Returns the XML representation of that text node. The attribute C<< xml:space="preserve" >> is automatically added if the literal text starts or ends with a space character. =head2 merge $text_node->merge($next_text_node); Merge the contents of C<$next_text_node> together with the current text node. This is only possible if the next text node has an empty C<xml_before> attribute; if this condition is not met, an exception is raised. =head2 replace my $xml = $text_node->replace($pattern, $replacement_callback, %args); Replaces all occurrences of C<$pattern> within the text node by a new string computed by C<$replacement_callback>, and returns a new xml string corresponding to the result of all these replacements. This is the internal implementation for public method L<MsOffice::Word::Surgeon/replace>. =head2 to_uppercase Puts the literal text within the node into uppercase letters. =head1 AUTHOR Laurent Dami, E<lt>dami AT cpan DOT orgE<gt> =head1 COPYRIGHT AND LICENSE Copyright 2019-2024 by Laurent Dami. This program is free software, you can redistribute it and/or modify it under the terms of the Artistic License version 2.0. 
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/lib/MsOffice/Word/Surgeon/Utils.pm���������������������������������������000444��000000��000000�� 5733�15004462312� 24405� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������package MsOffice::Word::Surgeon::Utils; use 5.24.0; use strict; use warnings; use MsOffice::Word::Surgeon::Carp; use Exporter qw/import/; our @EXPORT = qw/maybe_preserve_spaces is_at_run_level parse_attrs decode_entities encode_entities/; sub maybe_preserve_spaces { my ($txt) = @_; return $txt =~ /^\s/ || $txt =~ /\s$/ ? ' xml:space="preserve"' : ''; } sub is_at_run_level { my ($xml) = @_; return $xml =~ m[</w:(?:r|del|ins)>$]; } sub parse_attrs { # cheap parsing of attribute lists in an XML node my ($lst_attrs) = @_; state $attr_pair_regex = qr[ ([^=\s"'&<>]+) # attribute name \h* = \h* # Eq (?: # attribute value " ([^<"]*) " # .. enclosed in double quotes | ' ([^<']*) ' # .. or enclosed in single quotes ) ]x; my %attr; while ($lst_attrs =~ /$attr_pair_regex/g) { my ($name, $val) = ($1, $2 // $3); decode_entities($val); $attr{$name} = $val; } return %attr; } # Cheap version for encoding/decoding XML Entities. # We just need 4 of them, so no need for a module with complete support. 
# lookup tables : entity name => character, and the reverse mapping
my %entities        = (quot => '"', amp => '&', lt => '<', gt => '>');
my %entity_for_char = reverse %entities;

# regex building blocks : alternation of entity names, and a character
# class containing every character that needs encoding
my $entity_names    = join "|", keys %entities;
my $entity_chars    = sprintf "[%s]", join "", values %entities;


# decode_entities($string) : replaces, in place within the caller's variable,
# each known entity reference by its character. The single pass guarantees
# that an already decoded '&' is never interpreted a second time.
# Returns the result of the substitution operator.
sub decode_entities {
  return $_[0] =~ s{&($entity_names);}{$entities{$1}}g;
}


# encode_entities($string) : replaces, in place within the caller's variable,
# each of the characters " & < > by the corresponding entity reference.
# Returns the result of the substitution operator.
sub encode_entities {
  return $_[0] =~ s{($entity_chars)}{ sprintf '&%s;', $entity_for_char{$1} }eg;
}

1;

__END__

=encoding ISO-8859-1

=head1 NAME

MsOffice::Word::Surgeon::Utils - utility functions for MsOffice::Word::Surgeon

=head1 SYNOPSIS

  use MsOffice::Word::Surgeon::Utils qw(maybe_preserve_spaces);

  my $attr = maybe_preserve_spaces($some_text);

=head1 DESCRIPTION

Functions in this module are used internally by L<MsOffice::Word::Surgeon>.

=head1 FUNCTIONS

=head2 maybe_preserve_spaces

  my $attr = maybe_preserve_spaces($some_text);

Returns the XML attribute to be inserted into C<< <w:t> >> nodes and
C<< <w:delText> >> nodes when the literal text within the node starts
or ends with a space -- in that case the XML should contain the attribute
C<< xml:space="preserve" >>

=head2 is_at_run_level

  if (is_at_run_level($xml)) {...}

Returns true if the given XML fragment ends with a C<< </w:r> >>,
C<< </w:del> >> or C<< </w:ins> >> node.

=head2 parse_attrs

  my %attrs = parse_attrs($lst_attrs)

Returns a hash of name-value pairs parsed from the input string.
Values may be enclosed in single or in double quotes.
Values are entity-decoded.

=head2 decode_entities

  decode_entities($string)

Decodes XML entities within the supplied string (in-place decoding).

=head2 encode_entities

  encode_entities($string)

Encodes XML entities within the supplied string (in-place encoding).

=head1 COPYRIGHT AND LICENSE

Copyright 2019-2024 by Laurent Dami.

This program is free software, you can redistribute it and/or modify
it under the terms of the Artistic License version 2.0.
�������������������������������������MsOffice-Word-Surgeon-2.10/t������������������������������������������������������������������������000755��000000��000000�� 0�15004477605� 16262� 5����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/t/msoffice-word-surgeon.t������������������������������������������������000444��000000��000000�� 6362�14731474414� 23040� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������use strict; use warnings; use Test::More; use MsOffice::Word::Surgeon; my $do_save_results = $ARGV[0] && $ARGV[0] eq 'save'; (my $dir = $0) =~ s[msoffice-word-surgeon.t$][]; $dir ||= "."; my $sample_file = "$dir/etc/MsOffice-Word-Surgeon.docx"; diag( "Testing MsOffice::Word::Surgeon $MsOffice::Word::Surgeon::VERSION, Perl $], $^X" ); my $surgeon = MsOffice::Word::Surgeon->new($sample_file); $surgeon->part($_)->replace(qr/\bPage\b/ => sub {"Pagina"}, cleanup_XML => 0) for $surgeon->headers; my $plain_text = $surgeon->plain_text; like $plain_text, qr/because documents edited in MsWord often have run boundaries across sentences/, "plain text"; like $plain_text, qr/1st/, "found 1st"; like $plain_text, qr/2nd/, "found 2nd"; like $plain_text, qr/paragraph\ncontains a soft line break/, "soft line break"; unlike $plain_text, qr/&\w+;/, "decoded entities"; $surgeon->document->cleanup_XML(no_caps => 1); my $contents = $surgeon->contents; like 
$contents, qr/because documents edited in MsWord often have run boundaries across sentences/, "XML after merging runs"; like $contents, qr/somme de 1'200/, "do not remove runs containing '0'"; like $contents, qr/SMALL & CAPS LTD/, "w:caps preserves HTML entities"; unlike $contents, qr/bookmarkStart/, "remove bookmarks (no markup)"; unlike $contents, qr/_GoBack/, "remove bookmarks (no _GoBack)"; like $contents, qr/Condamne SMALL/, "remove bookmarks (contents preserved)"; like $contents, qr/do you prefer Foo \? Really \?/, "ASK field (1/2)"; like $contents, qr/like this : Foo \?\B/, "ASK field (2/2)"; like $contents, qr/soft hyphens that should really be removed/, "soft hyphens"; my $new_xml = $surgeon->replace(qr/\bMsWord\b/, sub {"Microsoft Word"}, ); like $new_xml, qr/edited in Microsoft Word/, "after replace"; $surgeon->contents($new_xml); $plain_text = $surgeon->plain_text; my ($test_tabs) = $plain_text =~ /(\n.*?TAB.*)/; like $test_tabs, qr/starts\twith an\tinitial TAB, and also has\tmany internal TABS/, "TABS were preserved"; # check preservation of tabs through replacements $surgeon->document->replace(qr/\bDO_NOT_LOSE_THE_INITIAL_TAB\b/ => sub { my %args = @_; my $replacement_txt = "AFTER_REPLACE"; return $args{xml_before} ? 
"$args{xml_before}<w:t>$replacement_txt</w:t>" : $replacement_txt; } ); $plain_text = $surgeon->plain_text; like $plain_text, qr/\tAFTER_REPLACE/, "tab just before a replaced text is preserved"; is_deeply [$surgeon->headers], [qw/header1 header2 header3/], "headers"; is_deeply [$surgeon->footers], [qw/footer1 footer2 footer3/], "footers"; # replace contents also in headers and footers $surgeon->all_parts_do(replace => qr/\bSurgeon\b/ => sub {"Ph<y>sician"}, cleanup_XML => 0); # use Path::Tiny; # my $img = path("d:/temp/foo.png")->slurp_raw; # my $rId = $surgeon->document->add_image($img); # warn "created rId $rId\n"; $surgeon->save_as("surgeon_result.docx") if $do_save_results; done_testing(); ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/t/replace_fields.t�������������������������������������������������������000444��000000��000000�� 1173�14626654365� 21560� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������use strict; use warnings; use Test::More; use MsOffice::Word::Surgeon; my $do_save_results = $ARGV[0] && $ARGV[0] eq 'save'; (my $dir = $0) =~ s[replace_fields.t$][]; $dir ||= "."; my $sample_file = "$dir/etc/MsOffice-Word-Surgeon.docx"; my $surgeon = MsOffice::Word::Surgeon->new($sample_file); $surgeon->document->reveal_fields; my $contents = $surgeon->contents; like $contents, qr[\{\h+TOC.*?}], "field TOC was replaced"; like $contents, qr[\{\h+ASK.*?}], "field ASK was replaced"; like $contents, qr[\{\h+REF.*?}], 
"field REF was replaced"; $surgeon->save_as("fields_replaced.docx") if $do_save_results; done_testing(); �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/t/reveal_bookmarks.t�����������������������������������������������������000444��000000��000000�� 1424�14626254527� 22140� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������use strict; use warnings; use Test::More; use MsOffice::Word::Surgeon; my $do_save_results = $ARGV[0] && $ARGV[0] eq 'save'; (my $dir = $0) =~ s[reveal_bookmarks.t$][]; $dir ||= "."; my $sample_file = "$dir/etc/MsOffice-Word-Surgeon.docx"; my $surgeon = MsOffice::Word::Surgeon->new($sample_file); $surgeon->document->reveal_bookmarks(color => 'cyan'); my $contents = $surgeon->contents; like $contents, qr{<w:highlight w:val="cyan"/></w:rPr><w:t><nested_bookmarks_1></w:t></w:r><w:bookmarkStart}, "bookmark start"; like $contents, qr{<w:bookmarkEnd w:id="\d+"/><w:r><w:rPr><w:highlight w:val="cyan"/></w:rPr><w:t></nested_bookmarks_2></w:t></w:r>}, "bookmark end"; $surgeon->save_as("bookmarks_revealed.docx") if $do_save_results; done_testing(); 
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/t/zip_from_filehandle.t��������������������������������������������������000444��000000��000000�� 1613�15004421032� 22564� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������use strict; use warnings; use Test::More; use MsOffice::Word::Surgeon; (my $dir = $0) =~ s[zip_from_filehandle.t$][]; $dir ||= "."; my $sample_file = "$dir/etc/MsOffice-Word-Surgeon.docx"; open my $fh, "<:raw", $sample_file or die "open $sample_file: $!"; my $surgeon = MsOffice::Word::Surgeon->new($fh); my $plain_text = $surgeon->plain_text; like $plain_text, qr/because documents edited in MsWord often have run boundaries across sentences/, "plain text"; like $plain_text, qr/1st/, "found 1st"; like $plain_text, qr/2nd/, "found 2nd"; like $plain_text, qr/paragraph\ncontains a soft line break/, "soft line break"; unlike $plain_text, qr/&\w+;/, "decoded entities"; my $zip_in_memory = ""; open my $out, ">:raw", \$zip_in_memory or die "open output handle : $!"; $surgeon->save_as($out); close $out; ok bytes::length($zip_in_memory), "output zip in memory is not empty"; done_testing(); ���������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/t/etc��������������������������������������������������������������������000755��000000��000000�� 0�15004477605� 17035� 
5����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/t/etc/MsOffice-Word-Surgeon.docx�����������������������������������������000444��000000��000000�� 107651�14636464646� 24162� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������PK�����!�mU�� ���[Content_Types].xml (���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ĖMO0HUJjaa,WמKgRB!)}f(3;w;)�'nY'?YP8%wP $v>?>n] 1q H(Rhm\ ~:;l4|vX,.q&`+~ƫb"@;Lv%UnsJ?D^7D,~˵mQ KHfnMFnCzu! 
iE=᫳pkH?V"@|mT0N7Q<Gqh;!ʝ+ oAea?B=G= c;!VoV-Kiʼnh{4Փ:Yza}9`η���PK�����!�U~����� �_rels/.rels (���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������MK1!̽;*"^DMdC2(.Ե3y3C֛+4xW(A yX܂JBWpb#InJ*Eb=[JM%a B,o0f@=a noA;N<v"eӨbR1REF7ZnhYȐjy#1'<犦7 9m.3󭄓Y���PK�����!�F_U�����word/_rels/document.xml.rels (�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������XQO0~P=q2`IH $4^]_d;]f=5ߝ}g)G&E`M@ SYx__cob,W1`mxgߝ܀&$̈́PYx1#ȸ 0Eo֨3ni7,oE!] 
EEL^u*3"e` !iz(Lxʮu ƾmAEt1sdA+.=?pDnяwCX.rv`v8尰CaAwba@@ Tof> !]YTIj!D$/>s,A[DiA r # +ݔ,}I RD~vvTO WXF|kT~)T6z6ADF `@Μ*[x1S"[QͫK|jȍ0pY*4\ZSIL j++i=~Q1xr>\I;; Glp_bvGʨ9e┚j/7%c4bU׶[.kzUdOU@҃-r8] v6'F lW S\KnM}pJX݂$*vdɅ+U/+ vuR^Krc8m> 4,QTo_EPvFڱ:*2^jO9Qd*|ұaտgGAb,ǽ@qݝ3N|py g5 Lcv{A:s+{hu#0���PK�����!�e5������word/document.xml}rHqޡBݶc,bɶk͑e[RegE(V D y�bY�x)R많D�}Uoڌč6񻍝Pq`B_ :22zVahb'qzJw í4hL+MXeuζ+&PiJ;L7_'ZheN m iڽν/uvBۏfӗucG{Ք:K6麉t#te̻Ƈ%6;]S3$OP-"ꃉӆN:2mN{5Q^殇iFeV0'~*Q󻯸=KtΘ ,{Ҕ:x*wg~ؽuש%Klf4ZdIjaW;;bukҫ:sѐ r38<zD^ #5w"Z/6߼(!?"wC oԨHkG;|;e;"|qe_dxQ/l~.릁'eCK;N|q*?b2/q<Ho7wwڔQt"S`"c˞}~իƝcw.<rw,ݫxX'?p˭B4[WQ3AI:d!l&yjooPM u>s~g2Ë^)s})qy%ga%?onxo<6,B(Z994:k)=ѻԭw_l&p %2O% _+Yw1cEgbPĪTq$d3G@^8yaJ{ݦ!86)_\WpIt~/8~{!}_=R(ѵW ű1RC?_kf7wwvϯENگ='W]s Fߩ?LV>EmIw|$뱰µ顊'<7Sr"4>[-!6:lrqS/w:T^h@.?CIwV_58JB?&|kbsn'e= ORuNkJ{41⍹Sg/Q rDb KC%9:3SH+}}c Ȅ7tm3wO~*wqޭy2zx颒"qK~av->Z>}Φ8 <?Gѫl۫fOwG;3[=qs{ήY+s1}.>N(==[@n;sw(oPPQ|1=ŭgѩ)$HNHZF9eQ �OƊWw�VL~ X1 +. Rb/S>b^|88rv~y'@RC> c.�d2rOd P׀w�\L �3_:4!PcQ Pc5S_:0Cj-b--_l8�Z\qz^n8�\ Cb/\ & H4em@;d�2;g=d8^'92.Xjh6c ر؁ߣS&6cJ V~99_lѣ১Nj ^Dx XJxq�--hx,c.Oďg'?]rV/"xY?yzkB(yǑA# PaHZ||&]X͡UT+cR;ۻfC8[TTaewieuEŶ@m/l͵?yGzQzӔ۠Ls}R+ԣ.f]=vwԆl(Pve<j)n^i^ǤE&n7,~CQM ހ+Š=`EǎFYek6摼"XxN~ܽ\Ί05iNON{: AODJڞ G6NO~!L7;ΌjU7':rOQ-^~]ڨ{Z뽝1=}jwLON)Q7ㄺ}p0;;c:K՛q{ˆ_h'&\98fNrj5r_T@ ?>e<YZ0eԇo^43w2gԪHe-mR#V-8Rϡ(8X~ԙAY:Wb[ב~Qm!W\re%\*p}p}sw}oKZa@k07B}uVN$Ա-1<P1Rݔ-f2`O>FފQٔ01!\[p l7wׇlDKM5-e$GI.JlnPD$@姢e쵨)G_ej��j5B޲_VY2ѩ7җ V V VC%bbbb.&[k U U U U U j͊j Q0ng]!&�L�&9 Oї_T*sy[%sBAi76EFZ(QT1KQ^<|{,F|N|8st< MWg)l %3tlu'?4p8 WL4;L}>10ql V`b:eyH.]u{G&Y1qdڲ_0jPUUhK>iiGZi!A%"-BL T2j2FA@2gf5(-Sa -+ pAr<;*98xzPRI|$I $AH2g&$e6q[ePzPoPκOANNft5QfTC)d\;Y,�9U?~)]P!Ԧ?PJQB: 0ÆK 0-6'1Í^!CA{b9rwr!C0`#C0`nJ4J\V,VϏORF܂q □MG%nA|`�l܆[�@F,( !=S0kEfAAAW-n*y@kt^%\C :J<`1`1w@I$Fٖ(06Ɣ[3 G#fU>V8eS+sƚ="PY| &s|ǛΝ('eCI_jOs~ j'W||̵]_xZ3) 6̯F;_QX?^|9;<Ӏ(ysK3?y- qo8ZP )j-eqYp)m1<cۛ@Կhˆc5y+vic'xsˆY~Jb:VViEៗ IyD%Rԩ&QV:m[{ l{!Yf@WgeX@hQH~nMS|ҁ5;gcC бB~+HM zuQCC,`o+Bǁ"R@'w"7:ݯ}w ]mQYRaRd&3,niDˁh#ֿl{e8hpzgBƶ{R!/GV((Ւ#`1 ‚ 
V'�UO-sPUĵ;w*z2 MT/ ALHgpt=T]|G㬙nxX3fd'{GA7EWa=j==NDE`I>LN 0BO3iAVQL%ئ("V N6/6?P棬V@ X VvʬR_E%&CKOWLȖE"{E}hH&n7>Nz)tEUz')ehUߵ%UH$fZrǧT@Hp1Kؠ+?<7c1RIWko&V~#(S٘/~Y2;r"-Kɫ~i]6QǢ7IBSqWQ*)"qҼg{0+n>ɬ8?d{kVUv2>xĦ O{[3�Yn>s~vg^2պѩy6Cej;,Jk| twuWFݺ\Q p.9jZ4f97$W[IV댪oNơauʭ_y]i`Lԛ7"x&Oɦ^ׁ2 T܂vNq 5P$W`MH4d,jL 2)P{+qZcg's׵%crΞE;fә[j~風`!:IpH4z)Z~5'Y#ɑ-'~o9nnk'M[R򔬎–#/s3Frg4TI0(1nLs]‘ZܖD.YrH*A@I:|^@r*턉6 VbįgTxp~4&=w 293GLÒ/!PiԫgJ,dwZ/*|UL<+vEviUZun*UyeeҠE~+T^r .\9[:P,e%K]ky(Wң]5 =$m1iJ1[Tg-<[@kV+@b$Gp?cc'i,G%,#dyї_T* syp\k؋s/umSGՔ%C-F#.t* o}"\cɛ<EFjUXtL4CS|~[M4e[ ,|ꙿߍwF(rXj]o V).ƢՎ Jq|q>-}̝̆Fܕm̂/a!^6S{űrs+xZDؓ{r`OM'-iߓc-?mlATrPl:s,|fͧ}x%/Rҝ:;ЋR|6eY\w C 2F4A̖9ovNIu30sH&- Rzw1W>NHB_O+=(M0y'Nv[Nh㼹 ]vV>=P`BӋp#pF_l\وWzРڢrLkgA_0V2[5 V$ﯱ7V_i*�I_4�`v?�߿徚(ge%+qXW J�%V%t)(X!>ЁΦz_\-Aᣋi_?e%tʼB 3"U/H׭')P3w\x0EQ.SQqG]?�?PZKz9=B!+Qq`B~)9K_9GV= -ws20~hrywN,_NK^9ӬKIG0 %ExȚ{se�:aQ3bfQPʠ1A`#4FF;<2PAh \Z2ht&Ua0~]QDt+z S Wu]n4'ʂ X #`a-iѪ/_CAq}YIB(2g6C[_[Skl)08 YL,w+?A@ԏϪp,PCO2>ƪ+K)*^`/ %,N9Ǚ&=E ϭ&<VOͪ8tqFW=% Q`FB*Ӌà5TMo޼x2bcw|'U:*X,Lfa11(JfUW̦ȘQ5RQwBc1j TTJe)jOZ�mz(M&YB_DŪaX*Mm 狆*er2I"9yer-c/[ hN;/ *b-JԹ6/![2'j2ʑf)Nʛ.UQG`#F.`*]S/7e[JW/1Q|7Ec 8ZUWVWrAT|F꾨8$JJ^k&)O-ӑ#*qyWMRi7WȁU1dmlgd?q'Y�/x Vc$7h8Fz1ҋM0kGzz�X*VXw+)K>Bq3uR˸gVUNbV kJln_Ov$&YL^{M| $7 [>Hn,N1jP5?Z;ye Z4Zxڿ'K؜4Ԭ9Πe wBAAU@Zdxy3ͪQh"~_Km?7 PuPuPuPuPun~7L}ZUk& AV@{@/ mPKg,,ttEЌ+Usi ~>x;x?텦/f8ׄG{ۻgÀgtvElHj&]Y\MIpdOCکˋ?g/e{=6C±(7VHHt= cއMI0假ըz\lG6)ZDje&LVn/XAPy%DjŤ պV@ xsx_x›uݝZR'"%UYKWDHBVɠ_wr)o|DYT/vlyu@|b"�G2h|$ZhEV$ZBPH!:F-=zM5'7½ )r+.STNUyݒlvNY&9e$9h s$Η2qg]snۚ쪑S_hiy6FwFx.d+姶*IS]x+ț4Le[M*})dVq=6O?҇5ZJ/*? ؟]b -gXiJS"Wޱ&{MM2KF ~0SgcC2iGFM1CiSq@H-,ÐL|B.~'RSlS*iX8jSGDtEBi$@vwFLhUOm•7ѥ`TQ Gr+x*'o=mN^q?r끉oLȟoM}e! 
Z�"ɜc(H$R5%BFP@2rOywT)Q̚hdq.,;(X8`>:77#`-iU[O?ѤA)3:y}/7276>Yg;:z,рtD_ ^-G ?П _zGګ 8D2_ܔa $|ɵ=_l|}P(F8V>+fȤ9Qk )bv,A; :ai5f*ryf+ y:JH1ML9oW-O(*JU9 6S9�8ǀc0Y QIΣ<3cHZrTQ _0K̶_?+( !?5" jQ^?+⌗ 7m?Xoy@bi1;*E(·4Y3CGݩCwgSzN1wgL(HL{ylCM؀PFCZPyxYAאN4d,=* )تg%i]!B2%L8Z 99^sd:T}€J A~Qpkq#ה[y'9չ&UYjWH) sWLE?$;y2g+Bf2v,)յ:|a=q_|~qvᅥy6BKE1T)Aӑ/xQU6T!O*֋w^ ʇUANBiG �B_�PXտ(�J�,(�ԧ0(�4N_P�h@eP�h\xf4"5Dje Dj MGdJ,G6L5Trca b6P]*G!Apsp9yUd9Sp/pspspspM#d cƪ 9ynZ|慦;5%fLDAAGٴ@T|AAAAA,R kAAEɴyZhx9x- M//_w^xW^mٴ///x9xyI^sCWOĜu|Щh*0RQg^g/)pV*yY"~ "1;ƙFӣSU۵븼 PtgtfEh}6u"Kt*5C]&.~h}o Ŗ�EaCb}vz  ]>j>KUTeVsRxLWY5=|Ε64A*[YUk8&H+_3sVu{)jE89G+/ʎ@fS)e9uf\hfOՔ_ o'I q ;I5o'igJ7VGG߹ϳ4O:No�1a[kGbi|n^tQs.r"?�,<@?ً?%p���]r8~gf˱n],%N"*Dbeͯy=7's@Rօ؎"\ $x+7U8=.yB4,A*~\/&]R,qrB7?._R*YIK-4fe,|v7Oi}qCNTܽRrkLMڠ#BoT/?Hm ŸOhGBbN*^ e?-m7TDv\$SLް GfKy4rMo)8ܼ€yBB ^6c-&PLN[F>N=O.b �"!]aZ?OJ.a$ _%M&cdU s`B>%TAF1p]9J\CKg`/3a$ܿ3JāsvB۔ ,YyϥYk,tdn٩d U2 ' ">?.7Uv+5C':iŧMnHV[>.Ъvڭ$O $yC-^^I$wKܭJ 9AJVyU@'<F F2C&FbAgk s4Q`8t@ [1=5�jofKFAMFGD.a>Z"I_aEOhjy:><oj�8 ǥvwpPj, OCh'jI: Q֝Fug>kW ^Wޅ(qVps(Z3@m %4.Lk\!ͣM6hSHE 8ΎOPIjIR!*A]ӳ{%5s[$fh}Vz 5~KMPT@pjՃ~*/J<eM/ b;fx_)-K%c-d62XPwd"JЁkֈY%XFZGQ.HBˑpg‰wo4]֚Ԛq9fObk~&WLGo8?{¹}, )H$cGjIhIAih,! S'gL] NG>bA_zpnXL©DC]B34{dDQ"Y1%J0^yHI \0#@F`(.$%GkX(|ȟC>7GξB+=11!0#Ѹ"TgQ*=`0`Ue"?q;zjL$"-2wN߃T�GH�-micv|NlbJq:Jpx0ҨsmPKLUBԶJbx?L=4LgxrW?<ֲf#)wh8b9!CB֯?Q {1m`ZpG %<눠 UYg,"8~/)xC8\6뵨lBltAn@|ʄ5,<0&){R�,x$w#1�NHBn38  Vf X-M$/䐛σ>kML4 ǵ){� BoI{0h�EV[oU?$=%b0Ijx1ujxS>Eߕ`�.,,lm]|̲Z>G˓އ^ק(@ޟ$y)Ә?1w)&dvIowӞ!I n+YV0eOo+L"}?= X; V^Y;aCă`dGf?LM\{Ro0îfLoSXϯXFۤYR[8^"9qhH4ZR` p\7ά , w|y>"\e *-cGǒn)bI2,}ĒYt(i1/Cx&jD#,eڱjuT&l]쌈-.bJiJ7aV?jo?pb&*r F \b  8nR4 峙�Ll 4Y1։ϑ;:=W֮GgܯLLu0؋XR^,L'8}j=&g4;v4ĉ8A7 | ۥ[KZ!yqvqC4?J<* $~J%Ñ&JƝU,-_8$4=p OdD&z*tS78ej \%Ibx &2P|1Cbw4*#EcxBBBIuh+J0>Y#&#yVj~cN,h6M4RP(tuvp{!PtK<C9�l �[zk? 
{+$"v 4yc$sr֞Fo't�2殞wF5# e[۳|&d ۷8L_G(ӳa;V %g?15x.u_WuYn+/ڤmP"7>?^m,tg6rzXVVYwsO= U܃ثWNN2&WJ^Yr܁$hI:^m1H�YQs^*W{!~r>1ك kI�}LC\Α'XvZS1kdgs\\iufvj\}/{پh_>7?^"uxh;9Iál~XInJi{qJ9ysVoE5|V#<-c|a& m<&v*r2 -@Bm\G\ V&Y]x'uKS~c#9 U'r? ]&<'4Uy[wo9]fMd0kY;aKvEՙ9h^LJRsO5G ۘ\XLDiܐ>! Nz)G hr`|F\azp%{cu3$\OpM1 zP3UErov̛N/L:Wc9{stwv67 /eA�|^9A]t{pyX`V%p�EHCcR` )qe* :9YRr|)"y. 9;  KX(/I<LݝCF9/\R߯5gHwxּ7.on%isҧ7/}7jWdGˬX#ƚlХAȖW|_}OΠá,{)O~A`5=[e pFXHrHN PriO>w?=>Ԃ=ejaHTXTO"I4 cfD,�*0(b_>,n!B:_#.flA}_b?u"$ٳZh"'h-ogDVJ7C\/:O+v^(Ur&W_P1iYN&-h.i6I9`骶.hij P N"V) 4=Ġgyv9;XiɋZv‚& a�t4,&xcO3Dŷ0F,47�γj$b%2;7`p->7%c\!JwibL7&~ƞĖ/\ɺ=K:]biC,'eLSjNO&̈7d \),,6w^S8eE3|J._Q*P 3_+%r@1G8jn\EXZۯ7;g #F]&/p/:0&0<wk樫^n 7ysΌ[%K5۾~UOXj /Sc`Q;ヷaq-"(K0bޝ";7Sqb8NO6i<6Xsy/9_~`nPΰc$Q<����PK�����!�*ʮ�������word/footer3.xml]O0M[a:dl1.?=0S 9o{zb"u\ELQr'qrJЊ%hZ-//uyZ64A&тI⦒SjuqpmgaojʜPDUġGh%55mFt2dO5mL[I<|Kb7�?~VbrH⽕nvLܽdi)mDl�ZÙ`!_&>BYcwbobH8(Xw" WCF�_NC;v;9Fmdui?6VӪN`u̼@)K?J[)>mJ p0ͯרf)?i/N�(H gi V5cv���PK�����!�ju[?��h�����word/header2.xmln0#;X^D(q_do?ΝM&Nl},{t*;ʉSy0@C N}x>߿r/ j\&H!Mʰc)QBP {u7ZPOﱆ5Bs+.#D =t jDs\H(T1bX}r%66RsHg`w{5bZ)$^en1M)Y$mNWiv0i gY3/ 1دKɲDQ*X7fw�!5#F.۪Tb';Zz⪺Uo3`i2#K̅:|�(N E*A 8\q_p8MלFxˑIW Yy7̲=|c(D 3ʇ۪˶E <-1=  s^}Unil7yk6 `\���r~}ܔa[eJpji|蔩SUbX)!ZP`rŚ)xE˧O���PK�����!�ҽ�������word/header1.xml]O0M[anFb?2=0% sr#EP2V ! 
:j '*%B+-shWq�ŕ ʽ71ƎL7ZtTKS+mS< y3VS'$8FK-@\zɐ7Axk0xk,*x[@F'*nZהּCC$+M ɔo"bxTy)D)E2 eߦRQ8 #5bbgΉ$\u4NL�׎춲/ʬՅi<ڳڳVu-7؝g-'JYy%AH_d - q_+BFbo⒈= +yB^|���PK�����!�5f��z�����word/endnotes.xml̖n0Ww@O i*v՛Uv5CMh~mp mU 0<W aRd(9Qʜe<]X"rRIZAnD.90ihJkU%pb8ZY3*9E(F$nGJK ƸXF5iN0-60!S|>nhRpԜX7K̉~qU̮2`dj-ҀRKI >e.iADr0%S}NWin@VƮQbUe�"?WωI|@E<8D37J8ab8KJn2=0\81 l|ZV_Zjhg~u+|-_9MSIʜK!5y"Wȕ/j+[fC7Ԯ33&VjXQ)7u:_d(u2 RWvw/%w<j2P'‚5wOxOMj+޽cltv[3h!}Q),uۢ>F|`>=廻?_*l2qs(\G A>.sQ Y4D̸q�;%���PK�����!�,j�������word/footnotes.xml̖n0%"X-r)}�YD$eo_Rҋ$33CݽmXd#Le\S FQRIH,6\)'<Cڤ,Ņs:!IJ3S< HLFQ5_(TnG s�N+qc'Cܼ) /Ь#/zdtcT+#1ҽzsLeX%@&"1Pz Jڂ롦4XgIlD:!mWvcwesbё<6fDP.wT�+ !حZ/*he` V[w=OsA?ʂ%k ]^=CMP8%xw:q[,hjS)x7}-SE6fj9Jq)L墅<2܁lPPd, VNa2e:%4>1%UsK=5/S=(C^Zxp9y>W~v26Lo`Em浬`}q5AS9vƽ%CjÅTH`6 nE= O7����PK�����!�w�������word/footer2.xmlo0'8 XDXt^\I?d;9?ڪ'}sgf30)B= 2b" Eo XD8@ ~2)jࢅ EBZ I)ǦH1#RGMiI1.;l`#hƅ .׈X[?2!C__ Kͱu:A\Wa˞X!Q!̵DRfi#)y됙$9V Ij?Ksi };~/;+G)VV1NH"NV L4/\ =1lj*ZHcіc VsZ^`smʜDH2%pEp ?`[ӌ8l3:bek;phQ t:8ʑ c`�X7fu*~GQeBq<T5qmsP)$ճSTˬmVO���PK�����!���&�����word/header3.xmlN0Ww|:RFMCX$IڷgrRzc'7=*f*A2DSTg\KБ9t"x+׆&;Z0IRrjӹ_R-sN*XMsꞨ8a-xiAg Y#H~>6Lf$~KbJ�!zV=b1Ji\NJ? 
vNei)mDl� ZNwiY CTR v.{.+p>Rtʿ&Fጌ4ccJ$j 9h}` pyu(Ҩ,?Y]/=մ3Xk9}211Pʒ{- P }AB#5AC$( W_w[4,mYNJOvZN(�WD$A-g<Z'ni|P01XA#t���PK�����!�*ʮ�������word/footer1.xml]O0M[a:dl1.?=0S 9o{zb"u\ELQr'qrJЊ%hZ-//uyZ64A&тI⦒SjuqpmgaojʜPDUġGh%55mFt2dO5mL[I<|Kb7�?~VbrH⽕nvLܽdi)mDl�ZÙ`!_&>BYcwbobH8(Xw" WCF�_NC;v;9Fmdui?6VӪN`u̼@)K?J[)>mJ p0ͯרf)?i/N�(H gi V5cv���PK�����!�+/�������word/theme/theme1.xmlYM7;3X N&!IQgՌH]%9R(zkh襇643d]u a}}G+ aFIvc[0 HҨk9UڶEHCI R];, }Jw@׎T4̀^"S x5aNWӬ&�=;9�ڻ!RFEC& 6</:>άc6($'?{0lb7]aե]¿`asO@\T^U@y{4j^X<5h/cgI^bs rz%((=ZC;^ᗳ]@&_3;;j UUVWnMk-I6�\PjN@q>h!kE1_xSʛ3r$#v P󦀮5 > 24e]SV /_8{og={H#_s_k3W??=h}g/򯟞 U!J n6I qf1@E/(H1,7�8͸\Wg5q6c�'p'qNXjfid<�W<MG&~ 50O9` %{iqGAF(0�CrjZ]C D[]O�H7�6X U0c 12 V{&,Nt1!ds3kts1}1tdBBT1HF(U'/Q`"H;Dy@1w}޾eȼ@D,3m H8�ꊮ'(=WW{{EwO͚I7/" wӪo­ O=�[� dMybgy/M>A9{T*; GQVQab12 VFg1ajr#jM gl6x0oʧSn�ز-e;?Xl-e-%a&$t Vx 9X,W, ~ܜ_o�P)/Lo >azu;H(M',py˹,SXj\ Y5﹆`ڵ'VȋɔB7Ү"GYe@&'(k]MNj;J{_jdeY}cBfAXc<n(U eh(S2+rUlEWxDQ<2tuVz8I{PDs"NM~C^a}U.ݫZ)n)qAL&-[uj[(-3bۧDyd|0*<~rTSf2Ե8^_qް6\zJWsC'5/{ğgx"^$5R@*4`juL~OEm!N7~ʠ;u,naҬ~m:~SizmC5y]W���PK�����!�p�� �����word/settings.xmlWK60tZR dj6X7EFIM$ $mSweML7/ g?@Y4&YA!΍)p0NDo1DJ`PшN6D#57% %5ɷ};YbI3ZQyY0 GӚ VN$deIs2( H}M[qR; �)%o]PW7\x,: + mΆWm߀*Gf>+ ߧUĩ&OJ|(1umqV; \m{gBaÖR O2YJ>l#Y , |wvGpr<asI9d)fR|^ $%ca/@ cFr,iW/\S+ OHk_1sw}4 iAn")sC)>x- `Té%)riIH+ڮ)5O3F˒p0@$k(wٱ ?hwvYF0 bii" KDN"Lsƈ@|}EwSw,vC,׎R=~9K"8ZvܥVڛzmڛze:+-ͷ-;1 [^#s\-9'hw"_1`E"7^:d~hBm'zNhc򴙋#;vLRmůZ$TU[+m Rd9H4svQ73Sל' :O4823C\"}t�D*iuXP&YTuuS9۷zbA3JFZ>(֎ h9<PB=p[iav^M`Ч-Z$Zv#Z#f5`λx8,E>Es4W3S4^DxEGKVUHgi$¸!$OJҢO#h]+k X +_m9rܜ WTma+CNX^z!!#3)XbĔ;y5u#?:Ԝ~9yD˹Hտ����PK�����!�JZ��]�����word/styles.xml]rH}߈v<Ԓb4+{4~ZES,Ж44c[7h褀,J5aO *OfVfU? /$I F? 
<śfn!~|8LÕw4XfY|zKrhL"vrAMb7 AdO;po`.(t<rFDIHi.8=ď4e^oQ3U%4"GCU^Jp b'}ZǁE4q!Cb]rX<S,ugrFY< j0(  q8 ړK3^~0wLg'`<ΏTnt[$oN/-9~hpn ɿKݍǮ0C 9hp?>uFMgˬF:;KWԻ'MN Ľo&q Yta |iI)7ƯxtOf#aa?z$F.=`ss!l_g !szVUM^FӗKhn4{ԍs(|(hq64Ɨ8WAh<1t4Ǝ83EdYa'kom#pۇ3 =w3pnpۃ5WZ[fQf͈hnİDez$I 027"|<x9Ѕ 6D_HȪd}g0!:hĦ <bӰJЉ֫ی;kX$-/G f;I`WMpuAuKX혘__A3[*Rh4,)LYқO[zSh,Mm"ėQӐi "%�5g\{>csB'ƘV 녉^ѺB+h^%+;&N=sgN+:9eBܬm"HRknPkti#mZٿan6OAZheH{;a)& +{#]0ķx%TZǂN.nZ}ԝwnܻCסDvx;rбA\޾rniL;'4 '2"8zcKC40H$[BbifVPo4nANÒK7*Ibq ِ|^ȖSZ+Mgucefu&E+O*pS&Zlg+p:{ihPlu7dzş£!MОs@kUagMFYx$o f fff;tJ`Ց`R-;:[Z)ٲ3f-;`lrł%-+Ahbɓ%ܹ&H%uBM uh1ٖpHD֚ƱlŒZ[ 8BwML<ѻ ס% }he|,c=e,2ްU2/+b7^<K;UP0ޤ]xIT$w;J{Kn䬣$~GI&8sZC5OQiodEpm 3YU\9<Z�3znΣxNz~hrKGvL+vO/N5ueSg}ezns�CtDZqTHңtMzAJVpDE+(VP$ZAh# CtNhGhG)!P čBB&`8G8G& QLBBBB0׊9*DA;*@;*@;{8*9*7qTb@Q! Q!Q!Q壆 q M8*DA;*@;*@;*@;*@;*@9*7rTvTvTvTXQ<QBG(hGhGhGhGhG(GF QЎ !Ў !S-QُݗT>.CMCcu{É70bZ^["P 6?SF%,X3Ӯ`NedeIPM,, iS-Kapt_RpLIxo%q]n%Ax\uxpޖb)@h2Ld<CJ+{z4P|ja ̨nQX!�ƜjeL52F,Kyp#Q `̩PTC(3P"`X{Zs!1ʌjaX!j`D51BS ̨U2j"`FTs!1j1RpIqrIKRIڰZ*!VKs\T&MЕ==BW(>0xbPhPfT㪥:UW-iUKT㪥FqՒj\TG5Z<8UKT㪥FqՒj\TG5ZW-Qs@˜SUKzqRոjj\TG5ZRUKT%=ոjj\TG5ZW-iUKT㪥F5CL[|ߌ]=ń/AŅoCI\QREՂlU~d _i*o>$nJ_.M䩍> >&)+Bxz<S 5ob=̒ŏ|?՗HݨsdS+:Ƀ\L#`5'jx)<,+Sԍ/[Z~ɲB/D=I &*Nlw{m{g'm:'?z&u o\cq`kjKxηg\s:>?6ǞF&ZQ{ 062y56߲p 62V*|71)FZQG062}56"6_1|COS<GMjtMÀin0K5\giK^W٠%[iqƓÆ֊]7s2ԚͶ6Ci)o#n㚲+S\y5dɳx|WV>E`Fkn;֏:"!'i:?{G4y \>):}|[`m̶q?j^1_1iEq(2z'8Ti_×M*Q̰i?UN&}1뗷�/#ޕ>9SNpv%gKS>MCC%cˢK;s'TBv\O}}v9Y+]Vn,*/:*jߏ, :hu_Q]tH牶85 L͝nw.̵/)3烑u'*e]1^Dn>�؝to:jil2ni8Aa-- 6Z: [;Q^{d?Wd+K<e-]͘t/i%Rȑ:yş8r4La*1^׮Q/?n 4J<ڕV**ib 9~ǟ'ݞИ$nF͙ؗl&K`{Si/%/d:<`W\X{Tʼnph/>]>?zޣ+9;ؕsLL諀DKmAIu8`�Yc ,?*N*/TeA+ƫS@@}f)q(_ eJͤ_`gr՜:u[h_=JoeBc ?_]<z'XIJ !)h^um,EC݃gpOHU?+Y7Ի#6 gS|l)g\r/LJsu^~(t~oC^˄[ǫelz"6ۈLb'0aq^5DB[+5 pM*_,I{~Ưw0(b_MRgt"(U'V1mbߘ+O r\+p%JoZ+c9%JÈh6߰3NaVx爝gsb^1Vpg&&2NG6uUȍoxXk8r6CҺ ]z LY)׺Kh idZv}~-|MvgkȔO-Mv6_8 m@|,7\ҫD:NFYx%}X-wJ׈⒃|AcI֑ζ g_F" ftg+cJZޫm~ϛm~!zOyΚ`_(3B~"'iK.krem^&Tm%ܸEmlq)zOw㘄5)^rUGcJ+_H]тt$ K9t VM 
6{RW>i}mZj7?_�����PK�����!�9��v8�����word/numbering.xmln8eըSI<HQ4Y2m Ix}>B0q.eeR'E郜OIjl1 ӹMӘ-/"9K2' >ƴ ̡TwY<17y-KL$I̙`2fbUckrmVeXйFtYŏ-9AxxNghEυKc\1NP|mt3'$M=Ha-&f鸒hJ)㲔PgS-SnX\Ls5q 50*6IxJUٶ]Ėu]M9+Sʯe튎}ŒH&Ǭ+!(ϲ批NM}! M"$,'[cA-vG-auЪV,}2I<[S"` >C̀! S9Gq rb¶ )ca/沱yAv*T$KHߢvo=ӒRyoqzpG앓,cMaXFҭ $pR40'MHS7 i׭)^Uݳug &gڲuPyRѵzyaݻRah."N`}ߓKU =jH(/ qeưZ^Zڞ[H6V؝X9Xy `n|û'~>okluƧlUslzw4㶾z4Z-aZRRP-' pv#JihZO*<LKZ'gȌV!3/ ˊC"o4K@C !'ت!DCHc !B4hBvf_BFȞm35Bz[CC :4thR :NJ:h>cYQw58Mϯ !٪!DCHc !B4h2P >hs+tD"^%G_IsNSSFvZ]ki/w否iĦ����PK�����!�w���c���docProps/custom.xml (�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������;O0�` 4&P؍㴑beqţ;۝ClDfÈlŁhÞ(m/'E47ɣ>:DP)FW1IVlSe@odL?2QhY=SPWd]x9,.y285|tEu/h+[lK˻56o4@41Ys&o^ ;&4_����PK�����!�t?9z���(���customXml/_rels/item1.xml.rels (�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������ϱ0 hoPK)t;JGILcXji߾+t(QE]1hjP >N~j.G{J D60o,W0H9X)ctl'_uݓ|P=;6w#w Ev d*yB1ߪ k���PK�����!�~���U���(�customXml/itemProps1.xml $�( ���������������������������������j0 u&V;ױ®$36};uǝ'!}?fB,|RVNہ? 
QNL* D!:Q:׉1cU?2/[qUQ߀ڦ3'$Q6nV6 {獈 @\kN.FH,{ rIzf&h</F[媯v 7h~����PK�����!�WgC���2���(�customXml/item1.xml $�( ���������������������������������j0 _8롌Avq%1RՑ vwkcrL <%($˃ $㙰bصu_u|qJU,TZ';c0)FHy[@)˭]O,Qu }>c*qp^~XρolB3 -+~���PK�����!�Lփ�����docProps/core.xml (�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������|N0 HCP$@BbFXFIҷ'mI:EcE2 @ .BuTqZ 2TE$e:a)5'^Iل }:-I?J#YaMٚ�DŽLG9u7N^RoL p� Y"|`i6_p{ۊjT[LJPW Pr8 S|}d7_\w'>f+M>Rtc݂yKkp58Xfv5 OԺGM}l_3tx!;t+/1( cI|,9?*ƓLbId|���PK�����!�a8 I��f �����word/fontTable.xmlԔ]o0'?D/ |PQVI.n6!b;1PB:PNHc9~rov2܀jBN@<^ o!RV|Bbhe ƒMHjm>}`):: 6Z|4k_Rs0-sjRd(1$l#z Tpp+.q+YF3{Y'PO6a0 :LQiàѠwA0΢W[|G<J aV^iL􊱢_rRϩC mi6!AGM>DRyTJN-,=[jˬ Xc`�}TJ5zD9S>S%mӯ�4@< K9"~E1")r"s4[,D qAd1|.'2#qLZh hWѐz9U Ni\Wt X5ʜfbiDKI,Rp#∮* ( $Fe6R$WhISQx3&[4wnVTxt:_%2jjo$���PK�����!�wR̠��֊�����word/webSettings.xml]mo6>`ByA]a0 k[IY!u_?I۸U:EAd::Psog/>Tiz}>ŧ0E,Wӿ߾~U.Χ7E;}矞m϶ś_$eݞUumfvq]Ti)˺]\ͪyϻ͓E]m]yQfF�2|\ŻXwgMJu{]nOҶ#m[7MS/MSnUrY oU墩{nF;QvU"& mq'bTŇZv*IJ4IO'.\O9(3z /ͫwd.Y?{q}ϣWoͷ/뮫<^.r:4}h?_lzQd?w]}+buofy7m?!?t_r_'#"S?.EQ}b I'WޱolON!&p2}O-˯WU!?2")Pt|#PgQ n+`3<GF>Y"G'ߣZJ19`�)ػ%  c>.\ D fy SvFV#͔t^p2:!c3=˗E-7~ۇF�oұO <<?DCpTLJz8e ,h D1 tz##IF9C˖DYFF>;f|,l##O͏!cK5"TL%tcOX4c0꣊VQ{j؃E1k8C=)F,�DĘ0CvQe6wȳ%z17M/"d:d-!#.R6>d!xxa><`vp.FEK$9D]g֟t.M9<I?b┊p ֔~L G >XS6 #{srXʮ_6:dC*32:OOmmm>`.F ֓=B8A�d;> $9"!A[ֶf!F%Vѝ'(-NP΃? f9YOIqVs! 'o/-9F !4eY<AXt"|@qOV?d!;O1 l0>2ydgr!FvNh8'8t8}EqϲAmɄ7s(BtO3>χ(x1pl>$#[~b+cn^6-|U_og-.]W���PK�����!�i �����docProps/app.xml (�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Sr03?x|od'iZ2:L �43q۳׉[H[Ok8۷맧yi5Eϲ4l;HiPJ6@!7+ցG !! 
֌uV ej[=uZ܂A6ϲSAu&tT\wUX ^B >6bҢlJ݂ȗWB{' %g#曃R!5Q",b[$R' ~p %n5EYڐ7KFD{di{BS uAԲ o߁'w'A H;4ec0I{X/E>8/swtBᾦ?h5;;ƶNx `~|bD__-='Exx9hFbW"Pь!M+?5{N5'%{WaNmO����PK-������!�mU�� �������������������[Content_Types].xmlPK-������!�U~����� ���������������_rels/.relsPK-������!�F_U�����������������.��word/_rels/document.xml.relsPK-������!�e5���������������� ��word/document.xmlPK-������!�*ʮ�����������������A��word/footer3.xmlPK-������!�ju[?��h���������������zC��word/header2.xmlPK-������!�ҽ�����������������E��word/header1.xmlPK-������!�5f��z���������������G��word/endnotes.xmlPK-������!�,j�����������������gJ��word/footnotes.xmlPK-������!�w�����������������M��word/footer2.xmlPK-������!���&���������������KO��word/header3.xmlPK-������!�*ʮ�����������������LQ��word/footer1.xmlPK-������!�+/�����������������9S��word/theme/theme1.xmlPK-������!�p�� ���������������Y��word/settings.xmlPK-������!�JZ��]���������������^��word/styles.xmlPK-������!�9��v8���������������p��word/numbering.xmlPK-������!�w���c���������������t��docProps/custom.xmlPK-������!�t?9z���(���������������v��customXml/_rels/item1.xml.relsPK-������!�~���U���������������x��customXml/itemProps1.xmlPK-������!�WgC���2���������������9z��customXml/item1.xmlPK-������!�Lփ�����������������Y{��docProps/core.xmlPK-������!�a8 I��f ���������������~��word/fontTable.xmlPK-������!�wR̠��֊�����������������word/webSettings.xmlPK-������!�i �����������������^��docProps/app.xmlPK���������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/xt�����������������������������������������������������������������������000755��000000��000000�� 0�15004477605� 16452� 
5����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/xt/manifest.t������������������������������������������������������������000444��000000��000000�� 314�13555712537� 20565� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!perl -T use 5.006; use strict; use warnings; use Test::More; my $min_tcm = 0.9; eval "use Test::CheckManifest $min_tcm"; plan skip_all => "Test::CheckManifest $min_tcm required" if $@; ok_manifest(); ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������MsOffice-Word-Surgeon-2.10/xt/pod.t�����������������������������������������������������������������000444��000000��000000�� 362�13555712537� 17544� 0����������������������������������������������������������������������������������������������������ustar�00unknown�������������������������unknown�������������������������000000��000000�������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!perl -T use 5.006; use strict; use warnings; use Test::More; # Ensure a recent version of Test::Pod my $min_tp = 1.22; 
eval "use Test::Pod $min_tp"; plan skip_all => "Test::Pod $min_tp required for testing POD" if $@; all_pod_files_ok(); ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������