ocr-1.2.2/0000775000175000017500000000000012433405322013077 5ustar dalitzdalitz00000000000000ocr-1.2.2/scripts/0000775000175000017500000000000012433405322014566 5ustar dalitzdalitz00000000000000ocr-1.2.2/scripts/ocr4gamera.py0000644000175000017500000004317412401342420017165 0ustar dalitzdalitz00000000000000#!/usr/bin/python # # Copyright (C) 2009-2010 Rene Baston, Christoph Dalitz # 2014 Fabian Schmitt # 2011-2014 Christoph Dalitz # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # import codecs #keep an eye on encoding stuff... 
http://evanjones.ca/python-utf8.html import sys import time import os.path VERSION = "1.2.0" def usage(returncode): sys.stdout.write("Usage:\n\tocr4gamera -x [options] \n" +\ "Options (can be short or long):\n" +\ "\t-v , --verbosity=\n" + \ "\t set verbosity level to ; possible values are\n" + \ "\t 0 (default): silent operation\n" + \ "\t 1: information on progress\n" + \ "\t >2: segmentation info is written to PNG files with prefix 'debug_'\n" +\ "\t-h, --help\n" + \ "\t this help message\n" +\ "\t--version\n" + \ "\t print version and exit\n" +\ "\t-d, --deskew\n" + \ "\t do a skew correction (recommended)\n" +\ "\t-mf , --median_filter=\n" +\ "\t smooth the input image with a median filter with window size \n" +\ "\t default is =0, which means no smoothing\n" +\ "\t-ds , --despeckle=\n" +\ "\t remove all speckle with size <= \n" +\ "\t default is = 0, which means no despeckling\n" +\ "\t-f, --filter\n" + \ "\t filter out connected components that are very big or very small\n" +\ "\t-a, --automatic_group\n" + \ "\t autogroup glyphs with classifier\n" +\ "\t-x , --xmlfile=\n" + \ "\t read training data from \n" +\ "\t-k , --k=\n" + \ "\t number of neighbors used by kNN classifier (default is = 1)\n" +\ "\t-o , --output=\n" + \ "\t write recognized text to file \n" + \ "\t (otherwise it is written to stdout)\n" +\ "\t-od , --output_directory=\n" + \ "\t writes for each input image the recognized text to '/.txt\n" +\ "\t note that this option cannot be used in combination with -o (--outfile)\n" + \ "\t (otherwise it is written to stdout)\n" +\ "\t-c , --extra_chars_csvfile=\n" + \ "\t read additional class name conversions from file \n" + \ "\t must contain one conversion per line\n" +\ "\t-R , --heuristic_rules=\n" + \ "\t apply heuristic rules for disambiguation of some chars\n" + \ "\t can be 'roman' (default) or 'none' (for no rules)\n" +\ "\t-D, --dictionary_correction\n" + \ "\t dictionary correction (requires aspell or ispell)\n" +\ "\t-L , 
--dictionary_language=\n" + \ "\t language to be used by aspell (when option -D is set)\n" +\ "\t-e , --edit_distance=\n" + \ "\t dictionary correct only when edit distance not more than \n" + \ "\t-ho, --hocr_out\n" +\ "\t writes output as hocr file (only works with the -o option)\n" + \ "\t-hi , --hocr_in=\n" +\ "\t uses an hocr input file for textline segmentation\n" ) sys.exit(returncode) def correct(sentence, lang): import os from gamera.plugins.structural import edit_distance from popen2 import Popen3 correct="\*" incorrect="&" #trim_signs = '.,!?;:"' trim_signs = ('.',',','!','?',';',':','"') spell_prog = 'aspell' lang_opt = '-l' new_sentence = "" words = sentence.split(" ") if(len(words) == 0): return sentence p = Popen3('%s' % spell_prog, True) if opt.verbosity: print 'Using %s for word-correction.\n' % spell_prog if p.childerr.readlines() != []: if opt.verbosity: print '% is not installed\n' % spell_prog spell_prog = 'ispell' if opt.verbosity: print 'Using % for word-correction.\n' % spell_prog lang_opt = '-d' p = Popen3('%s Q' % spell_prog, True) if p.childerr.readlines() != ['ispell: specified file does not exist\n']: print 'Wether aspell nor ispell is installed on your system. Please make sure to install either of this programs.' exit # open with local setting language if (opt.lang == ''): if opt.verbosity: if spell_prog == 'aspell': print 'No language was given. Will open aspell with locale-settings language.\n' if spell_prog == 'ispell': print 'No language was given. 
Will open ispell with default language.\n' p = Popen3('%s -a' % spell_prog, True) # True is for also storing error object in return-value # user chosen language else: p = Popen3('%s -a %s %s' % (spell_prog, lang_opt, lang), True) out = p.fromchild.readline() # first line gives information about programm if (out == '' ): #something went wrong print p.childerr.readlines() exit word_count = len(words) for word in words: #word = word.strip(trim_signs) sign = "" if word.endswith(trim_signs): sign = word[-1:] word = word[:-1] word_count = word_count - 1 if(correct_this(word)): p.tochild.write('%s\n' % word.encode('utf-8')) p.tochild.flush() out = p.fromchild.readline() while (out=='\n'): out = p.fromchild.readline() if(out[0] == '*'): #spell_prog says: word correct new_sentence = new_sentence + word +sign if(word_count): new_sentence = new_sentence + " " continue elif(out[0] == '&'): #spell_prog says: word incorrect out = out.split(" ") if edit_distance(word, out[4][:-1]) <= opt.distance: word = out[4][:-1].decode('utf-8') elif opt.verbosity: print('%d. word: \'%s\' was not corrected to \'%s\'. ' 'Edit_distance: %i is larger than distance: %i.\n' % (len(words)-word_count, word, out[4][:-1], edit_distance(word, out[4][:-1]), opt.distance)) new_sentence = new_sentence + word + sign if(word_count): new_sentence = new_sentence + " " return new_sentence def correct_this(word): for character in word: if(character == "-"): return False if(character == "[" or character == "]"): return False if(character.isdigit()): return False if(word == word.upper()): return False return True def line_to_hocr(line, nr): id_s = " ' text = "" for word in line.words: word_s = "' word_s += line.text.split(" ")[line.words.index(word)] text += word_s + " " end = "
\n" return id_s + bbox_s + text + end class Options(): def __init__(self): self.help = False self.deskew = False self.ccsfilter = False self.auto_group = False self.dict_correct = False self.hocr_out = False self.hocr_in = "" self.verbosity = 0 self.outputfile = "" self.outputdirectory = "" self.trainfile = "" self.lang = "" self.distance = 2 self.extra_chars_csvfile = "" self.heuristic_rules = "roman" self.median_size = 0 self.speckle_size = 0 self.k = 1 # # here starts the main program # opt = Options() args = sys.argv[1:] imagefiles = [] extra_chars_dict = {} if(len(args) == 0): usage(1) i =0 while i< len(args): # options without second parameter if args[i] in ("-h", "--help"): usage(0) if args[i] == "--version": print VERSION sys.exit(0) elif args[i] in ("-d", "--deskew"): opt.deskew = True elif args[i] in ("-f", "--filter"): opt.ccsfilter = True elif args[i] in ("-a", "--automatic_group"): opt.auto_group = True elif args[i] in ("-D", "--dictionary_correction"): opt.dict_correct = True elif args[i] in ("-ho"): opt.hocr_out = True # options with second parameter # verbosity level elif args[i] == "-hi": i+=1 opt.hocr_in = args[i] elif args[i].startswith("--hocr_in="): opt.hocr_in = args[i][len("--hocr_in="):] elif args[i] in ("-v"): i+=1 opt.verbosity = int(args[i]) elif args[i].startswith("--verbosity="): opt.verbosity = int(args[i][len("--verbosity="):]) # output file name elif args[i] in ("-o"): i+=1 opt.outputfile = args[i] elif args[i].startswith("--output="): opt.outputfile = args[i][len("--output="):] # output directory elif args[i] in ("-od"): i+=1 opt.outputdirectory = args[i] elif args[i].startswith("--output_directory="): opt.outputdirectory = args[i][len("--output_directory="):] # training data file elif args[i] in ("-x"): i+=1 opt.trainfile = args[i] elif args[i].startswith("--xmlfile="): opt.trainfile = args[i][len("--xmlfile="):] # k for kNN elif args[i] in ("-k"): i+=1 opt.k = int(args[i]) elif args[i].startswith("--k="): opt.k = 
int(args[i][len("--k="):]) # median filter size elif args[i] in ("-mf"): i+=1 opt.median_size = int(args[i]) elif args[i].startswith("--median_size="): opt.median_size = int(args[i][len("--median_size="):]) # speckle size for despeckling elif args[i] in ("-ds"): i+=1 opt.speckle_size = int(args[i]) elif args[i].startswith("--despeckle="): opt.speckle_size = int(args[i][len("--despeckle="):]) # dictionary language elif args[i] in ("-L"): i+=1 opt.lang = args[i] elif args[i].startswith("--dictionary_language="): opt.lang = args[i][len("--dictionary_language="):] # edit distance for dictionary lookup elif args[i] in ("-e"): i+=1 opt.distance = int(args[i]) elif args[i].startswith("--edit_distance="): opt.distance = int(args[i][len("--edit_distance="):]) # additional translations classname -> character elif args[i] in ("-c"): i+=1 opt.extra_chars_csvfile = args[i] elif args[i].startswith("--extra_chars_csvfile="): opt.extra_chars_csvfile = args[i][len("--extra_chars_csvfile="):] # heuristic disambiguation rules elif args[i] in ("-R"): i+=1 opt.heuristic_rules = args[i].lower() elif args[i].startswith("--heuristic_rules="): opt.heuristic_rules = args[i][len("--heuristic_rules="):].lower() # unknown option elif args[i][0] == '-': print "Error: option %s does not exist" % args[i] usage(1) else: # we assume it is an imagefile imagefiles.append(args[i]) i+=1 # some plausibility checks if opt.trainfile == "": sys.stderr.write("Error: no training data given\n") sys.exit(1) if len(imagefiles) == 0: sys.stderr.write("Error: no image file given\n") sys.exit(1) if len(imagefiles) > 1 and opt.outputdirectory == "": sys.stderr.write("Error: for multiple image files option -od (--output_directory) must be given\n") sys.exit(1) if opt.outputdirectory != "" and not os.path.isdir(opt.outputdirectory): sys.stderr.write("Error: output directory '" + opt.outputdirectory +"' is not a proper directory\n") sys.exit(1) for imagefile in imagefiles: if not os.path.exists(imagefile): 
sys.stderr.write("Error: image file '" + imagefile + "' not found\n") sys.exit(1) if not(opt.hocr_in == "") and not(opt.outputdirectory == ""): sys.stderr.write("hocr-input doesn't works with -od option\n") sys.exit(1) if opt.hocr_out and opt.outputdirectory == "" and opt.outputfile == "": sys.stderr.write("hocr-output does only works with an output option\n") sys.exit(1) # we import Gamera after parsing the command line arguments so that # in case of a command line error the script can be aborted beforehand from gamera.core import * init_gamera() from gamera import knn from gamera.plugins import pagesegmentation from gamera.plugins.pagesegmentation import textline_reading_order from gamera.classify import ShapedGroupingFunction from gamera.plugins.image_utilities import union_images from gamera.toolkits.ocr.ocr_toolkit import * from gamera.toolkits.ocr.classes import Textline,ClassifyCCs,Page,hocrPage # load trainingsdata only once for all images cknn = knn.kNNInteractive([], ["aspect_ratio", "fourier_broken", "moments", "volume64regions", "nholes_extended"], 0) if opt.k > 0: cknn.num_k = opt.k cknn.from_xml_filename(opt.trainfile) # loop over all input images for imagefile in imagefiles: if opt.verbosity > 0: print "processing file '" + imagefile + "' ..." img = load_image(imagefile) if img.data.pixel_type != ONEBIT: img = img.to_onebit() if opt.outputdirectory != "": opt.outputfile = os.path.join(opt.outputdirectory, os.path.basename(imagefile) + ".txt") if opt.extra_chars_csvfile != "": f = codecs.open(opt.extra_chars_csvfile, "r", encoding='utf-8') for line in f: classname, char = line.split(',', 2)[:2] classname = classname.strip() char = char.strip("\n\r") extra_chars_dict[classname] = char f.close() if opt.median_size > 0: img = img.rank((opt.median_size*opt.median_size+1)/2, opt.median_size) if opt.speckle_size > 0: img.despeckle(opt.speckle_size) if opt.ccsfilter: ccs = img.cc_analysis() print "filter started on",len(ccs) ,"elements..." 
median_black_area = median([cc.black_area()[0] for cc in ccs]) newccs = [] for cc in ccs: if cc.black_area()[0] > (median_black_area * 10): cc.fill_white() else: new_ccs.append(cc) for cc in ccs: if cc.black_area()[0] < (median_black_area / 10): cc.fill_white() else: new_ccs.append(cc) print "filter done:", len(ccs)-len(newccs), "of", len(ccs), "CCs deleted." ccs = new_ccs if opt.deskew: if opt.verbosity > 0: print "\ntry to skew correct..." rotation = img.rotation_angle_projections(-10,10)[0] img = img.rotate(rotation,0) if opt.verbosity > 0: print "rotated with",rotation,"angle" if opt.auto_group: if(opt.ccsfilter): the_ccs = ccs else: the_ccs = img.cc_analysis() median_cc = int(median([cc.nrows for cc in the_ccs])) autogroup = ClassifyCCs(cknn) autogroup.parts_to_group = 3 autogroup.grouping_distance = max([2,median_cc / 8]) if opt.hocr_in == "": p = Page(img, classify_ccs=autogroup) else: p = hocrPage(img, opt.hocr_in, classify_ccs=autogroup) img.reset_onebit_image() if opt.verbosity > 0: print "autogrouping glyphs activated." print "maximal autogroup distance:", autogroup.grouping_distance else: if opt.hocr_in == "": p = Page(img) else: p = hocrPage(img, opt.hocr_in) if opt.verbosity > 0: print "start page segmentation..." t = time.time() p.segment() if opt.verbosity > 0: t = time.time() - t print "\t segmentation done [",t,"sec]" if opt.verbosity > 1: rgbfilename = "debug_lines.png" rgb = p.show_lines() rgb.save_PNG(rgbfilename) print "file '%s' written" % rgbfilename rgbfilename = "debug_chars.png" rgb = p.show_glyphs() rgb.save_PNG(rgbfilename) print "file '%s' written" % rgbfilename rgbfilename = "debug_words.png" rgb = p.show_words() rgb.save_PNG(rgbfilename) print "file '%s' written" % rgbfilename if opt.outputfile == "": sys.stdout = codecs.getwriter('utf-8')(sys.stdout) if opt.hocr_out: opt.outputfile += ".html" f = codecs.open(opt.outputfile, "a", "utf-8") start_text = '''
""" f.write(start_text) f.flush() f.close() for line in p.textlines: if opt.ccsfilter: if(len(line.glyphs) < 2): #a line with one or no glyph is useless continue cknn.classify_list_automatic(line.glyphs) if(opt.ccsfilter): #lines with a median confidence lower than 0.005 should be useless too if(median([glyph.get_confidence() for glyph in line.glyphs]) < 0.005): continue line.sort_glyphs() line.text = textline_to_string(line, heuristic_rules=opt.heuristic_rules, extra_chars_dict=extra_chars_dict) if opt.dict_correct: line.text = correct(line.text, opt.lang) line_text = line.text if opt.outputfile != "": f = codecs.open(opt.outputfile, "a", "utf-8") if not opt.hocr_out: line_text = line_text + "\n" else: line_text = line_to_hocr(line, p.textlines.index(line)) f.write(line_text) f.flush() f.close() else: print line_text if opt.hocr_out: f = codecs.open(opt.outputfile, "a", "utf-8") end_text = """
""" f.write(end_text) f.flush() f.close() if opt.verbosity > 0 and opt.outputfile != "": print "text has been written to file", opt.outputfile ocr-1.2.2/README0000644000175000017500000000366312401076422013765 0ustar dalitzdalitz00000000000000OCR Toolkit for Gamera ====================== Purpose ------- "Optical character recognition" (OCR) means the extraction of the text content from a document image. This toolkit provides - python library functions for building custom ocr applications - a ready to use script ocr4gamera Requirements ------------ This toolkit has been written for the Gamera framework and requires a working Gamera installation. See the Gamera homepage: http://gamera.informatik.hsnr.de/ Documentation ------------- For a user's guide and a developer's guide see 'doc/html/index.html'. For release notes and a revision history see 'CHANGES'. A comprehensive overview of design, usage and customization of the OCR toolkit can be found in the paper C. Dalitz, R. Baston: Optical Character Recognition with the Gamera Framework. In C. Dalitz (Ed.): "Document Image Analysis with the Gamera Framework." Schriftenreihe des Fachbereichs Elektrotechnik und Informatik, Hochschule Niederrhein, vol. 8, pp. 53-65, Shaker Verlag (2009) Installation ------------ See the section "Installation" in 'doc/html/index.html' or 'doc/src/index.txt'. Authors ------- Rene Baston, 2009 Christoph Dalitz, , 2009-2014 Please contact Christoph Dalitz for questions about this toolkit. Acknowledgements ---------------- Thanks to Jakub Wilk, Robert Butz, and Fabian Schmitt for valuable contributions to this toolkit. License ------- This toolkit is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, either version 2 of the license, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file LICENSE for more details. ocr-1.2.2/doc/0000775000175000017500000000000012433405322013644 5ustar dalitzdalitz00000000000000ocr-1.2.2/doc/html/0000775000175000017500000000000012433405322014610 5ustar dalitzdalitz00000000000000ocr-1.2.2/doc/html/pygments.css0000644000175000017500000000617512401076126017200 0ustar dalitzdalitz00000000000000.hll { background-color: #ffffcc } .c { color: #408080; font-style: italic } /* Comment */ .err { border: 1px solid #FF0000 } /* Error */ .k { color: #008000; font-weight: bold } /* Keyword */ .o { color: #666666 } /* Operator */ .cm { color: #408080; font-style: italic } /* Comment.Multiline */ .cp { color: #BC7A00 } /* Comment.Preproc */ .c1 { color: #408080; font-style: italic } /* Comment.Single */ .cs { color: #408080; font-style: italic } /* Comment.Special */ .gd { color: #A00000 } /* Generic.Deleted */ .ge { font-style: italic } /* Generic.Emph */ .gr { color: #FF0000 } /* Generic.Error */ .gh { color: #000080; font-weight: bold } /* Generic.Heading */ .gi { color: #00A000 } /* Generic.Inserted */ .go { color: #808080 } /* Generic.Output */ .gp { color: #000080; font-weight: bold } /* Generic.Prompt */ .gs { font-weight: bold } /* Generic.Strong */ .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ .gt { color: #0040D0 } /* Generic.Traceback */ .kc { color: #008000; font-weight: bold } /* Keyword.Constant */ .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */ .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */ .kp { color: #008000 } /* Keyword.Pseudo */ .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */ .kt { color: #B00040 } /* Keyword.Type */ .m { color: #666666 } /* Literal.Number */ .s { color: #BA2121 } /* Literal.String */ .na { color: #7D9029 } /* Name.Attribute */ .nb 
{ color: #008000 } /* Name.Builtin */ .nc { color: #0000FF; font-weight: bold } /* Name.Class */ .no { color: #880000 } /* Name.Constant */ .nd { color: #AA22FF } /* Name.Decorator */ .ni { color: #999999; font-weight: bold } /* Name.Entity */ .ne { color: #D2413A; font-weight: bold } /* Name.Exception */ .nf { color: #0000FF } /* Name.Function */ .nl { color: #A0A000 } /* Name.Label */ .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */ .nt { color: #008000; font-weight: bold } /* Name.Tag */ .nv { color: #19177C } /* Name.Variable */ .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */ .w { color: #bbbbbb } /* Text.Whitespace */ .mf { color: #666666 } /* Literal.Number.Float */ .mh { color: #666666 } /* Literal.Number.Hex */ .mi { color: #666666 } /* Literal.Number.Integer */ .mo { color: #666666 } /* Literal.Number.Oct */ .sb { color: #BA2121 } /* Literal.String.Backtick */ .sc { color: #BA2121 } /* Literal.String.Char */ .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */ .s2 { color: #BA2121 } /* Literal.String.Double */ .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */ .sh { color: #BA2121 } /* Literal.String.Heredoc */ .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */ .sx { color: #008000 } /* Literal.String.Other */ .sr { color: #BB6688 } /* Literal.String.Regex */ .s1 { color: #BA2121 } /* Literal.String.Single */ .ss { color: #19177C } /* Literal.String.Symbol */ .bp { color: #008000 } /* Name.Builtin.Pseudo */ .vc { color: #19177C } /* Name.Variable.Class */ .vg { color: #19177C } /* Name.Variable.Global */ .vi { color: #19177C } /* Name.Variable.Instance */ .il { color: #666666 } /* Literal.Number.Integer.Long */ocr-1.2.2/doc/html/gamera.toolkits.ocr.classes.Page.html0000644000175000017500000002154712401076126023702 0ustar dalitzdalitz00000000000000 class Page

class Page

Last modified: September 01, 2014

Page

In module gamera.toolkits.ocr.classes

The Page object offers the page segmentation functionality by providing a segment method. See its documentation for more information on how to overwrite specific steps of the segmentation process.

After the call of segment, the segmentation results are stored in the following attributes of Page:

textlines
List of Textline objects representing all text lines
img
The image to which Ccs in the textlines refer.

__init__

The only required argument in the constructor is the image that is to be segmented. Note that the constructor does not do the segmentation; for this, you must call the segment method.

Signature:

__init__ (image, glyphs=None, classify_ccs=None)

with

image:
The image to be segmented.
glyphs:
An optional list of connected components representing the characters in the image. In general, this is not needed, but it can be useful for bottom-up methods starting from already detected characters (e.g. by Gamera's classification based character grouping).
classify_ccs:
A callable class with the same interface as ClassifyCCs. If given, it will be called during the segmentation process, right after the splitting of lines to characters.

segment

Segments Page.img and stores the result in Page.textlines. This method has no arguments.

It calls the following methods in the given order:

  • page_to_lines for splitting the page into segments representing text lines
  • order_lines for sorting the lines into reading order
  • lines_to_chars for splitting all lines into characters
  • Page.classify_ccs when it is set, i.e., has been passed to the constructor (default is that it is not set)
  • chars_to_words for grouping the characters to words

By overwriting one (or several) of the above functions, you can replace specific steps of the segmentation process with custom algorithms.

page_to_lines

Splits the image into segments representing text lines. This method has no arguments.

The current implementation simply calls the bbox_merging plugin from the Gamera core with Ey=0, such that the page is not split into paragraphs, but into lines.

The segmentation result is stored in the variable Page.ccs_lines, which is a list of the data type Cc, i.e., with each segment (line) represented by a different label in the image. This is the interface used by all page segmentation plugins in the Gamera core.

Note

When you overwrite this method, make sure that you write the segmentation result to self.ccs_lines. This member variable will then be further processed by lines_to_chars.

order_lines

Sorts the segments in Page.ccs_lines into reading order. This method has no arguments.

The current implementation uses the plugin textline_reading_order from the Gamera core.

lines_to_chars

Splits text lines into characters. Signature:

lines_to_chars (lines=None)

lines must be a list of Cc data types, each of them representing a text line. When not given (default), Page.ccs_lines is used instead. The current implementation calls get_line_glyphs as defined in the module ocr_toolkit.

The result is stored in Page.textlines; the characters are stored for each textline in Textline.glyphs.

chars_to_words

Groups the characters in each Textline from Page.textlines to words and stores the result for each Textline in the property Textline.words.

This method has an optional but generally useless argument for the list of textlines. It is therefore usually called without arguments.

The current implementation calls chars_make_words as defined in the module ocr_toolkit.

show_lines

Returns an RGB image with all segmented text lines marked by hollow rects. This only makes sense after page_to_lines (or segment) has been called.

show_glyphs

Returns an RGB image with all segmented/grouped characters marked by hollow rects. This only makes sense after lines_to_chars (or segment) has been called.

show_words

Returns an RGB image with all grouped words marked by hollow rects. This only makes sense after chars_to_words (or segment) has been called.

ocr-1.2.2/doc/html/functions.html0000644000175000017500000002633211716171512017516 0ustar dalitzdalitz00000000000000 OCR Toolkit: Global Functions

OCR Toolkit: Global Functions

Last modified: June 08, 2010

The toolkit defines a number of free functions which are not image methods. These are defined in ocr_toolkit.py and can be imported in a python script with

from gamera.toolkits.ocr.ocr_toolkit import *

Output text generation

While the class Page splits the image into Textline objects and possibly classifies the characters, it does not generate an output string. For this purpose, you can use the function textline_to_string.

textline_to_string

Returns a unicode string of the text in the given Textline.

Signature:

textline_to_string (textline, heuristic_rules="roman", extra_chars_dict={})

with

textline:
A Textline object containing the glyphs. The glyphs must already be classified.
heuristic_rules:

Depending on the alphabet, some characters can be very similar and need further heuristic rules for disambiguation, like apostrophe and comma, which have the same shape and only differ in their position relative to the baseline.

When set to "roman", several rules specific for Latin alphabets are applied.

extra_chars_dict
A dictionary of additional translations of classnames to character codes. This is necessary when you use class names that are not unicode names. Will be passed to return_char.

As this function uses return_char, the class names of the glyphs in textline must correspond to unicode character names, as described in the documentation of return_char.

return_char

Converts a unicode character name to a unicode symbol.

Signature:

return_char (classname, extra_chars_dict={})

with

classname:
A class name derived from a unicode character name. Example: latin.small.letter.a returns the character a.
extra_chars_dict
A dictionary of additional translations of classnames to character codes. This is necessary when you use class names that are not unicode names. The character 'code' does not need to be an actual code, but can be any string. This can be useful, e.g. for ligatures:
return_char(glyph.get_main_id(), {'latin.small.ligature.st':'st'})

classname must correspond to the standard unicode character names, as in the examples of the following table:

Character Unicode Name Class Name
! EXCLAMATION MARK exclamation.mark
2 DIGIT TWO digit.two
A LATIN CAPITAL LETTER A latin.capital.letter.a
a LATIN SMALL LETTER A latin.small.letter.a

chars_make_words

Groups the given glyphs to words based upon the horizontal distance between adjacent glyphs.

Signature:
chars_make_words (glyphs, threshold=None)

with

glyphs:
A list of Cc data types, each of which represents a character. All glyphs must stem from the same single line of text.
threshold:
Horizontal white space greater than threshold will be considered a word separating gap. When None, the threshold value is calculated automatically as 2.5 times the median white space between adjacent glyphs.

The result is a nested list of glyphs with each sublist representing a word. This is the same data structure as used in Textline.words.

Segmentation

These functions are used in the segmentation methods of class Page. You will generally not need to call them, unless you are implementing a custom segmentation method.

get_line_glyphs

Splits image regions representing text lines into characters.

Signature:

get_line_glyphs (image, segments)

with

image:
The document image that is to be further segmented. It must contain the same underlying image data as the second argument segments.
segments:
A list of Cc data types, each of which represents a text line region. The image views must correspond to image, i.e. each pixel has a value that is the unique label of the text line it belongs to. This is the interface used by the plugins in the "PageSegmentation" section of the Gamera core.

The result is returned as a list of Textline objects.

show_bboxes

Returns an RGB image with bounding boxes of the given glyphs as hollow rects. Useful for visualization and debugging of a segmentation.

Signature:

show_bboxes (image, glyphs)

with:

image:
An image of the text document which has to be segmented.
glyphs:
List of rects which will be drawn on image as hollow rects. As all image types are derived from Rect, any image list can be passed.
ocr-1.2.2/doc/html/classes.html0000644000175000017500000000310512401076126017131 0ustar dalitzdalitz00000000000000 Classes

Classes

Last modified: September 01, 2014

Contents

Alphabetical

C

ClassifyCCs (gamera.toolkits.ocr.classes.ClassifyCCs)

H

hocrPage (gamera.toolkits.ocr.classes.hocrPage)

P

Page (gamera.toolkits.ocr.classes.Page)

T

Textline (gamera.toolkits.ocr.classes.Textline)

ocr-1.2.2/doc/html/gamera.toolkits.ocr.classes.Textline.html0000644000175000017500000000752712401076126024624 0ustar dalitzdalitz00000000000000 class Textline

class Textline

Last modified: September 01, 2014

Textline

In module gamera.toolkits.ocr.classes

The Textline object stores information about a text line in its following properties:

bbox
A Rect object representing the bounding box of the text line.
glyphs
A list of Cc objects, each representing a character in the line.
words
A nested list of Cc objects, where each sublist represents the characters of a single word.

__init__

Signature:

__init__ (bbox, glyphs=None)

with

bbox:
Rect object representing position and size of the text line
glyphs:
A list of Cc objects representing the characters in the text line

add_glyph

Adds the given glyph to the Textline. Signature:

add_glyph (glyph, extend=True)

When extend is True, the text line bounding box bbox is extended by the glyph's bounding box.

add_glyphs

Adds the given glyphs to the Textline. Signature:

add_glyphs (glyphs, extend=True)

When extend is True, the text line bounding box bbox is extended by the union of the glyphs' bounding boxes.

sort_glyphs

Sorts the characters in Textline.glyphs from left to right.

ocr-1.2.2/doc/html/index.html0000644000175000017500000003754312353015016016615 0ustar dalitzdalitz00000000000000 OCR toolkit for Gamera

OCR toolkit for Gamera

Last modified: June 26, 2014

Editor:Rene Baston, Christoph Dalitz
Version:1.1.0

Use the 'Addons' section on the Gamera home page for access to file releases of this toolkit.

Overview

The purpose of the OCR Toolkit is to help build optical character recognition (OCR) systems for standard text documents. Even though it can be used as is, it is specifically designed to make individual steps of the recognition system customizable and replaceable. The toolkit is based on and requires the Gamera framework for document analysis and recognition. As an addon package for Gamera, it provides

  • python library functions for building a custom OCR system
  • a ready-to-run python script ocr4gamera which acts as a basic OCR-system

A comprehensive overview of design, usage and customization of the OCR toolkit can be found in the paper

C. Dalitz, R. Baston: Optical Character Recognition with the Gamera Framework. In C. Dalitz (Ed.): "Document Image Analysis with the Gamera Framework." Schriftenreihe des Fachbereichs Elektrotechnik und Informatik, Hochschule Niederrhein, vol. 8, pp. 53-65, Shaker Verlag (2009)

The recognition process

Optical character recognition (OCR) means the extraction of a machine readable text code from bitmap images of text documents. This process typically consists of the following steps:

Preprocessing:
Includes binarization, skew correction, image enhancement, text/graphics separation
Segmentation:
Segmentation of the page in text lines (page segmentation) and characters (character segmentation)
Classification:
Identification of the individual characters
Postprocessing:
Includes the generation of the output string and maybe detection and correction of possible errors

The OCR toolkit only covers the process from segmentation to postprocessing. For preprocessing, the standard routines shipped with Gamera must be used beforehand, e.g. rotation_angle_projections for skew correction, or despeckle for noise removal.

For classification, the kNN classifier shipped with Gamera must be used. This means in particular, that you must train some sample pages before doing the classification. At present, the toolkit does not include training databases for common fonts.

Provided Components

The toolkit consists of two python modules, a plugin image function and one end user application.

The modules are

  • classes which contains all class definitions
  • ocr_toolkit for global functions used across the classes

The end user application is

  • ocr4gamera.py is a script that acts as a basic OCR-system

There is also one image plugin bbox_seg for textline segmentation which is simply a wrapper around the Gamera core plugin bbox_segmentation.

Limitations

As the segmentation of the individual characters is based on a connected component analysis, the toolkit cannot deal with touching characters, unless they have been trained as ligatures. It is therefore in general only applicable to printed documents, rather than handwritten documents.

From a user's perspective, there are some points to be aware of in this toolkit:

  • It does not provide methods for text/graphics separation. Hopefully, some generic methods for this purpose will be added at some point in the Gamera core.
  • It does not provide prototypes of latin characters. This means that characters must be trained on sample pages before using the toolkit.
  • The standard page segmentation algorithm for textline separation is currently very basic.

User's Manual

This documentation is written for those who want to use the toolkit for OCR, but are not interested in extending the toolkit itself.

Developer's Manual

This documentation is for those who want to extend the functionality of the OCR toolkit, or who want to customize specific steps of the recognition process.

  • Developer's manual: describes how to customize the recognition process
  • Classes: reference for the classes involved in the segmentation process. These are:
    • Page for doing the page segmentation
    • Textline for storing the segmentation result within Page
    • ClassifyCCs for (optionally) doing the classification during page segmentation
  • Functions: the global functions defined by the toolkit
  • Plugins: Reference for the plugin functions shipped with this toolkit

Installation

We have only tested the toolkit on Linux and MacOS X, but as the toolkit is written entirely in Python, the following instructions should work for any operating system.

Prerequisites

First you will need a working installation of Gamera 3.x. See the Gamera website for details. It is strongly recommended that you use a recent version, preferably from SVN.

If you want to generate the documentation, you will need two additional third-party Python libraries:

  • docutils for handling reStructuredText documents.
  • pygments for colorizing source code.

Note

It is generally not necessary to generate the documentation because it is included in file releases of the toolkit.

Building and Installing

To build and install this toolkit, go to the base directory of the toolkit distribution and run the setup.py script as follows:

# 1) compile
python setup.py build

# 2) install
sudo python setup.py install

Command 1) compiles the toolkit from the sources and command 2) installs it. As the latter requires root privileges, you need to use sudo on Linux and MacOS X. On Windows, sudo is not necessary.

Note that the script ocr4gamera is installed into /usr/bin on Linux, but into /System/Library/Frameworks/Python.framework/Versions/2.x/bin on MacOS X. As the latter directory is not in the standard search path, you could either add it to your search path, or install the scripts additionally into /usr/bin on MacOS X with:

# install scripts into standard path (MacOS X only)
sudo python setup.py install_scripts -d /usr/bin

If you want to regenerate the documentation, go to the doc directory and run the gendoc.py script. The output will be placed in the doc/html/ directory. The contents of this directory can be placed on a webserver for convenient viewing.

Note

Before building the documentation you must install the toolkit. Otherwise gendoc.py will not find the plugin documentation.

Installing without root privileges

The above installation with python setup.py install will install the toolkit system wide and thus requires root privileges. If you do not have root access (Linux) or are not a sudoer (MacOS X), you can install the OCR toolkit into your home directory. Note however that this also requires that Gamera is installed into your home directory. It is currently not possible to install Gamera globally and only toolkits locally.

Here are the steps to install both Gamera and the OCR toolkit into ~/python:

# install Gamera locally
mkdir ~/python
python setup.py install --prefix=~/python

# build and install the OCR toolkit locally
export CFLAGS=-I~/python/include/python2.3/gamera
python setup.py build
python setup.py install --prefix=~/python

Moreover you should set the following environment variables in your ~/.profile:

# search path for python modules
export PYTHONPATH=~/python/lib/python

# search path for executables (eg. gamera_gui)
export PATH=~/python/bin:$PATH

Uninstallation

The installation uses the Python distutils, which do not support uninstallation. Thus you need to remove the installed files manually:

  • the installed Python library files of the toolkit
  • the installed standalone scripts

Python Library Files

All python library files of this toolkit are installed into the gamera/toolkits/ocr subdirectory of the Python library folder. Thus it is sufficient to remove this directory for an uninstallation.

Where the python library folder is depends on your system and python version. Here are the folders that you need to remove on MacOS X and Debian Linux ("2.3" stands for the python version; replace it with your actual version):

  • MacOS X: /Library/Python/2.3/gamera/toolkits/ocr
  • Debian Linux: /usr/lib/python2.3/site-packages/gamera/toolkits/ocr

Standalone Scripts

The standalone scripts are installed into /usr/bin (linux) or /System/Library/Frameworks/Python.framework/Versions/2.3/bin (MacOS X), unless you have explicitly chosen a different location with the options --prefix or --home during installation.

For an uninstall, remove the following script:

  • ocr4gamera.py

Note

In older versions (1.0.0 and 1.0.1) this script was named ocr4gamera. Remove this old script, if you are upgrading from one of these versions.

About this documentation

The documentation was written by Rene Baston and Christoph Dalitz. Permission is granted to copy, distribute and/or modify this documentation under the terms of the Creative Commons Attribution Share-Alike License (CC-BY-SA) v3.0. In addition, permission is granted to use and/or modify the code snippets from the documentation without restrictions.

ocr-1.2.2/doc/html/ocr_toolkit.html0000644000175000017500000002270411716171512020035 0ustar dalitzdalitz00000000000000 ocr_toolkit_py

ocr_toolkit_py

Last modified: September 15, 2009

Functions

return_char

Returns a result character for building the string.

Signature:
return_char(unicode_str)

with

unicode_str:
The expected string has to be in unicode name format, e.g. latin.small.letter.a will return the character a. The returned character should be used for creating the result string.

chars_make_words

Splits the grouped characters that have been detected in a textline into single words.

Signature:
chars_make_words(lines_glyphs,threshold=None)

with

lines_glyphs:
lines_glyphs has to be the list of connected components that represent the characters of a textline.
threshold:
A threshold is needed for splitting the characters into single words. If the empty space between two characters is greater than or equal to this threshold, a white space is detected. The default value for this parameter is None, which will make the function calculate a threshold value automatically.

The Textline Objects keep a word list as an attribute, which expects a list like this function's return value.

textline_to_string

Gives a full text string as result.

Signature:
textline_to_string(line, heuristic_rules="roman")

with

line:
The Textline Object which keeps the characters.
heuristic_rules:
Some classified characters need further heuristic classification rules. Take the apostrophe as an example, which might get classified as a comma even though it is placed at the top of a textline. In such a case the glyph can be reclassified "manually" as an apostrophe. By default this function includes some rules for frequently observed classification errors for the _roman_ alphabet.

The result of this function can be used as a final result as this is the full text string.

check_upper_neighbors

Checks whether two glyphs should be grouped into one single character. This function is for uniting connected components like quotation marks.

Signature:
check_upper_neighbors(item,glyph,line)

with

item:
Some connected-component.
glyph:
Some connected-component.
line:
The Textline Object which includes item and glyph

An array with two elements is returned. The first element keeps a list of characters (images that have been united to a single image) and the second element is a list of characters which have to be removed, as these have been united to a single character.

check_glyph_accent

Checks whether two glyphs should be grouped into one single character. This function is for uniting connected components like i, j or colon.

Signature:
check_glyph_accent(item,glyph)

with

item:
Some connected-component.
glyph:
Some connected-component.

An array with two elements is returned. The first element keeps a list of characters (images that have been united to a single image) and the second element is a list of characters which have to be removed, as these have been united to a single character.

get_line_glyphs

Segments the glyphs which are included in every single textline with simple rules.

Signature:
get_line_glyphs(image,textlines)

with

image:
The text document image which is being segmented.
textlines:
A list of connected-components which keeps every textline of the document.

A Page Object has a list named textlines which should include Textline Objects. This list is filled within this function as it is called in a Page method.

show_bboxes

Draws hollow rects in a copy of image based on the rects in the glyphs list. If the save parameter is set to 1 a file with the name of filename will be created.

Signature:
show_bboxes(image,glyphs,filename="segmenated_glyphs.PNG",save=1)

with:

image:
An image of the text document which has to be segmented.
glyphs:
A list of rects which will be drawn on image as hollow rects.
filename:
A filename for the image file that might be created.
save:
By default save is set to 1 which will cause the function to create a new image file named filename. If save is set to 0 the function will try to display the image on-the-fly in a box.

This function is useful for debugging or for getting information about the segmentation process. The Page object uses this function in three methods for displaying the segmentation of textlines, all single characters and all detected words.

ocr-1.2.2/doc/html/gamera.toolkits.ocr.classes.ClassifyCCs.html0000644000175000017500000000774112401076126025174 0ustar dalitzdalitz00000000000000 class ClassifyCCs

class ClassifyCCs

Last modified: September 01, 2014

ClassifyCCs

In module gamera.toolkits.ocr.classes

This is a callable class that can optionally be passed to the constructor of Page, so that it will be called during the segmentation process.

Its standard definition should generally be sufficient for using a kNN classifier. Should you need to write your own classification function (e.g. one that additionally uses heuristic rules for classification), make sure that you overwrite the __call__ method with the same signature.

For fine tuning the classification, the following attributes can be used:

knn
The knn classifier; this is passed in the constructor
parts_to_group
Corresponds to max_parts_per_group in kNNInteractive.group_list_automatic. Default value is 3.
grouping_distance
Corresponds to the distance argument of the grouping_function in kNNInteractive.group_list_automatic. Only CCs closer than this distance are considered for grouping. Default value is -1, which means that it will be calculated automatically as in __call__.

__init__

Signature:

__init__ (knn)

where knn is a kNN classifier which has already loaded training data.

__call__

This method will be called in Page.segment. Signature:

__call__ (ccs)

where ccs is the list of glyphs that is to be classified. See the documentation of Gamera's classifier API for how the classification result is stored in the glyphs.

How the classification is done is controlled by the following attributes of ClassifyCCs:

  • When parts_to_group > 1, the classification is done with Gamera's grouping algorithm; otherwise no grouping of broken characters is done.
  • In case of grouping, the attribute grouping_distance is passed to the grouping function. When it is -1 (default), it is set to the median height of the ccs.
ocr-1.2.2/doc/html/gamera.toolkits.ocr.classes.hocrPage.html0000664000175000017500000000420312401076126024546 0ustar dalitzdalitz00000000000000 class hocrPage

class hocrPage

Last modified: September 01, 2014

Contents

hocrPage

In module gamera.toolkits.ocr.classes

A class derived from Page that overrides the page_to_lines method. Instead of bbox_merging, page_to_lines reads the segmentation information from a hOCR file for textline detection.

__init__

Like Page.__init__, but with the additional obligatory argument hocr_in_path for the name of a hOCR file from which the textline segmentation is read. Note that the constructor does not do the segmentation; for this, you must call the segment method.

Signature:

__init__ (image, hocr_in_path, glyphs=None, classify_ccs=None)
ocr-1.2.2/doc/html/html4css1.css0000644000175000017500000001301512401076126017143 0ustar dalitzdalitz00000000000000/* :Author: David Goodger :Contact: goodger@users.sourceforge.net :Date: $Date: 2007/08/10 14:44:25 $ :Revision: $Revision: 1.1 $ :Copyright: This stylesheet has been placed in the public domain. Default cascading style sheet for the HTML output of Docutils. See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to customize this style sheet. */ /* used to remove borders from tables and images */ .borderless, table.borderless td, table.borderless th { border: 0 } table.borderless td, table.borderless th { /* Override padding for "table.docutils td" with "! important". The right padding separates the table cells. */ padding: 0 0.5em 0 0 ! important } .first { /* Override more specific margin styles with "! important". */ margin-top: 0 ! important } .last, .with-subtitle { margin-bottom: 0 ! important } .hidden { display: none } a.toc-backref { text-decoration: none ; color: black } blockquote.epigraph { margin: 2em 5em ; } dl.docutils dd { margin-bottom: 0.5em } /* Uncomment (and remove this text!) to get bold-faced definition list terms dl.docutils dt { font-weight: bold } */ div.abstract { margin: 2em 5em } div.abstract p.topic-title { font-weight: bold ; text-align: center } div.admonition, div.attention, div.caution, div.danger, div.error, div.hint, div.important, div.note, div.tip, div.warning { margin: 2em ; border: medium outset ; padding: 1em } div.admonition p.admonition-title, div.hint p.admonition-title, div.important p.admonition-title, div.note p.admonition-title, div.tip p.admonition-title { font-weight: bold ; font-family: sans-serif } div.attention p.admonition-title, div.caution p.admonition-title, div.danger p.admonition-title, div.error p.admonition-title, div.warning p.admonition-title { color: red ; font-weight: bold ; font-family: sans-serif } /* Uncomment (and remove this text!) 
to get reduced vertical space in compound paragraphs. div.compound .compound-first, div.compound .compound-middle { margin-bottom: 0.5em } div.compound .compound-last, div.compound .compound-middle { margin-top: 0.5em } */ div.dedication { margin: 2em 5em ; text-align: center ; font-style: italic } div.dedication p.topic-title { font-weight: bold ; font-style: normal } div.figure { margin-left: 2em ; margin-right: 2em } div.footer, div.header { clear: both; font-size: smaller } div.line-block { display: block ; margin-top: 1em ; margin-bottom: 1em } div.line-block div.line-block { margin-top: 0 ; margin-bottom: 0 ; margin-left: 1.5em } div.sidebar { margin-left: 1em ; border: medium outset ; padding: 1em ; background-color: #ffffee ; width: 40% ; float: right ; clear: right } div.sidebar p.rubric { font-family: sans-serif ; font-size: medium } div.system-messages { margin: 5em } div.system-messages h1 { color: red } div.system-message { border: medium outset ; padding: 1em } div.system-message p.system-message-title { color: red ; font-weight: bold } div.topic { margin: 2em } h1.section-subtitle, h2.section-subtitle, h3.section-subtitle, h4.section-subtitle, h5.section-subtitle, h6.section-subtitle { margin-top: 0.4em } h1.title { text-align: center } h2.subtitle { text-align: center } hr.docutils { width: 75% } img.align-left { clear: left } img.align-right { clear: right } ol.simple, ul.simple { margin-bottom: 1em } ol.arabic { list-style: decimal } ol.loweralpha { list-style: lower-alpha } ol.upperalpha { list-style: upper-alpha } ol.lowerroman { list-style: lower-roman } ol.upperroman { list-style: upper-roman } p.attribution { text-align: right ; margin-left: 50% } p.caption { font-style: italic } p.credits { font-style: italic ; font-size: smaller } p.label { white-space: nowrap } p.rubric { font-weight: bold ; font-size: larger ; color: maroon ; text-align: center } p.sidebar-title { font-family: sans-serif ; font-weight: bold ; font-size: larger } 
p.sidebar-subtitle { font-family: sans-serif ; font-weight: bold } p.topic-title { font-weight: bold } pre.address { margin-bottom: 0 ; margin-top: 0 ; font-family: serif ; font-size: 100% } pre.literal-block, pre.doctest-block { margin-left: 2em ; margin-right: 2em ; background-color: #eeeeee } span.classifier { font-family: sans-serif ; font-style: oblique } span.classifier-delimiter { font-family: sans-serif ; font-weight: bold } span.interpreted { font-family: sans-serif } span.option { white-space: nowrap } span.pre { white-space: pre } span.problematic { color: red } span.section-subtitle { /* font-size relative to parent (h1..h6 element) */ font-size: 80% } table.citation { border-left: solid 1px gray; margin-left: 1px } table.docinfo { margin: 2em 4em } table.docutils { margin-top: 0.5em ; margin-bottom: 0.5em; background-color: #f7fffd; border-color: #72ada8; border: solid thin #aaaaaa; } table.footnote { border-left: solid 1px black; margin-left: 1px } table.docutils td, table.docutils th, table.docinfo td, table.docinfo th { padding-left: 0.5em ; padding-right: 0.5em ; vertical-align: top } table.docutils th.field-name, table.docinfo th.docinfo-name { font-weight: bold ; text-align: left ; white-space: nowrap ; } td.field-body, th.field-name { padding: 0.5em; border: solid thin #aaaaaa; } h1 tt.docutils, h2 tt.docutils, h3 tt.docutils, h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { font-size: 100% } tt.docutils { } ul.auto-toc { list-style-type: none } ocr-1.2.2/doc/html/ocr.html0000644000175000017500000000752211716171512016271 0ustar dalitzdalitz00000000000000 OCR

OCR

Last modified: May 28, 2010

segmentation

bbox_mcmill

[object] bbox_mcmill ([object glyphs], float section_search_size = 1.00, float noise_mltplk = 1.00, float large_mltplk = 20.00, float stdev_mltplk = 5.00)

Operates on:Image [OneBit]
Returns:[object]
Category:OCR/segmentation
Defined in:bbox_merging_mcmillan.py
Author:Robert Butz, Karl MacMillan

Returns the textlines from image as connected components. The segmentation method is adapted from McMillan's segmentation method in roman_text.py. It allows a more individual segmentation through parameterization.

Options:

glyphs:
This list can be built from a cc_analysis. By default, this parameter is blank, which will cause the function to call cc_analysis itself.
section_search_size
This optional parameter adjusts the calculated avg_glyph_size by multiplying its value (default=1).
noise_mltplk
With this optional parameter one can adjust the noise_recognition rate independently from the calculated avg_glyph_size (default = 1). Values greater than 1 let the noise_removal detect bigger noise (but maybe even glyphs). Choose smaller values to avoid assigning small glyphs to noise.
large_mltplk
Analogous to noise_mltplk, one can set this parameter to manipulate the recognition of very large ccs according to the avg_glyph_size (default=20). Higher values lead to a better acceptance of above-average ccs. This is beneficial, for example, for big capital initials at the beginning of paragraphs such as seen in bibles.
stdev_mltplk
This parameter affects the line finding algorithm by excluding abnormally tall glyphs (default=5). The standard deviation will be calculated and multiplied by this parameter.
ocr-1.2.2/doc/html/default.css0000644000175000017500000001104412401076126016745 0ustar dalitzdalitz00000000000000@import url(html4css1.css); @import url(pygments.css); body { margin: 2em 2em 2em 2em; background-color: #effffd; } a.toc-backref { text-decoration: none ; color: black } h1 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 4px } h2 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 2px } h3 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 1px } h4 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 1px } h5 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 0.5px } div.code-block, div.highlight { margin-left: 2em; margin-right: 2em; background-color: #f0f0e0; font-family: "Andale Mono", "Bitstream Vera Sans Mono", monospace; border-color: #e0e0d0; border-style: solid; border-width: 1px; font-size: 10pt; padding: 1em; } /* The following is for SilverCity syntax highlighting */ .code_default { FONT-FAMILY: "Andale Mono", "Bitstream Vera Sans Mono", monospace; FONT-SIZE: 10pt; } .c_character { color: olive; } .c_comment { color: green; font-style: italic; } .c_commentdoc { color: green; font-style: italic; } .c_commentdockeyword { color: navy; font-weight: bold; } .c_commentdockeyworderror { color: red; font-weight: bold; } .c_commentline { color: green; font-style: italic; } .c_commentlinedoc { color: green; font-style: italic; } .c_default { } .c_identifier { color: black; } .c_number { color: #009999; } .c_operator { color: black; } .c_preprocessor { color: navy; font-weight: bold; } .c_regex { color: olive; } .c_string { color: olive; } .c_stringeol { color: olive; } .c_uuid { color: olive; } .c_verbatim { color: olive; } .c_word { 
color: navy; font-weight: bold; } .c_word2 { color: navy; font-weight: bold; } .h_asp { color: #ffff00; } .h_aspat { color: #ffdf00; } .h_attribute { color: #008080; } .h_attributeunknown { color: #ff0000; } .h_cdata { color: #ffdf00; } .h_comment { color: #808000; } .h_default { } .h_doublestring { color: olive; } .h_entity { color: #800080; } .h_number { color: #009999; } .h_other { color: #800080; } .h_script { color: #000080; } .h_singlestring { color: olive; } .h_tag { color: #000080; } .h_tagend { color: #000080; } .h_tagunknown { color: #ff0000; } .h_xmlend { color: #0000ff; } .h_xmlstart { color: #0000ff; } .pl_array { color: black; } .pl_backticks { color: olive; } .pl_character { color: olive; } .pl_commentline { color: green; font-style: italic; } .pl_datasection { color: olive; } .pl_default { } .pl_error { color: red; font-weight: bold; } .pl_hash { color: black; } .pl_here_delim { color: olive; } .pl_here_q { color: olive; } .pl_here_qq { color: olive; } .pl_here_qx { color: olive; } .pl_identifier { color: black; } .pl_longquote { color: olive; } .pl_number { color: #009999; } .pl_operator { color: black; } .pl_pod { color: black; font-style: italic; } .pl_preprocessor { color: navy; font-weight: bold; } .pl_punctuation { color: black; } .pl_regex { color: olive; } .pl_regsubst { color: olive; } .pl_scalar { color: black; } .pl_string { color: olive; } .pl_string_q { color: olive; } .pl_string_qq { color: olive; } .pl_string_qr { color: olive; } .pl_string_qw { color: olive; } .pl_string_qx { color: olive; } .pl_symboltable { color: black; } .pl_word { color: navy; font-weight: bold; } .p_character { color: olive; } .p_classname { color: blue; font-weight: bold; } .p_commentblock { color: gray; font-style: italic; } .p_commentline { color: green; font-style: italic; } .p_default { } .p_defname { color: #009999; font-weight: bold; } .p_identifier { color: black; } .p_number { color: #009999; } .p_operator { color: black; } .p_string { color: olive; } 
.p_stringeol { color: olive; } .p_triple { color: olive; } .p_tripledouble { color: olive; } .p_word { color: navy; font-weight: bold; } .yaml_comment { color: #008800; font-style: italic; } .yaml_default { } .yaml_document { color: #808080; font-style: italic; } .yaml_identifier { color: navy; font-weight: bold; } .yaml_keyword { color: #880088; } .yaml_number { color: #880000; } .yaml_reference { color: #008888; } ocr-1.2.2/doc/html/pagesegmentation.html0000644000175000017500000000700612401076126021032 0ustar dalitzdalitz00000000000000 PageSegmentation

PageSegmentation

Last modified: September 01, 2014

Contents

bbox_mcmillan

[object] bbox_mcmillan ([object glyphs] = None, float section_search_size = 1.00, float noise_mltplk = 1.00, float large_mltplk = 20.00, float stdev_mltplk = 5.00)

Operates on:Image [OneBit]
Returns:[object]
Category:PageSegmentation
Defined in:bbox_merging_mcmillan.py
Author:Robert Butz, Karl MacMillan

Returns the textlines in an image as connected components. The segmentation method is adapted from McMillan's segmentation method in roman_text.py. It allows a more individual segmentation through parameterization.

Options:

glyphs:
This list can be built from a cc_analysis. By default, this parameter is blank, which will cause the function to call cc_analysis itself.
section_search_size
This optional parameter adjusts the calculated avg_glyph_size by multiplying its value (default=1).
noise_mltplk
With this optional parameter one can adjust the noise_recognition rate independently from the calculated avg_glyph_size (default = 1). Values greater than 1 let the noise_removal detect bigger noise (but maybe even glyphs). Choose smaller values to avoid assigning small glyphs to noise.
large_mltplk
Analogous to noise_mltplk, one can set this parameter to manipulate the recognition of very large ccs according to the avg_glyph_size (default=20). Higher values lead to a better acceptance of above-average ccs. This is beneficial, for example, for big capital initials at the beginning of paragraphs such as seen in bibles.
stdev_mltplk
This parameter affects the line finding algorithm by excluding abnormally tall glyphs (default=5). The standard deviation will be calculated and multiplied by this parameter.
ocr-1.2.2/doc/html/plugins.html0000644000175000017500000000275512401076126017167 0ustar dalitzdalitz00000000000000 Plugins ocr-1.2.2/doc/html/images/0000775000175000017500000000000012433405322016055 5ustar dalitzdalitz00000000000000ocr-1.2.2/doc/html/images/RGB_generic.png0000644000175000017500000014417412401076126020703 0ustar dalitzdalitz00000000000000‰PNG  IHDRä‚Ù˜ó pHYsêe¤ IDATxœ<¼WíÈ’..3I.SnÛÞÝs¦ïqƒ;ºF?Z?AzÐ £‘Æ×}º{›2ËÌÌ0zà>SõRÉ•ñ¹(ü?þ·ÿµÕÕ]ÜÝsÍ-j]/Ëüç?þëÏŸCûu©2”" $;t üõ7¿~8Þ‰Åò§_à¯O ‡Ñ_ßì^?¬ —¹™™Ö5¹áËÉ/ËååÉ´g\zr´ðžί_§ß}È÷‡Þj§OŸ>rΉR¡ÐÛ÷¯ó0D "‘›E8DD8@oÕ$SýËǧÿýÿüñbY=´¹ë¬óËû[ùýß½úí÷ßÜßߎ%D«ýåÇ>þåy óéôåóu("‚‰ÑÍ\ó>sâœRoÊLµë¿n«ÆŠÓáîîwÿþÃë¡ç”1¢÷ªæóÒÿòiþÃÏϧ)†ý~š ‘œÎËÇ/Ë:Ïe,oßî>¼Û½¿Ÿ2ÆA`?æ!q @@"d º®øÃüË¿ýGx$Iß¾{øÝo~½;ܘ{=?ÿòõ­Ý)‰–¦çyýçk¨ªÀćq¸ßþéŸþ—Ãíýùó__>þTkí­;‰çñÝwß¿~ó¦aÉf†®½÷öütž¯óíaúüÃøã‰iØþãóéÅ×§uIyúýï¾ýõ‡oÿþÕçzzúø§oµ·îû»»i*óå ’’¤¤]×ëÅT“ qÊÌ‚ÄÞZ{¾®çÅT)Q¹™vw»»‡ýû< ,LD]{b‘œÄ­#23ë¦u]kkÏŸúôó?Õniº™pH’3 w³óea‡—óé8L`èµyëîA)ëµöC¥<€+CtˆÖ„ }Àž\siÝÐÌçïnÓûWP2—a&DN¥¸)'IÂAn€„ˆîáæAD株ªkU{|žk‡ ps3µÖÃ`YÚå2 B` •B"½õ^  < Ñ=¬+!F@…± ïÞ¼ûÕ‡7w‡aÌ‚Ûý!¢º‚oo’ÛÜ„´{Õzº¬áÀ‡Aîn§©$íÞ‹ 2tL@LÄ¥ „„„ÇÃþx¼ˆ×·‡w¯_» A†T²j³µjFŒeíç¹GÀR-Yˆ¥ 1 p(ܵwNHáu]#Àˆ™‡aìç˵÷ÖÚ„Øz¯ÍjhÞÞî»aŸŠ·º^ÎO¿ü\×Lh­]ûÚ{#"BÔs'"¶n$¨µ¶óÜ×Ú!pL‘¹#ôÞÝ]{Eˆ$’˜DÝA…P<ˆ-`žç/Ÿ?úüñ§—ÇÎ^îÇQHÃQ k?®×Ëîd}usÇ‘ôe±¥®9ö=ÃEæç L6ÏK¸é²t †˜öûãáÀLèÑ–º\æõË)Dânù8äÞÚ0Ž¥”Z+!.kKäë²zx)<‘TÝL» ˜ƒ™Võçk.‘¹s’]¾™2õ«F Š0‹„[Êy<§ÃáåÓgëÁ¦– sÐpBZÖ–‰¨$$&ˆx};åÛwß~÷Ý4F03€’›m•¤µrø«Û±{||¼.µŸ–º4G&NB]9±GœÎK—àè\hH˜¾ŽI)m§Â…éÕ«‡ÿÆÌDûÝnœÆaÜÝÝI8u ìaáæk÷óª”r¦ÄÈ)í·7oÒ8Q*Jòw÷Ö†aÔ¶jo‘ÅÂÍL‰€$¥”sïÕT#Â"B­«vµq7|óîö8Ž™D[¿žžë2KJ½w& ëêæ¦ÀÛ¡jÝ»2¸·y®ëÚ›:ŒœJâ”™ZéLÖN9•QrA"3ED&B bssÕe]þôãÿü/ÿº¢î_O’“¹_j“Ìàîî5л=>Íçe¯Wö´žgì „¶¶áxh, ˆÁ×ùLœÞ½{{»gúÚëL­®ÕTëû×§Ç—ËZÅúGb f9쓪j75«×«>¼y HfÎ  fæ@KmkmnA„æaµû—sïFœC1óÓ®à˜|$õp3 gDB†q¸ûæ[)~ü §S›¯­DôC(¤AâTÃpÿêöávàHDîææ ËÚjÕ`A·±$mýÓÇ—k'Èc‰‰çE/V ÇÝH82 ‹ª²¤ð@waÁB¯î°”ÂÌD´ý9€·÷,²,Ër½‚õ®ºv­Ý‘Ò‡÷oöS8äôúÍ7÷7¯0 R)ãþXç¹­«{ [ôuç”$ç€àÄ@Âj΄„t­½5Õˆ‡ÛÝÍnÜå’ˆ´¶º,€Ì­UŒ3w›™ÈT‘(• ­v­ÝÕ€ ™raf@"bF÷¶6k‹ªPžŽ‡û‡~yþç?=>­€H”’¸c[{ͯ `ÊPÜ4¢’Œ6eF¢œLµÕ 
á‘svµå|>üx}ü¢à|?¥!ç<ŒeÚ•i¢Þš&R3SµùÚk-cN)C_"O»p3æ”Ä=Ì‚%q*±‘6í$!€7$–œvû<‰ù¬X«Î—5ÐÃ"†9@¨0IÎeš8%`J9÷Ö"‘©[8 ¦rwÜåÄnÀÁ"’’ “¤„´º;c¸ö^×$LL4GD"FpÛð" 4¹¹™vCI$èx}y^ÏgÂ3`Â0ws rïn«q(ðÖƒÄÑ…H(1©™”â­iïR $„z=_Ÿ¾oï˜IHMŸ_žüåg.”²8l\kÏàØ›]Nk„q"5·jèœ #râ”ñy†îaªNx ãÍÍ8Àíä@D RDDÄ&A GDJ~ä°§çËzY»>ŸÚËs‘2¥œS’2]{½Ô/ϧ¹öÌùþfÜMy^êãs}X»×fn®µ#…vŸÏ§ù †D’:„0C€Y÷pðÖÛõúøãÏ!4vÓ@ˆ’RÞíË´+YC™‰…D”¹×fÚ½£jT¸Œ‡;.š»»sJ"læ)Ë(‰· 5¼Î-'!&0!¦X›=¾,×Kíq#q9g!4Eˆèm½ö8?2¥HHR¦qØO”ˆ™|C„Éâþ–onŽ·ûq˜ÙTÉÙ¶–ïîˆÀ"y8e5'wÇörºÜ=Ô\2ƒ@¦DˆDu]—¥F¬ÝʘîoöSÆ4@ÓùôÖ #²0EŠæ¡fÖÝ¢_›V’$ $$YêéÚ˜Ò”¥¤@p3ÀðÖ›"ön½7Bp÷å|jË,B""Ýüñåå².»×#.k“)UÕV»ªå3[ç¶ž×q—ܽÍÝ=„Ù®Ú§8ÕÈahÁ©°ÈöÀˆnŽ´hxh×"D"D$"F‚ijó2õjó§g||¢ÞcŸˆ‰YX$" ‚öƒ¼˜^=Ü~xs[’´®O§ëŸ~>ýËž×YÃ’²°<_ôôtÙÏy¿K6˜°vu3ÓÞçU/—óãçZ/†FDá@„Ä"¥”!c!DÏÓÎ-- €™™‡ÞæiÏ,î¦FĈ,ćý0Ž9Çq‡t8$> àXRÔåºØÚû4•ùzµZo ÜíòëÛñ8B! ˆ™·¥Û¼DD°¬×ëüœó4 ‡IŠHJÓþÀã~g±ÛÓ4朢® ±$f 7í-8¥¯,Û½6m]BÕ‚ ˜0ÂU-‡áþæf’I ¯//ºÎLäêê}ØíxعY›g½,^µ­ªnnဠ̘‡Œ‰¬ë6mÁÃmÍ@ÈzÏIXR›—T€nª]3¨êåêC$š/óÇÏŸò^XxžÛõR1v­«r"@ìÍæç ܽ®]«åIÀ‘„Q‹ŒoïûóâË îfáàæˆ[cÝžY».󂈦% ÓˆLDÂLD¦}¿Û÷)if$³Ç`×ÓewDwÇá›÷¯onê~7MƒˆÈ˜¥$žJv³®íùÔëõ>ðsÍþ¼$üiÚÆã ‰ÔuñVûõjë¬m]ÛJa@ŒÁ"œó0”œ81:а?L7÷õº®—“ÍWRr@âáî÷ûW¯PØÝݬ.ëùå²?%%ä ¿Ù—,øréÜ ™ÕlÚ‡!_«’ðõ²¾T.(¬V[¿,ññÙ÷eýǜƜ¡4ƒêH4@Òrž ¬‚èëË:ŸOÃñ0wûÃqðp7É"̈¨]¢wó€íx„[xp*DÔ»“™i[k3SÌ)ÀÃMÿ~eL«ßwû»óúòÔ–ëÆÉÐ,œTû<ëRu©n‚LB`ž$•œÂ<ï‡ÝÝ¡.µ«€ zï½õ¨&E gI㈠óÒ.ë: §Œ¹#5%骧ËËÒæÝÍ@½/®æUm]ZžRë:ŸWsM…ÛÚ­Y™{3R–’ÊýÎö“Û:‚àÖ` BU{ëÖkSëJy(Ì ¢Jr¸½ @f>c´ºº5œ»k·ÞÔT‚Y’ ¾[Öu‡½»™—$¯ï÷ÿsɉðÿú?®¦uuààôãiÙËËa>ËÇ_š*8‚‚GëjfIÔ,„#`7–q7æœrÉ,e’a7ìm:®_úºñáþáæõ΢jaîjÚ:öÖRNR2è8 G4UõÞÇc¹;,ëÚNçµ)â›Ûòú†.Wû·?_ÏU?Ú«C¦Öôyn?uòëä„e?تà&%q¢ËéúòyíÚwR233K"âð´ð0ó˼\ç!!"„0Zw&Ad ·p&@433cáRÒ\uUGŒûÃnd¹~ùÜ./Œè.âfÞu}>1` `—Ä„Âäj êÀœö#îLJï>@˜u}úôi¹œ ˆ4ÖÓœ÷c\»_ BJåöÕ«éþ™D]/ë %râj^«’P7kMÍ<Â×s»>¯R¨-êêÃN˜É5À`ó4 EJN%^ߨӵ¶µõ2›xFxD1G«UÕ¸5].×”åp³ˆp߸’¤"e4Óó²òÍ®\÷˜;eâ @ðÂH)EKRíˆ8ŽDä\þûï¿5‹ÿç?_.½i×ÈŸ®Ößú±(‡˜1º`˜:F$$dB§LˆàÈ2îo¥L€„„Óþp¸»7ís&b3Õ®×óÕz'æi7n´ÝÍ7Ö¶AI)ç”%5ƒåZOçJ’9ç|ØOûƒýø©Oz^míŽ/§ù¯Oõps3 ™JÆa7µ¾V@f´µŸŸ%§c~,D"’sÎhæk—ùz¹\€…º¶¶¨‰y SGqSƒøJ%ˆ1˜9‰yÚï‚ëóË:dSÒy=ù ½ )m„Ì×Õ|³Ç”‡¶Y5ªÞÍ=„©÷ž$‘HD$óXN§Ö;!²° 
ÛÒ}U/Þ§}™nÞߎÃÝííáþuÞí]j­½·\RDôÞzï,ÔÍz7bêݯ/5<ú¢$Xv"‰Â2ËÝqÿúx?•QPìî€;ª—µÕ8çë"%¥Än°Ýý:¯uí1_ç‡WwB@LˆîËùZvF̧ —‘FÆìÚݘE" ¢w5sS#"&ŒW»=”øíÛ,øoyy>µymªñi™N SÒÚ¡À‘a¤H {ˆ00qâT2IÎe(ÓÄ©¤Ý-• Y$ !äd]­·0³u©Ëu~yz*9û!"D˜ªš/×ëååÅU‘®küôØ‚ä݃d¡µy3ÊÃÇ!•œ‰Þ%q  CYzœæõ§ÏËOýÔÊbm—Ò~(»B2¶¦M11¥ÄµöårÝßÞe_†a›Î­÷ÖúºÌ½®»1;abO©6]k'ts @"b €í‚YßPø¦ XÄPU/_>­çS¡ÝØj«Ëši(—4 yW$%"ÖÞ´µ^ëùå²Î ºSx)y‡<Œˆ`îyÚ±0,D©¦TÊ´ž®}moóñv¸} 2H€ Dt÷Ö•KRw5c$ 4wuïç^—†©pž„™ Âôáf÷öîáÍí›Â9Ìa¿Ó77üÓ燵.§yÜÌ›° ˆ›_±,ËÒz¥,D´9þˆ„¸ÛH”ÖlZ¯ïž_v7»|s“†=7™BÝ 6¤á†7ûá~÷þ»oîþúËéÏ?~ùr]««Ë¥ËBãIý%·ÛR´ ,¶¸@˜¹dÄT¸Lãá–sAܼrBFt· æ0sõºôZ÷»‰™ÑÁ4º{]gÕ>dffb&XV{™Ûý13óÚB … Ì™Y8ýåi}:-/672 ÿ|±¿×b&a_X,™ˆ­¯6õˆÕ[)¥d!í=ÜÝ]˜ÌCÔ*yuíj€ÀÄêÖZ_×e¾^ˆ‰R±¸0x;?ÓÚÉ#µ[½®,²»»nn†ÃMGFDòØ´5ŸOÏŸúëåñ)‘ò0umÆ)•2¬×……sÓ0ÃdëKï˜Õ\UU1 " „D ¢š (jnjªä­öÖtyZ"ï$—„nááàHŠoîÞݽI(Û€¦$ÃýÝþÝ+•ˆ íé¼Þ™zà8db¦ÑÍ´5b,9OÓˆDîF€àA% `Q%z#smAÔÝ÷"@LÞÜÝ XDX$©ª™D)¥Üï^ß}ÿíùçÏýxùrnµƒ0!‚Py¸»ÙÓEŸ¾!1"•a˜vûñx»»}He DØjÀ#H$ÜݺuíµÕuqÓRò¦ì ’÷· /§a|¤S#á2$!aÂPcBb"ÌI¬¹ªÇ_>]k뽃v1 D$2G"FWfÆÄRv޳»–’‘9"˜#€ˆÒ0„)R«+B€ˆì&ìu]{×’5Â7Øj¦æææÚšÖÚÖ&éë™f€~фɻ•Ãaw{óðjØXþ­ïüí7@§4L§ýÏóù"%Ç#§äà9sÎ9s™†R$ CÙØëÜ3GÄZ{뺙àˆAÐUYÙÝÍ#"™l5€Y„«·YKoq¿;~ÿÍ·w‡|8Øã“’­q3xøZkW=‡ã>%w‹RJ)'ߦ"‚Gl¾“*F0Ѹïî^žOç—óþ¸—”hÓW·@# `n‚½{¸åœ`(RßßL¿þ;×®9‰Gâ˜ðúó_¾\ÏŽ.‰9IÊeÜíŽoòá&•AD6G'b㇠j®­×Ú–µÕF) líoÎqNRÆ”SS38×n°5×iàÝL°Ô¶®ˆÌ <à¼èéÜç†7·;Ĩ îêfˆDsr"F(¹‹çÒúš„1,£›#1Äæw$DW63 dIˆ¡n¹ ‰œÌÂ= Ü1‘¬7°RÌ£0&WEFÆñøöíx¼MÓX†Ñ‰q»:±l”$ûÛ»qÐÖD„ÆÁ# ™ˆ˜ó0”iÌ2äa$hkÂÌM›ö1@Ü Ð<‚6V]/ë°Ï,¦¡‹bhdÌßøðöÕa13BŠpD@‘tw,^AK×ÇÇ/(8L¹==>ç’‰ ’Hm`“´"ܶc±É±¦D"²?îçël­»9ŠwGGf!Ší€ {D1ü§¬=4ÑѦÚ‘HÿftgAΩŒÃáîöæÕÛÝí«@–”˜ÜÍÝcS§Tm]×ë•…sør3@t&’2¤œYÍÍ¢¶&‚Ã$7CÀeîµBˆ –$ðù¹JNß¼*OçúxÒ®›¨ÆŒw{™ å„LIsš:ãÒf€iÜ圙…X˜È ‘©*3Å[À¤ÎWj»Ýԗ릿®ëšK"Äí̶Z­Õ’Y²ˆ ¶®èD)Ç·ïoÞ•i@BD!@¦Píf†Hˆ)b—ìi6†b€¦æfy†i'˜ó0:@±( ; 13¡Rbiµ¯lŽ`µum6?¯yJœÉº{s(ƒ Ôxÿæõýͯ‡Ã–š"I)G)9•´Ì5«ùåÓéåÌyJËr™/sÎ) ‡Åõå’Dö÷· áèàÛDh­r˜yœ¦ýa½^¯ç+nn8Qq÷M øÚü{ï[«‹Ø¼iD¤”¾Îz@dwWÓœS™&J2ŽÓÍí¸?Žû°„9ÑDDfA4u×Þëùâ½Ó×9€[2U³HXÒ Šˆ®fÌRrJÊD@náÃnn‡Ý˜"`mú|nã ãÿüóÒ‚( )Ưv8&d&‘iJÓÝóåTM `:ìË~Ç9#o³…Uˆeƒ*ˆôÕýçãíñÅ ŠlaÞPU"ÒÞ{«­®,L’Ä*»«yæãÛ÷ûWod˜™‘8¿ú;Äî›E`îÛ‹úÛû –<ÂÝH¤L7ãþ@yÜÕóÅ\!q uLŒ©ÚÆN 
@À=sryn”‰½ûò²„šŒÉ«õÕ˜iÜ'ˆ¯oîÿñ÷¿}}7E»Æ×ÄP!:E†[Ê¥ÎË—Íì|Yö»iw˜ ·öòürsw áán!Y6™«÷^×µ­u7L)I)©Ý—¥>yô0I"‰s.€ˆfàˆ¸Õ ³#q¸›‡p@uÛ¨|µ_½€´›ò´/¥0± b#ll¹Šº,º,ÚÖqšXdÃv¸U…G8`U›¦ X6ÍØÌºjí¦¶un× |Ìv›Ç¶ëà^»>>ÍKÕÃ>}y¹žÏso=àë˜KBw{|uÈûi v¤äœ0¥Aøöõëµ"îǼ›$%ßÊ â«Ë½QRb4‹'D)Ez‹ða®ÌBÈL"‰X"ÜÝT{o]{O9C˜õfªXoßÜïß¼IÃÀL€áø•Ø‚0l¹üzF·@"˜ù<ˆˆ2N÷¾C$ ïón€±ªŸR# ¤„ˆà #‡vÊLˆ×s­s—"ë¹y·¾¬Ì4ùÓçµVsCƒvD IDATÀ`F"Ü ôáÕîí›»œÂU!å}¹Ú¼pJ‡‡;fªË2?úe>Þß„»z¨ª™·Öpû¢™x˜F$¢—óçÇ—Ú´w=Br÷Z»o©ÝR2–’Ë–—ó€¯})]9Ÿß¿>¾{}»ßeWõ¹vs’1kàõiõõZ«#ÑÚHrÉ»19i·yõqšàtj]#¶TÉüæ>}ÿÝ«’0ò~’aâ$ˆ$HhŠLÛ‡c nÒ„{lI·-@ˆ„¸ ™—«›21ºYkÍÌ6]ÜÛº B¸©uÊë÷ïÇã^D˜y+DŒ sØ4fwt÷-¤K›RDnŽ€H°k"âœ]­­kóŠEÐň¥ŒNr<ìDÜ1d9¿¤aº¹½ùx9/—ºÌ+„û L‰ßîÿéÿáWÞv#B˜*måâ¦Ö"‚ˆD‘…§ýÈòf¹Îóu&¢u­§—SÊîÕCÎRR:¿œ|®Ñìpw&5uYÖem½û1€bÊ9 eØ;bJ\†ÒZëóªj/OÏÚúnÌ»]¹y¸¦¤m¼Æ×ØÑß Ô–)&DíÊ„áŒjFˆªên¹dÓ­¶ååäf9§çºÙW1â?÷=ÖÚ~ürý—¿œz¤ÿéú«×O¿ùîþõë[õ¸.=ˆÓ0F#]çÞ5R–qJã EèãSE.»]~9]Ηjˆ’äæ˜û÷÷oÞÜH%o ümÂnζêÄ·ˆ €Í/Á !B @2‘äD€È¼Gp"ÜH¤A¸÷¦ë ཭­Ö¥öôêþn¸=:†zGß”)ÜÀl˜ À|Û°‰Ø<[DgúŠám[tQõóumª9Ñ$‰xxksrJsPÏcuÞ>îFÜréˆ(ëõ]Kæ<åu^çç…¢; 1 ”oÞ|ó?ýÃï¿}w? ™Hq:T­¯µ_g½^Ûº°Åp<¦Û[N")Q.øÛ_/×ui×뼜/è3½u¯z‡µÕÕL¦Q{z|šç•RᔉˆyÓ†™‡}ʹvÛ¤Ö-3 )åa˜†1»õóã‹õ>L;$Ú2¦L¸e^1œ  ­+ o›€ˆäMÍ]µ§”SNKèM­÷óãcNÂH€a_³Ïî°]1"C¤RæªçYŸ^ÚÓ©ýÿêÓn|zY`xuD`¤ó¬ªžsJBàöüÒ~y¬R3ÿùãÜšCá8Èß}sóæ~âĸ@7G"2303ˆp30×pb"ß™@„H)AI""7d$qwB$Ù˜( ¹j«KgÔVU{µP’Žòå夨ÝOÓ>‰‘{|%ê[$Ñ jïáfHH˜€rB¼ÌË?ÿÇ/«¾¾›ÞßîûèZÝOÍN0|6®”«Æ»i(I¶uEŒpiuíÝæ:ÅzYë¢9Æ&äý°ÿío¾ÿý÷ß½º?Ž%‹liÞ ¤p ííôtþËÏëy>× z¼:ܽúoï§œ’"•rN)soíp» ]—¥ÕÚkCŒ<åY ·Cë6Ÿ¯_O rw·G¦-Uàþ5KîL ŒSÕ oã@ûý˜d尿ÕÓ—G7e&Û*!öÞÛZÝÌÌ™0 2Yë’ÊX¶u¼@Ú>Ï#JJîîaˆ›]fj‘¦i7–WÇœϳ¶î­ÁŸ¼ßíùÃû‡çÓ Ä¹¤"°ÎA)åBuéîP ß2üòtá”ÇÓ—ùrnˆ€P¹=¤÷e?fDtØZ lJzhïjÑôë2ÝÚk„³o†Èõëp'f"vM‘MTÿj‰™m^qou~yÑÞYŠ™µ®§ªôËŸÿ\Áon§×¯n¿{ýî0RNL4äËàh¶“np¾.Oϧóu¾®µ*L%ßßL7Çi¿›’ä—Kýÿ~x©!«¢6 ›/Ï/§žìSãH‰™‰üÝý~È‚  ðºÎµ­LØÍE0…ÜÞÞþúû¿ÿðúõ›‡››Ã^‰(܃8¡·µÎóùóÇ—Ÿ?¶=š05kv¹Øq§IÜ QX¨@&NHeÈh¦u­ÏO§ËçÖ˜p¾^)•›‡»ýa7%'ÙØŒÛW†»‘t¯RÞ$¤ÖúXŠõJ„9‹pBÿbPèM‰(ÀÍ|s zïn¾q3UÍ „›!‘ԭΗÓåîáöpØm3οÎ@ˆ –„šóñpæoï>¼ùüüüÙ,ÌàtÿáˆO׎”“Øn,»Bu†”ä`«ÝSæåº>=ž{S@DI4fzó0 …⼬9‰ 
@k½ÖÖºÕÖ—jk÷îPJftDØgÚÒ¶¹¼mx8øfná6¬™ÙÜI’G¨)©²;nÚGD[–u¹nªžW{Zíªë¢švŠq…—å2JáÃ8ýêÝßÝ!%IDMýóóü¯üå‡Çk’4Ã6·ô\_.ß¼ºys{<]æçs?7½®þéËõËÇ“k}®ý¢YIÆœ¦„»}ywܦ~}-ë¼4wí­Íµ-MÞ¼¾ÿí¯õáÍë·÷¯†\"11"ºª[ HήƔ’ˆ hkD”Jf5F ·mÇÌÜlËUE˜ðóŠoy?þòxº®Ó~7ÜÞïwcÉ"$Lÿ)Êo‹XÜV €í´nÖš~  ‰"‰)"º¹0!`¸C†ÿ”ñÿ¦†y†n7¥‘8Æ!'æ’„¶•eÛžxÛÙܸ¶˜+—²¿9æ¡ÀÍ~üÕ»Ãþô庸¹W‹O¶ÛÏk?OH8މ)cU,}'LøÇéiË&Mµ¹;ÅXÕZUU…U¦†ûÃ:õ\Dˆ˜‘Ã0®(D<‰é݇ëßýðÃ7÷›¼2µÈŒ„îÌÔ¥ 'bfBb5Wi@´º¹Nð׿-uYöÛwïâaë|q‹ÕºÜ+5b¢­¶²4s0'5[–˜¶ëÁ‘ÁÜbŒ‰ \KQ¥À!„^¸ Þó¾aUÕºü©*³¸j@·‹‹sQáR¬»ÔúÀßa"4P¿ Ò=ƒ Sì&pF[ÓÖ"¨¨ªm®»›ë<¦L%Þ­‡Í™—Âë©…c à>OÅ U †„µ) 9æHŸŸçºHÇÌ bÎáþfõñv=¦¨¦çóRš(КCŒðnŒiŒÌèà#)`Uìo¸9±ªÍU¤-Ã`€/P&ìN ‡È™ÉÝZkóù\Χe:‡©ûrjŸž_f-yÍ ÄÌÕLÕ°©?ËŒ§/H¸ZoÆÕ†!­23úo>nßÝ_UÕש¼œêç×6ÏÒš[•s à®êÔ´€‚ŠŠb 2rü§7·ûMl¦n†! !"…”orz÷þúýÝý÷ßl†Mޱ ßîýÈ(*ŽˆH@è-ZC„¸鞯7«ÎPH9!¢‡ÀÑ !Ũj`°œOÓëÉ1 › PPÓeY2º©ª)88cd`ÂS_»Ø›Næ™åbbudSαŸ>ͼ.ÅDþ!9¹;’¼U5¬{-у@]z›ãÌÔÞžT-µ’#¸»ˆžOÓîæjw3nV!ð¥I¡¶ëÄäÖuu8äÄj&bÔãs83óÕÖC"‡—祕êfbØïÆ÷÷D>O§©Hµ¦ç*¯§â/åvÑÛëý1§„:G£KP‰Z­­,Ú$™‹éãéxnKÚN¨Š¦ÖÄÌÌ»œNKm/0ÿ‚OçÓf»ÿþÝ üù¯Q–ùý!ÜíSrQ{:Îÿÿ__þëçÓtV0Ëìy… µ†ÖÐÝu7ÆßõýÃ>Ç€àHTK‰Ä! "„ºÌFá0 »ÍøýÝÇm^ tÝQß2…½žrr6°õ@3vwU3Dº$î \¡wË4/ç³”ºÙmC^]ê \ØÜÔ¸Õ’R"3k€9! 
!Ô¥ xˆ.b­VCd"p53PFÔ]TS3{}y‡ÌÈn„!E0!Pࡹȯf= è=Gc¦f®ÖDZ•¦Ò•æåæýÝíûwyû’­ª¡=n!Ð;ÞÁ€!¡Šõ⛈#¢C(Õ‰10ºÙ¯_æÓ©Ôfæ­Vé›÷‡÷·ÛÒêñµ4ƒân= 9¦Èb™Å cY Õ\z—æhîjnæKµiiReª²4(zjŽÀQ˜À;G›(‘2Ø|>›Šú©Èqž”‰ °WèܼŠ51SW‘#àÒäç×óù®µ!åßßÜï†2ÏnØÙf½ÚmÆ¢Šü?¯Ï§£:^Ph„IwëðûŸ®ÿ姇1….Ç+ôR4xPNávµ]_ßÖûÈ‘ˆTå<Í*ÕD[Y¬•9ëíÕ-Q`¾,EH$ÒÜ ºàDBìYz3u·RµL“µ¶ÛïÆÍ¶KǨ:Œƒ™‰špfʉÁ‹X ‘‰HE †ž?Õ¦ÞǪŸê #ÄþØyÿÞBà¼Z½<=Ki«Íj½Ý¼MTfŠ}æë6!ªYÍ{×Þ^R@dRó8¦ïï¯nobÎðæ·2Q?þ3s !çHDHäæ„ND1´Ìâæf^«­’75O ¥éã<úrªÅÀ50ÆÄû]¾»žÏõ<ÍH´Ê˜2-­}y7«´sPT3#0—f­l¶Ûœ»Âƒ¥ji ¢:}:ʲTF_­ÃøéÜÿëÓ~½ºÞ¤Ã@ªÒš ±ö£-ªÔ àÒdvúåyz™ÎͽZ—õÍ/#:8¸˜Fb•Ç—§÷ËØó#ÓÆ7+»äÑ OÌn÷§×óŸÿò|Ô¦n‚½LD ':l‡ßÿxó?ÿðñf¿îm÷=ÖëÕ…oº8 !÷×§-µµ¶œÏ¯ŸÛ219…Pk-eRw$~ËÐxwm;æ²™ykâf ®u^–inó¼;ìW›MHÕ«¨2ó°Z—ÚdšÁ=¦™ªÚTÚ9¤Ä½0dj@Ý;uW7AàHˆ)2¿Å~Í´GkRòzóôøw9 1¸‡DRfUéÇs3íõ#ëoW/|«H]–šb¸ºº^ï·qÇõ:¦Ô}B ŽÝBx“ß ½·’rNj0M­ ’!ƒ%îö#Fò2×ǧ:'S !eÇPUÏgEÀufG8ÎjçEšÑëÍ@Õ»›’-óRÖ'ÛmVûu·_ç&˜šù\­Q±Ý*¦”‘ƒ;~zš~|¾ZÓïÞ­¬¶np¼½íäÀ€p.~Zª¨`©RO@ÿ÷¿KþMµˆE&Œôõõùóë—ýv›C Õ袇ºWu&D7@‡„›ux¸þåÇë¿{¸ÚmÝDú¦'¢é²: ƒ‡aµ&$D-­M¯/ç—ç:Ÿ—ùìnbÖÌ—Z9ÄÍi‰)qŒ1@/¥ôQ½OÜ%Ø®Žº/óŒ.çד‹nûa½ ± KèàŽ Ý™˜TçÉK1VQp3€¹éŠzõ…ˆ¹˜Þä O9m7C ¡ÿEp1M3†à€ó²‡pš‰ˆpZTµµÖï}wgB srWfÊCÜì×ÛÃÕz¿1ô²ª¤4¤DÖõy¦ jgŽož.y!¯ªâÄ!%jM?Ï*"ÐsY"µTmâf8g>ìóz¤ãT–ˆ(Öšê´ˆ(TÁm½NMÁr0e—e1G-ºÌÕrÀ¯/óÓké»P×HxœŸ‘™#Ã2µàæ~»Ó2Å’#3j«ä´öHZ,&Œ2cèî)÷LˆL°õŒ5¸y«Æ!zµvZÎÖCŠ€xYYÁ™88ÇÀL)’9ÆÈ›u¼½Ê¿ùöðãÇ««íj³YBUEBFêkbwuºúj­fÊ¢ó´Ìs‘e^^¿Öyª­5ÑêVU–ªsÕœp»‹î`*{î¤5C 1½ù{—¸@«Õ¥j ”¶›a‰ ÌŒ™!‚¹¤|pwcæþ¦KmLÔj™QÇzn§;Zn D@¤ 1…@E{þf~9ýò÷_Ö×{ç ªŸ?}zy~‚ žÒ™™øÂ*ˆ)2S^Åq³^­×!g Ç‘8f$ŽHH=Éb""ºy7ÃU¥—A:²?©Hœ"ÇP«´ÚŸf˜OÅš‚k«ÚDDß’±8®Óþ°‚@Eé4ËiR š\ÒvU¡ B óŒräó¢:Ûë$ûuܬâRä<ë´´yn&•˜¹¦6c¼Y‡Ÿ>ìüp•P+a?‹º‰5—=ŸçZ"LCˆ1˜š‰µÖÔ½.ÍT¦YÎÓùéùå°Úlw›û«ÛëÃu7.{8æ²^„ùtRsdš]OõËë|:ê8¬~óíÃß}¸½»[o6ã0˜©ˆ¸q@¤VµÌLÚr:Å÷W‡<=ãXKQwÕešj)n."Ë<Ÿ^OÓ´”ÚÖ›õw?|{ûî.ç !037QwO9.s©µ&UG3Aä‘—¦î®¦pÑSi"Šh&B!¨ùé<}ýòøý;k¤/ÕˆÎøm¸`Ú©nhÚBëÃÄëyþ˯/?OÂçÇÛ/ç?üpÿîn¿QË)^ Nó¹ê\ô2+1ë!¥0gSe¦Õz(ÍE¬ÏÕêVææn)%Î ÜÔkÑi’e®hf&nâ^Õ ŒlWñúv»ñúj“S¬µ|~:N³'{9kmn¢RÊé ¿_Ýb1ev#냭tîoêÚŠ4q&s)ÆRH9ºvê1 :˜ªº{Œ¹ySµaCk/Ðüs}!&S“&HÀ‰±ª4×è¤îØ£ELŒæN‘Pݦ¥<Öcy©‘x=®cH½ tqÚºÀÚÄ(ðâþ8-__ë<ãýíý?ÿö»ïîvÛõ¸ßÄù@H¿wfå|B÷< ãzÅ1 
’Ù…´1ªZΨ=˜W‹»çœë(ÇÓ„ÄbþùÓdþðí‡Nj°@h5F3sƒÖšôk[]UÜsg1PÀåüë~sØ­Wc!ÄBàKíèI±VÏR[[–‹}#½dº‚ð‹ž6 IDATˆÒZNÔ Ü¥ééx.ËßþøÍõý]±L‡Àˆän N9 +¨0"ÀóyÑì" ’šR¼d/½ ,Í^^OÓÌ›uF€óyþüåñóçÇe©×7×§—×Óv=®FQíin73""d 7é«…9h­UZE·2O­5Ž!ŽûcNI¤¼žŒ3¿”öïÿù)0 ß§ž+eùüxZ–jfÌ”Æ!„p<–Z9®VÙœ[UU³@@CЉ}šÚñuiK‘¦]ëËT·×¯ ¨ùn»ÚnVÀLûíjÙŒ‡]L/§ú÷Ú¤ª¹·y>¿bÙò&i¤Có2CÎÑ/˜Y·ÎR77‚šHJCH)„ӰݬRŽZŒÈÅ´GÖÌÌÍC<¨­V9¸9r@P—"1Ebj*ääîÖTš˜»0¶KtÓÕ¬ßw¢*êÆDé¢Ö[³V[í9KÑ.oYP€sm_^‹xþÍoü·ßÿæ°[÷’©šJQ•KÃÄUÚù<¿c·›«”3€‹;]lv•ÖÏ%Ë<Ïó2ϯOb~}{óp7¬ÇBkŠ®uZ:ù@Z31"&FˆjÆè)`kz:ÏÂzû©‘8qh"ëíÎ0¾>=þýoŸkY湜¦¹‰¥aW+,ó —Ž™‰¹1Q7ÁIZ“Z–ó©L/e™Áµ{TM,å4*X>Ĕͧ¹BbÈŽ ±ʟþúùú°ŽC`s8NUÄ:C!æ¬êÓÔBòf3.Í‘±d°ab i®õ|®¥¶Ò:!œ©ã»Ô™¡ßPNMýo_ëvýúþáêöæªïýæ f~w»¸~ùò8™`¯§JYNÇÖ˜÷Ì ¦bîD—û^Td™'•Æ|¹ižç¯çe; éütµ= )ßìûýzù¥v 3w{õŠ—RU`…Ъ’;VEP9päjÓ¹ÖóR½£ ¼‚9'1õ.˜c)µÊâÍæ®Wˆ.Wƒ¼‘‰Â¹¶OÏKÓø›ßþПÔC_ô¥5æÀŒ eÑÖ^ŸžNO¯Óîúj»ßtþô渻´¢b¢ îµ”^ð}}zyy~Nc~¸¿»º»énº‡È=ˆâ½<-:?Ÿ ¦Õfp3êJµˆ©¸6äÜÔOs!„!E "ŠS¬¥ånïnüîfYÊ<—¥4G!€J†ØÌÈùíõ»tˆ°Lóüú|þúež_ÝÕ›z;Ï­ŠçAÆò.)PŸ2ÀMÔÆNS«O¯ç›Ã–™jkó\ À(ĘB-­–†Ö«ŒHµJë»@`ÈCb‚ã©-óìRݼã’(QL¦fÞß~H¢Uñ¿>-?|=¿o9óþUö”"3"z—$]¥Ì‹eŽÙBHÊCtDS•ZêùØóòPšüütþËëyorôo6W·Û«ïnϧr<ÀÄÄ@Ì\Ô¬3×Ö"$BW‘±©ªYkrœª¨!ƒ#TíØ4'& ØC„À‚ÍÛK3«p³½Ž)wæ¦Ùå Dr×ð¿¼: ?þðÍ¿ýæwÇúXîZK­Ër:•¹rÌÄéæÃÇÕz Ýí&'l¥öàRgÂÔZ˲œOç×§—Z[Èc’£‡‰™9 ›€ªKkˆp>Ï_¿<Þ½»s7ÓÖ. 
&Ñ2Í<òf³¥À­Ê´TO)šªJE¢”@B„œÓ0fD’ _V ±–Ťy äà=ÁIm^–çÇ—¿ÿµ,gcPÀsÑc±/NBÍH_Cz–ÍêñëI€B÷à‚2ˆ+âÓ¼ˆ(§ežU@âG@^Š˜ù˜iÈaZº·ænŽ1ºùé\Ê.ÒÔHìtZÊÒ܉C¸,…†!•ªKµÚLÝÝÝnn¶ÛíÇ«‡u^›éÿ¿ÿ-µ™B J)´*LdÍš‹ F$Khêд™'äà>0M®ªêÍMM¥”ÜÕÁ…e ý‡hÈùáîîÝÝm޽%ào…W" ÿãßþðÛ¿;l×`.­™¨¶y„q³9Ü¿ëì¾^‚‘Vk‘]µVXæYEÌ Ìæã©6 ã0®v㘆̄)§Ã¥^ri€ëa½ßMç©÷/E?iâ˲LÓ2Ÿ§i*yYzÏeÌ!¦˜ë2·yé·V qˆ„ˆîœA•9 id¯¥ jt‘•ÔëéüòóϵÍPÝ_Ïõ?ùïS*µ™zm.b¥LóTÓÒjµÛCŽ–Šf`æ€<-Ë4|>/¥sw#RRQicŠÏ³#¢z €óRµ&_­Óz w«o>®Ö×ÛÕzLãÀÌÔçy*Ë2ÏËy*gã×i¹9Œ]ïÖ.Å7Ûñw?=<ܬþúóÓ/_¦©@L¼Ép5™PÈÃf?Œ+DTUiõüò¢"æ bÏsùË—ç < cl“üüËãwwÏ»Í&‡|µÞ|ûþöe>Â_ i‰‘A,3;4ó¢™(á¤dP+ø ›hU­MºéDÐù±ˆ`Ýë£~Šœn®¯¿ûø~»Zõ˜À×ÞÎáß~÷Ó‚ÕRÊéu~= «q{}‡!æÄÿÀû?ŠRk#f$ª¥N§išf—ÖD4o77ï÷ëícο×c¶)%¬ªf ÂîÚ¿”úüø4 CŒ^ÍÌ›Èù/Ó$ªH\š~}]Œ‘‡`}_F©ßVfÚš”’…ↇyÒ²!ªbÇEŠ˜ª50$€ŒÈÄà†D—‡§»òîÞ¿ô·%‚†~•…Û««û§ß~ÿða=ŽáÒ@¹$4ºîÑó$áùó-v·7Ûý.¦ôFutpsk¢¢Ú¤.¥ÖÒóö¥Ô×çãñxÞ^íß÷M‡~÷Zˆ À›h vãÿß c‡&­ÿbJ îïã—_>Í_žDTÍ–R1æëwûýÍurÇÔ™:!æÆ1uó·ß bf‘³ªeF dH$×»ZÚéùùôz:¿¼ëu$r µV§s“ÚãQ@D®W#Þ¬q{»ÿÝ÷7óëÃj‡*ÿûóÓR–ù|Æ!Çe®î¦j/§åOþ|<-Di 1õ„AÅÐÎfª,eZ¦™¼­WñŸºý¿þùáî°^9†ž_3p|s³;êÇAD‰‰H¤ƒ8{RÜš)!8õ"‹»¨?Oõå\¶nˆ¯ÇðpØ$ŽÃ°7[¢Ë6÷€Zgl¢+8™¨ûRõñ¸üåÓ‰L¿9„¤š˜oÂÛÄ)!ƒ´²LgS1†¥)2‡¡_nàm‡<žŸ?µå¼Ìåõ´|™–¿<ŸO³ÐÀ˜Ã˜#Ž!Õ;5 bÒ¶H+ æà†„ÄØù‚è†n½b€‘xHùîpõíýûoÞ½»:ìÇ<ÄúhÒY@x9Itg8üøûÊãØóÞÁ^Ð=nPQ‘VæR–EÕDš´v|9Vi›Ýæýß­wûa5¾=Û®j­”¾šª(¶Ž!'Žîžs2{ 0ã?âfN„Ì©÷Ÿ(S[–Ù‘ÎEVã *ýž"C‡„€ˆ®fpQ”;sÞßfGW5$é7pçééõñ—_ó8p'B•Ò[²}ù€È˜# ¨ àMúÅÜ U¯¶cŽ<-µ×²cày‘yQÛD3ÿòu: P‚Qô3-U{ ä­’ÕÍ&üë?ßÿëoßí×™°;Éêb€ävY8»·Ù —¨+“™[k ª"&õ’ÑwSÅ\ ??žþüËs;ßoðfÁ˜TT ãÌ@dª"uQ‹ÃPÕÅ f²HæÎˆŒ˜‰7ÌÓó—†Öš–æg±ŸÏËc©³YL¡VÓZzf# ˆˆ¡ãð,U]¦¦jhýŒ@àäêéf»ûþã»ï?~s{};ŽcÏ_ÊRþßOHßâí"@iÇ2™ú…)g¢ÒZk­•jæµÖešOÇé|:…@›Ãæã·ï×»]WÖ™qÍLMZ-óòòåëëæîm9=?¦aæÇåoŸŽ_ÏÓ47‘ZÅÔеCqÍq*úù¹X™Ÿž'i‚HޱUpÕÈœD@DÀÑæ2oFúýoîþÇï¾9l†KJÆUÀ‘¨çÈðâ-9€ª!AßÌÉÍM´žO­i´ã|Uýx>ýò2H¹ÝÚnL9E÷ØæY[‹y×+1¦Rf“J„)ç0nd.œã sM¿Ž™`–EÀ¨9 E‘­R)¾t̉s ­¶œ$æ€=0§0„Ðæ&UW›ñê~±yªµHYF¤b`f‚ýzõîááýÇÍf›bJ)a7?¨Wü/4ìþÒ%ÓL:P­÷“ÚRjm­¶y^ÊRêRêR8ÐíÃõþözܬCL!Ƈ©Më²ÔežÏ§å|>½¼ÖRrލÖfꥶâó×/µÙj»yøøþêæ*yr¸Üù¨ýŠ¡þuwK1¬Wã4-óR˜PU#rŽ”Ã? *—ò`oO;!q_sßOõÃÃõo™éxžZ­fB)ÅbΉC š nÔÕ@ss13kµMÇ®’J[SŒ ÀKm²,®0x$&pê<îÖBJãÕ>äÔgs­óTçÙÝ9tt@{w{#<ÕE¢úžpG¨¢ÀC¸>®îWüøŸsk‘!FJëØצÓkYæËtŒ¥è–ãÍn·Î90“¡©¤! 
C^!bÓºÊãjX§<0s—›úŠÓ¯6qwsg}²ê¿|Ï -¥™©ù<Ï˼LӲ̥•ÅDÜl†õv}ûþ~½ß®·Û˜sgÈ«ªÌe:Ÿ¦ã럗en¥:@Nìb‰™amK«*b²ø¯.?ÿùχÛÛ‡ÛÝ6)1ô37"©j¿À‚Ys ÛÍК.çsLØ£Ik¦1ÄN«íH sëcœªtì¨ú[H:ŠŒ6ÛõëËi>Óa¹–æ&Ô¹,†Ä §—ÇóBë«ùÔJ5Q¨UkUs ö &ÍØšøñ$çskE‰Â@ÄnÒÇ!Rkbb¥MÞ–Ãõêîvÿùéô:MbVzpŒt3DØ)GÎ9­WCŒ¡ûk„Ô¿¡Ëèªõõˆ‘‘CLcôFÙpÇÖ¬‰›¢a›gY*Ÿ¾¹[®È\[­Òêt|­ËìnD~=_[Yæi2kG+F6ó~%qŽÁ‘Êz•CóyQ)Ó4å”þöǯ¿|þö§îß߯6C_ûL÷vXÁÃÕÕ~»Ý¾¼ÛùD&`FfZDÝ/ØM3C&3¿x$€H¤¢zÀŠ9Dmíé×Ï1GäSŠ­š7ík£‘ˆ¸Ë¼¥üýÝáÓÓdfÚ :º¹ºÖÒ0TòУvŽÐJ•&@CvwtCŠé|n¥T­“Õ)œþû<ç1ÜÞ 9‘6õÐrÀ2Õ²ÔˆºÃn³¹Úï:¿Üà_z+ë9h©i“8 &Õ¡o—ÄÄ1ÄÚÄÔEŠAÄq»×ÃvÓ/TÕº,óéhª€¡©/K]ÇÌ€Ü$ª%À•±VhŒy³ÿðí7W‡}ŒQÝ^Õ¦1¦ 1rdÆ€V,Ç0¬“ªµExfZm‡¬C„¸N›Õ¸z#ì:Óå>7BÂ>J¨H¸&©Gf/Aëbtw¼$ìÃçÇ—~ˆ¹ÚmiX›í:™C$@ìeù¾à™ê|ž¾üü÷Ç_?i]Z+Ðù/ëm7*Bdí㫈:¨»w’ún;6ÑV[`¨µÔÿõÇ?_ßÿôÍj»î˜B3K)99 ÇÄûýÖ[9ùrzÆõv£jú¦"ý¾5"0T¤Vë²*v9šRŠn¶Ù¬j“óñ8Dv…¢H¥D&`ÍÀ!…8Œëë»û»Û[Çÿüùù¯ùß~VëDqsíª€c­jµ™JH+D0­Dc\ª–".•½!„4k¬3nÕp©ÞÔæj1 ‚ïWqi*‚ïÃb²<½–Úv›õf³I#SèáSÇùù±NSp7©¢Y[–å4I©m®*J1äízÿð0n61%~cýé¸L'Q5§ZˆL–דœN[À@1†`!Žû›‡wï®o®×c÷"MDkù?\½×dÉ•æy„™]å*TÊR,§9äLcwgöiŸöÿÇ ÌLÏìf±Š%232Âå&Î9û`l`ã-€ Gdøu³#¾ïû ;Ï*⃠Á±I!µ Þ¹UïÒR¸o¼#¦â¤6Õìê„ñjÀ¬Ó«š–]:*_&¢Šu€J Bã ò¸J€Ì}ó»¯ëÀ…àœM¨—2Ö™b%ꦔT`s7¬ÔLR÷§x™sÌ&Æž›a¸{÷n{w_«U@ƒ¸LóùXRµ¢e™ãªk–ó¡äŒEÖä¢Úó,ži»(p–3{‚špÃŒìÈ´nô£®õž‡ÆfIæ€s0—\J±«d²¶ùVÿµ¤ ÏÐÊ2+’_”œ•=Z¦Ú½µŽ•\; >x3#vuRÀu€lêØ«VÇH©ÐØþü—ËáÙ1˜)‘¯û‰®«.V3ÕyIŒà<§\DÅ1BôÞ€¨4s޽w5ìL‹]žŸÿ–óûo¿¹½¿ó!ÔÎ)Å8cŠÙ‡Ð4!ޣж«Á1Çeù‡V™Á *NÖìî¸.“Àñ•³ÅDÌ<¬úç§CÝî¶y‘Kê±s®éû‡W~»¡àˆ±uÄ<¼}¸Y¯Ú˘²PÍ!,š³,“Š‚aÉÌØ&JYÉJ–¥¤ÈÌäS‘(„¶dú˜3ˆ¡IÂM0 ÄÄxšŠ™ï<'cYNŸ.ÈÜïj"YmŠ}p«¥8_.’¦åpAD-ºŒÑrß„ÕÃÍÝ×_®ï*|ÚÌbŒRÊt>/ãÉLØ9-ÚB‰jŠ„x.åéR&Ä‹?ìþx_EÓ*šR10ÏFj’bZæÔ´ÎŘJ×ølY¤Bðlöïè6xA‡V4TÏŒhZæËáðøËßElµÝ½þâ+fBtµ†°+üE] _¯nϺéRQSÑR´dB\æéøôüó÷ß—8®VAEÙ"#¢•œ#³ëcÞ6ÌÌ"Ö4¡¢Ö9B•z¼¡còž½wH 9K´<]N?ÿ7çœóîr<ýôý_Ç˱”|<ʲ¼}ûvX W£—¡U­r^ÌìÀ¢b×ÂÌÌyˆ5<BãÔ”ûÃöîf{sc{I0[)€ˆ¤yºLþ©]¯C[»è‡àÐù«oÎDó`ýÐ;ï§)ÎsÒ’‚¡Iã!rß7]ë‹Be6¥TÔÀy™¨]&åh9e4õ>™´ž˜ ‰–ìõÚa ™–—ói9@…·}W›ŠhnVÛ·o7¯^uÃÊêͪ v}+ãtV)À®,KZ0A$!ï—锊‚N1ÎG sŒÑñRr,YM‡vu»]?î÷¢r½›EKݪ’yŠmë‚çÀþ2GS[Jœ–%—âœó> S1C$®©=W} ’i©fýéø|9¦i™Ç¡[ù¶CbÑ*™0CHìØ"©¦˜¥Ä<ŸU5Îi>ÛÀZ\ß5E„‰¼¾Ïš’ý#‘™¶ ¡® IDAT½ˆ¦ 9•®oÕ \A9ªÕ.ŸR)`HÂþ¥Tù釟œ]ß|øå—ËéVÔ̌כÍÍíÎ{WÅúˆõJ¢k^ TÁ¸ªÖ 
(E¤ˆ2W°‰!¢s®iÀQÁÂnÃÁuãçùr8Ì—“€Ó²ØË¶™ +(…Ñ+®5k‡áÝ7ŽùÓÓ”Ó¾äh9!°#®NZNò˜ÍÄÁR”Øi&Ç]çlYDŠ ‰ ômzÞô]ß·¯ï7]PФe¾ìŸŽŸ~))6!4mCªešó ÈwM·ú»ÛÝë·M·"æ”buûKÉ’SžGɱl%g)Ùß píy’±ìÁˆ.¥ôÀ1mœ±'íÃOßܽ»ÛíÚæC’\;"¬Æñ” ;;_¦Ð9ï ¼ç@N£%)Ej3Õ¾š"3!€š€IÕÍËt¾”TJ.‹Ìé|©Ö‰:@0C3‡D¢FDH®NL‘Lr/ó’–i1€¡9-HèÉ!¨ê?ŽöŠ–Óú Vð!©YÎÅ™ç‚+9³ë̬mÚ’“™GfMBÓö—Ç?ýËÿê‡æ|º4œ£&„®nowÞ;0“Rª®£*ñJX…ÚWBEY_í¬”ã´8ÇÔTa…ÕØº¦ !„8ÍF¶Û¦ï»"ns#?ý5§É1U MÕ‹Ô¨% FT«QVHlÅyww·A¤)R`©;} X96*€Å ÄL’ªI‰L”scú>8GÁyBê;þòaøã×w·ëÞ{ÇX;NËr9ÎÏÏÓù±ˆÆ"cL©8ïºÍj¸¿mw»awÛC•K1»RRIIRœ/—ËáIrBä’K)…È…¶ç¦·f½ëq8}þ¼ ŠHÎÅõ!e‡÷inOïî^Ýív1•L† Ì‚)¹ˆAŒ¥ˆyGƒ ¹)¥$™˜Ø9{)9ˆÙ±i¥¢Ö¥i‘4Oãù’RÕà».tôsU·ª`fh®¦“zï_F´5Ïѵ‚óóþ)ÍKßç]#T5õ/ÃkUpL9KÍ–QµúVåiйcçÔ0§ÈÎWzmNY‹e‚÷ÞyÁíÏó¼,Ó·ëæ®Ú¶éû®i["z‰QG«ZªìÐëHDU­¡#ƒœ‹ªÅ˜L«©Ä€Ìd„Î{çÝ<Ï"Ú†›`€ WDöôïŽu*¢1×¼áLÄ€m×1û%êþ¬ãtUí"‚‚Ì5›¼ 5¾E$ª–2A,"`M×¢UKµÁµM „RDDrЇ§§ç!'‡}Ó­_ß·ëv¸½í·7¾íØ;bF€*ª'âœòxØ>}˜.gD@²¢‚Äìwk¿Úù®︮N1åÂæÉs.bDPD žõüzw÷ûßüÖQûãÏ¿D[Ø;bÏ¢jEC#í6Ûvh˜sÊq޹M5NªæP‰™€áZ6ª¨”¼LS\aÄM7ôWË4@ÔêÛ‚äœÌŒˆs•+À‡óòýß~Ý®¼÷+ žMD¨2@UQEæ¤9IYæ%‚¡dÑœ¥ l Ó».c@TUÁyïƒwΩJÍÅ]­Z˪?S–Üj\ÕJçœÕÚŸU¿˜!1Õ+».”kUdV–qyüðÔ· ÝC·œwužç‚Gç/ãœsi+­:EƒoW«Íý›nµeçÍÁŠH.R©æ@Õ$!¸Ý®µiÒó%弘$Be°bL5öÈ £e‚k^‹ŠÕËË”KL ³m6‹Úi*ϧÛu߇ÄιRr^–ñxœN#æÂ@ÝzX¿y»{ý* }7 ì=ûkSåÐÔcrJçý~dzš7.t}ÛmÂæn½ÛùàŽçã4/ö2<±³„žrV‰:ŒVæeþýû¯VÝÆÄ~üð³”\XÙ¡Ie†Zœ§¥¬ ™ÍsœcAFDbv`VJ±Î]'q/Ý–0³ !©å,TéÃa·«€Ï,Óܧý¯}P‘™›æÚ;à4N?|ÿã4Í›ÁÅ”®¢iUõž‰ ‰¹þËy.¢Ë’²è4çbØuíf;Ô[JešãeŠ)—ah§)6kØ7Á™©šJ.¥Šú×)ž•¢¥ˆAÉ%–,õ¼ÿGª[m› RcŒÈ9«³BÉe™Óñ217C߬z_—úW@&bý+$"¦n½»}µ^ÝÞ†¶³üxJÅÔ‚C1vÌ.xº½ ­çÓ%ÆRÒbebT"¬04nÇÌŽÙ{D€\$—’b1„"XÇ)”¼Œè˜BÚ†ìóyù&¦ÕÐ]]Vjóå’ç¹LÉ#Ò*¬Þ¼Ú½ÛoV.ßtW%§)#‹s5ZYŽq¾χýqFt$PÛ¦nw÷mãÀŒÙ³k8•H—Ìs­cæê½„¬Ò†ÐÝ5ÿüO¿+E~üðS.<Õe·¨‚ÆÃþÜ·m»˜„œºÙUo‰iÍG5c¬TV…ùDÞ…æöõ»iŠË´Ü¿~s÷í7~ÕWv.1^u°æþòË÷÷ëÛûõ}@O‚€Bˆ1-Ÿ??>î»ÀŽÑ9—e^²#pžÀ 8"v"¶¤4Ï9¥²Är™c7ô÷ÛÕP†².’ó0Nñxsôœ’,sîúV¬8Çã²zDÈ)—9å’³äâçdÁ±,Ë,²®1ðh@UO^»³š«Që‘*2ëVÝo¾ý̶۽»n,r‘%åcœ§¶a(9îÞ}B㼯áݪW&ŒˆU"";æÝ¶%ÄOq™£H ´çbâ…ƵmÛ÷ÍzÝÞݬn7ýíº3-—%}ØÏ?8~~<^.Q }p¡umC¥@Îy‘!&H1eSEǪ’b<y‰dÀooo¶oß»qÝðQõó_A¬Äu9$%_ûñðç%Eb Ð4ݦ]ßíîî¶ë3æ’ƒ÷mšÆÇ˜U¬ÄBŽÉTc$"”’»Ð¾ºÙüþ7_O§çãA£CÔ¢å|™›>°cF׷ͺ<;Y¦Ët¹Ppm¿&ò%C@ä—E«ãÍíÍ·m€¡ ÝjÀ7ˋƑÜß?<öS|6ëÕn½n¡ 
Ü–bŸ?=ØzݳsNC¨¶é𤆄dhÓœã’rQ5ÛlW¯^Ýô}Û*Ë‚)f)fëU`GÏûKÉ‚ýÐ!;‡¸L³É}çƒë»Ð6.ÆRÓÜG9§óé|s» ÞC-Ôk´ìµaÿGÙjv­ÃA‹"Bh¼I©Æ‡ZÈ—")åSÎùðø¹ñF´&׺àkúŒš½ì§AD–%_ÆTŒ€‚sØ÷ŒžIJ2IV¢Btl«M³Ùv·wÛ·¯oßßovë®oœg•’¢Âê·ïõó—7ÿí_ýë÷Ÿ÷§â½÷fx÷0|>¦ŸÙ/ÓrVŒ‘†Á±ÊÓa¼Ù¬j«$"9eiVM»[¿ýöÛ»W>øÚCˆFd*ŸNµjOÇñ¸?Ž1er­ˆÒÚõÝöáöþ®k@‰ï·ëõó|$®?„´öŒèAÑ17ÎumZTâ‚¢›¾K RŒc¸¶ñ˘¦&9³Íݶõ-³óm.§ýþÓæ&>Üy×´t â}¾:ﺡ¯šOUƒŠ$¦ü{uS8liLñççϨV±›Çùð´oy¦àY%kÉvåRÕé PNI .SÊEÚ.¼¾ß }ã=8,à‰˜–¥ˆˆó  §Ý¶ú|Ž©8ç»N˜Ñ9öA‹) Pý˜V*AŠ¥k¼ªŽã—Ô¶¯Å1Õ%jµU©âõÙ-uõ*"<«€"¢JÁŠoyYâ2O9ϧÓñîa[¹4W¤2à5B–‰ Òù²d- 5úÖÎ%%AMV&Yuôæaý÷öË÷¯¶ë¡ïF`BGTr*)»à É‹t¯6]pà}¿ /îÚ?üæáù’æqþøx‰Ó–ÜhÆ_OoïwÎ;&b ¿d L¡ï»¾ƒZÃU8‚פºnŒãå¼:<~œÇ™‹æq1¦àú›Û»»õÐyï̘]Ó´Û~í‘=ñ’“€¦¹PpÜ:·jÂnè½#&bç]Jw«ŽÐ™…ñ#DbË žùfÕ¿»}½jCй<N—¸4¡wì{Àš€k"åÚÝ1ëõKèºÊùÇW­YÉ oœw^³AFcÍ%=}zJ˲[…+G´ÔÜ…R ²z1ó%ç_.nÑ×›Î;,iAE#*вä\$e1ÀÊĺ0¶~šâ¼,]rˆÀ¥(a¡à‚wÞs=$‹¨˜ÕÞq‰Ë8Ï>øÆ14æ®"ÿ|UÑÙ5•Ë®–&$Ä v©ÿ‡RT‹g-¥îÙ9WÕ1¥š 1¦Óéòã¯1bçˆh³é–()%Óun½Þ¬Ãw_mÿø»÷o^Ýw]‡®f¿™š ‚9Tj$ñ›{÷¿ÿá«ËbÓ´<ܬîvëÕ Ï__âŸ9§)Bvà÷ôøøA»®-E¨íÔ5bÙ…¶J°œó/ѵ#A¬õJŽqÏãá9.30&-c.‚~³½ÿò‹w÷·[ïsf"R›ž‡Û›þc§TÊ5'H ä97+ײÚ6xo¥;çx5 ¡ Ô‹k00vÌw«­¹Ðt­hIQÞÜÜïú­'GWÛeÑcZd9³óo‡~E@ÈuV¯B/@à*$¨ô˧±jèÝåùÌÈ /Vpׯç-éÓãc)0ô}ç˜jÆ6¢w!8dιÌsœç”K^réºÐ6TÔLÙI†˜är™—913ôèªH©áÞs…ÿåT'íà+Ë’bª¾ÎsªMA9>ï‰hÕ÷w¥ˆûÿ ­*å[A¯‰â`µû6SUªq("ZŠÎKË2O‹”šŠÌìš\½%9ççýé¯ÿüã§¹˜'´Í*€á8F±á¥oàÍýðÇßÞ~óîáæö®ß®×– \ÕÄ^Ej"¤€ÈÎ=ܬ~ÿÝC.òÝ—wÛÍ "ÿé·orJÿóß>Ÿ.E²Îç|ôÍ¿î§iÞmÖsÊûirÃÆ …–™ùŠÛ»à¢•—étœÇs]Ý/ç˜Ûaûåow›U~ÐU«fÀ„¯ïo÷ÍWc\b5q¡FF:vLšUŠålŠHÌž¸_ ÝÐF7cKܲL_¾zûððfYRÉr»]­úžkÝ Rbš¦å¡òÓçO»‡¡*æ;a"CD&®™yHlfj u[½Yîò9:G7_l¾|õênsÛpxÞ?}üõ±©j]æET‰È{rž_b]8årc.òvÓ·­7SÊIUåy?©Z×70N±ï\e"Yõ¡ LˆEÄ &ÅA)ZwU”ZŠžÎóyŒw7+Ïx:îE_=ÐjE¬ˆRÕgÄD€¦v}ÿ ‘ L%W„ˆh/KÜï`hEÙ;ê“Z£ak%¬†ã’üp#ÁÐû&¸§çYÓèq¹[óøj÷ûß¼~óê®m;f ³š±@ jfR^è¡Ä¬5"ªïÂ?ÿö-;nƒgU~ó°ý?Ý7]Ûü鯇S¬8€Tdõé¼j»Æõ]óõoÞßm‡›ÝfX ÕRWÍõŽ€5h-/Ë|9]NOó²áq^Žc?|ùþ‹W÷7më¡rtÙ¹ZJYÑàÜûÛ»Ó×oìr\J+j„%Ê4.!»ö¦½Ýݪ¸¶kÛ®é¦8 @ V y‚¶oA­uìJM GË¥Ìóùpœ¦¼AÃéøîõ[òd•åËüï÷}Þ×ÛÕÀäÅuåL÷M÷þ‹ûo¿üâíÝ«Î7Rìt¼\Æy;8ßz¨[Yâ*ÚD6Ó«‘0WõSð j5`Ö^²»nwƒg(RÁ=5*ÚÀlèﮎ[4dçš¶°œÅy÷2…Cfº\–6¸õªu„ÇÃÞDË]^o7]×"®íû2•jŽP3»ž¯¢Š€Ë÷ûã4]˜UM‡Á7mÏÎÕ 
ñá46Mhš€À1-OûñÓ>"qïm5„çý\â¹qñõmø/ÿt÷í—¯6Û›¦íë,­‚·€ÌÔ”Ù×ë¯G¯~fεA«D¡ªï‚㇛õÿñÿîõîo¿þöÓþt‰¢–Œ‡Ý滯î·ýzèú¶ Á«¾è7JÖzü˜J‘4OÇçÇÓÓãù°ŸçQÀ–XžŽÓi†o¾½õp; ½sljµ:dâ ™©±.«®ÿîÝì0.9G½œ¦ó4/) CóþíÝW_¼]}pá*CëBÐäRӾ풖ýx|—bàÆb†%çiÖ˜LA.SZýðÌ1fÖÑùrN)5MÃÄU%(R…‚Vsôkmc/–ôzúŠ  ¹ÿûÿú¯w»›>4UÑRr™§IU¥¨Ö ÇóR”ˆŠ$DbCÈ"©”\„šÆU_¿¨@-¢5þÍŠ¶wŒ†˜‹‰aß·ÎÇ)‰s®ë;=öÇÓTD+I¥Jm>?ŸEm½jƒÇR¦_žçiÞÜÞlwë&„ûAm†ФÖ#õ;,ª¦ºÌñx<ÿòácŒóvÓfWQb¸?]þå¯ïn·Ã0xŸŸÏùÛÇÓ%2…¾kŽÇ9Í—>”·wÝýß}ý¦_mØ¢ýžJ‰ÓÒu÷^š ºF7@&’«ÑˆªTÌ,¥ Žh;´}ãß¿ºý§o.?}:¤œ‡–ßßoïwë¶ñ•™hf9çŠ ²š•—ñ|úüñðùñx:W¬–Ì1ŸÆ²ÚÝ}÷ÍW›õʱ»—‰©®-SŠDì˜ØµJðû/ÏDÊ%Ž¿î? Èýz{¿¹arVÃ|̬ >/¢ ͆1.é|:ãÓ)&}>…(¹”ÛeN¨SKfªç%³v=׿‹IAº’êë# (ªJ×lj vWÂàÛ»Wwˆ¨"9¥y\ž>}6F§"ãyBfpŽs.`@**bDèܵ ¾Î ”¯aíEÁÐ{'¢j‹žÎK.„ºÁ7U #&v®ˆ`Û8Çè™E¤˜ýòq—Ö¯_m½#Çöñã‡çÃáõ›‡»‡‡n¼÷Ž*§åªU(¦¢&¢b 9¥\Êù|y|zžçi³njœ+@Î¥~>Lýù”Ì}ó^dJùñóß>\ p;„ÃiÎi‚~ývõ_þðî›/ÖÛ"VÕ—y•œæe9/¯^¿òÞ×’sŽðºl«R=¼ÆØ‹³òЮ$:ÇÌį·Ý¶¥*˜b$€pI‹Š¦¸àØYˆ¨ÇóþÓ¯—ãátÏ1+‚sˆ€—‹µ¿ýú‹íº÷\¥bŠx-ëm@DÕ®–$í/Ob°j׽[÷ÃжY⺆Ðc}Б-•Ëóâ-˜ )jÉ–Ærù¹Ì&Ï) È™¡©c|è»;ì$k!MfãR†…AȰžŠHXŠàÕ}5ºÖƃ™ÔõmWç劢MÓüùñ ̈±í•œ]ðô]ŠÖ ¶KLå^B"d+ż70k??_†Î·­/ó’Æ):ϯïÖŽYRÊfÆ „Xß1æyÉÎ?}Üÿ¿ú´$“"OOG•¸è·ïVÿÛïß}ùå]?8ç*Ñ fN¤é2Ÿûç½¹†ðAEØ!a%›ª¨X.E¥Ô¿ê‰«a¥HqV*R$g»º8²cÐ’%Ç—œ"¨È²Äª]Šó2MSIe\ÒaÊc$ì¼+‚¯_mVm°œrtu©l€Ì@/z¹R*tNÇ4~>>O9­›ËÃú~Ó¯¼s7½kƒóxÑ×´Èü|~~ŸÕÔ°(£lnÌ1'$FBç{¦b´fcƒ ÆÜ['øõY\(}ÑÚÿ¸Š@G¼ åM¥T.2ãþŒèÚ¶CPUÍ9†AUj—ä·]`¦*²«AVàCá%«ÍM,1/)‹%6$ïvÃ᪚³¨)šiŒT 40í:ß¶,žÈ7?q:õýЫ¦ë‘؇_éó¼Ló Ibœóy¾öçýóñýí«íjã¼oCƒ@Œ R´H9Ÿ¿>}Xæ™`˜Äe›ùÖuìP<榠¯‘#ìÐÐÔФNp!\bùï?œødÞ…mÄn·â‡m¸Y㪫aõºNWF®«UqL¨×CÌ ¥,¢ã¶õUïÂÄDX¤¼ìê0 RLã8§\Ààpœ!„®m<Jf¼Ýö|ë*Ч†í䜋Dª/Ò4Oe¢y‰éépÉEÏmã×C«fmã/ÓR3½ú.x‡—)?~>6M¸Ù­½÷Þ«cPMçã§éüTIDXU `0tÄäSF½Œòéé2ÍÉú®Y¯7ìœÍÙ÷Óõù˜Îã£Ã¢¥x”Ðè›ûæ?³ýêíÍÝÃënX‚ªÉi:/ûÇ2Ÿ³”ó\Ô «õÁê#%rudÆyšÆQr–4縌ç‹H1½‚ µÊŽ®Brì†Þ‘QN9.s5‘ûƳs"ÆÎ3‘åL œb–,¢&z¥®&(]¼£¦qXÒr9jZæËÑODD‰ÛaÅ¡“|~>¦˜]ëÉ1;ž4.ñ4þ:ïšÕÍöæþæÖ;§YÐTSÑÓéRc»È`(-ÒŠÃÛU»mšNÉG£I "0³U›ˆ™) KÇ1¦¦oñ¼¨j¢¶îä·ïÚoÞúÝš|fº:³í%Å IJ"¥–Sq‰¥HÛ ˆzﺾ!ªG5Ľw>ø\ÛzQËEާi·؇Æ#€ c.ŠŽIU‹X)Ž«†DÔ¹šFÔ–qŠ1-1?F&XÍvÕÝß®Æii»†šH©³Œà¡¹í™xYâé4…àÛ® ›‰c23B ­GÄœDõ Ô9ççÃ8ÍIÕº®Ý¬W¾ €4Îù‡§Çç)¥r¹Ä›5¨%Ø·øî¾ÿã×7ï^Ý ëkz¬Œš4OÓéùôô)-SŒñÕ›÷oßçêñ)%—”–iB€8Óå$%ƒ$™¦X…ÀUsTD‘1«5$L(dÆÞ¡A©›HG ÇTŠ'¥”z —R$IM¹¢ 0 
LLHÅ@Õ$%FÓ9&ï¸Z0 $9>Á’Æt<.ç(Ôqé-;Ðå|™?Œ‡×—Ów¯¿xV”JÇr8ó‡§/ŽåõqÀе®lÏæŠ€V3+¨* ¨šiV †fb jET¤Êt0ái–?ÿ4 g‡¨/°–+a@ÝU¾EÕ1R[sŽUMŠÍS¤‰ØÌbç*_ü4¼Û­ét‰¹HJ¥mB­k–(1IãëÆÅ˜Ø9Ç™HEb’"ºÄòó‡CŒ¥ š&4m3/ÉT  ë¼ÔÐ;F¤® L´,9e.cQ@DvDˆÎSg@9‹ˆ2é8ÍÓ¸ÔÊ]Ôðf·þò7ßlnnèù´üëÏi™=dËxš™änë¿{¿þúÍúõýͰ¹EDtž.hZÒ’ÆcšG´RŠ,ša{ÿê•cš.GMsœÆœbޱ¤Xä"¸Ê¯€I•¬²$I"È\Ô˜€KÁUë]×úeÉÞ9 —x™Ë’ų˜”yNdRJV±9•(ZÔ €šàÚà›¡¶!´(…LYÌZ²’rç™H. † †È`CÐi‰ãQeÁ%€4èi‘çCÖ>lüu‰2.\Ê® ¦¥øhnBZ4Ò‹ÎÜÀ€LÅ«yµîuª´@µ€¢cƒ4/œ™º€xŠúÃDz[Q¸çêöªN>ê}õbf©¹´jUÏ„Á»aÕ^“îªð °ˆŠg2Fî‡ÞcÌqÉDùñéÌÄmׇ*дˆïÚ®í’2qßÔðÍ¥ŒS±qŽÇÓrº,LØxn›ºðDbjÛ` DPÊunêÄR CèÔTƒ¦,1 Vn˜hA$ï¹mBÜwáx¼<>_ŠèÐwww7»»[dwžâ÷?=|¼h3‘4¯÷å›þ?|±y½VCßk:UIËçK™ÏhÌjL¢!·C×¶^¦çñ,&RÒç«·K*É‘¨FÕiIYÄ Š>Ïé<•, †¡iû¾ÙrsÓ­ˆÃÐù0`ÓõMpsLWÚašçýÓót:LÓ8§$ÅrÖ1IkØeÁ,t)F—DDà ƒgP´EÒ…rëÉѤ,¢õ8¸ ö4ÇóXT:®ÒÝ¥èóÏîæ§Uî½$]A#cQPS0ÍZ _†Žˆ 00:F $10ål9PƒÆƒˆdi QÀ@ñ4•¿¤UÞQ¨«y9X‘])™®BznÚnµ^â3ZëW«c*•Ê‚€¢Æ®ºÍ0¥2MQÔÚÆ§´?\º®Y¯Zc‡@ˆ¯g: IDATäÚ‘XJ«ÊT(¹¤”R–eÉãœ>=E´ñÌ„!8v”sÞ9¦6xfÄîN‚óžkrKÎWãWÛ¸¡÷ÄœS.j!4 SCâ’SŠ9§`ÁñÍvu{×­VYì×ÇÓ¿þð4žÏ yÕÑví~ÿõæw_ì6Cß´­o×®é¥ä¼L—ýcY.U˜‚H¥ˆŠJÉΉ.s6KY˜|`(Y¦q) à½"N9¥\Äì QɪÎójåÙ‡îíÛ÷777«¡ ÎA±íWD‹”"fàò<ÑrnmC¿Džæ|ÈBSžf{¸Ý¾ûâív»5tKJ"ZG󼔜²fS™rY :†Àkm³g L½£çó¢FT4cÛxŸq¸dÈ€!0‘_€0±¬¦&f±@¾¶ET¥À ÚµµNÑ Ð Õ%dC TU`³°ˆþ´—û[ZuVgÔªVvfâD¤FV_x)…‰‚wP²äTÑD½§”²"á4ÇÖ°íº¾o ôts!"çôøxˆ±ž‡Uß6¾i¨‰„Ê?(€UáZö‡KQ{|:ÎsÎâz¦¦ñ÷w»¡R"{ÏâWEJJRD UÍZªZÊiYª£MÛ~ˆKL)—¼O—óeY–¨jMnovÛÛ[E~Ü_þûŸ9îliÕÑ›ûæ?}»{w7ô]ëCp¡'nÒOOóéYr¬MÃ2KÕ !‚äÂàÐsÎZDÔAqIe‰ñ8ÆE0kIb) !ˆ@ʉL˜ :†ÀÐxél^7·ÛuE”И¨ë[ðRWMEKN%“d0AUF@‡ÎphÃÝnøÃ?ýöíû÷ÃjUݼõá(¥”"¹ä’SçcÊEÆñr^f³ ªRTÌ ²@ŒÅ H€ÜEûÅû•3O”Ô%pWÃlõÌJAcï(x4`B2¤UÑ€Šá¢€@Š–”Ð<¡¢`6eS¬x^ä×'ÿöÖ^ f•„Îà–enÛ¾Úð É Em^’8rCcV}8 ÈŽ16¯QaÆŒÁ3–"ž©€=Ÿ¦qI›u›Š8&ïÈ{‡u2NDìø|šñpžžs.∺Á5ž½w7»Õzh7] +åØyÏE FÊæ\Š)Šˆ¦†ŽˆŠ™Ú8.óœ.ãr™â²$QèÛvµYû~ýtœþÛŸ~þù×ç´Ä]øzýÝ—Ûíª Î‡Ðq»6rq™/ÏŸ.ûgÓRDKεIe¢J}÷ŽRÊËRûr&#˜c9^rQPsìüz·¹m»ÆSCâšÕþ2øôx>–K̹€Ih»’$TRܰÝ š !››-Ë4ŸÁ$x—±8ò&’S ßþæë‡W¯Ú¦q„Ää}S ¤f±îH•8ärµy.1NórºL?ÿòËñp”¼T>rpp8G×31hº˜¡—ž8"ÕJ¬€š‰š€)yÂÁYC¬ê&ÕbÀhiãñ‚çlf†HJI UÁÐ(8tfLZŒT¥(˜Ùç“M‘×CÚ¢R- ‰À.ç¼÷¾¾`K5G$1$kÏLγwÎ;6ÉZ[ø”rhüzÝS*%xʹ<=)–®óÞ;³¥®²›Æ3;ç”eIBDCëƒ#Qk»nGP–EÚÆûƵ1j•{‹†Æûà+§ÆˆÝ4å¸dUú–Ú†ÆËTtŽ9å"ª©(3 
«¡Ym.QþüÇûþSZ–m¯üªÿöÝz»jwä;ã.ƲÌûå|X.g0YJG†€Z,k©=/e05<ÇrœË’Ø·]·¾¹¹]wžÄJlº>£G”m×mîßÿÖ·Ó<ŸÏãx9é—_?>ïOO—4–ܶK.â½GÕúñD¯¢%¥8Mãñ°Œ—yš«å<Š *¶mOä§±Æg8ç3Õ³ ˜HU÷ì*¸WCWDî–øæns§Ÿ>=~z¾” 1ÊR€@T†¶ƒoC«ä6U˪àˆZ'Õê3 ¨!ZÌ ¯Ãè«:¬ê$ ‘•ëʆ˜¨!H"©TgÙeÑó„¯n€*|HUÕýé/ùÝ7¿½ ¶m‰¨ë{缚¦¶õ]Ócð¨jÄÌÎCÉbfÄì½# &E½wjPQ2Hx—ËœSÜSŒ/C 5sLCª^¸~<¶›~µjr*ÆL€`&ã(昱í¬éb¹¨4 Vj(ÆX˜‰¹à‰ÝÏ`–r*u‚e fÁy×v³òŸþÇßüõYKº]ó¿¹ûæU_͉äZ æ|<ާg-I¥T‚œe!+U)ªDÊI¢`6R\5ÛæÝíîõýmË8?Ë´GI ¶Œé‡ÇShüêwßùÐ4mÓuÍv=Tùÿ?ÿò?žÏÏ)FËBÙ¶à¶àûäµî†SCpƒ–­aJâFeE¬9fŠL¬èÌŠaAª¿sùÀ"T@ÐPБCa¥8$2°¢2GSe¬M¨BAQsûyüåïMÓ0 5mëƒÏy&€¸d`&3NIº®Q³’«£TMJÎ1æón7ŒS<¦ÚôTˆŠ(­Vü¸ŒgÇ`öÿõ^Mr%W¶æVîG„H  PUd‘ͱî³™ÿÿCÆîífß&Y,"Ä9î¾Å €hæŽthüéI ^6Ù~øfþ×ïov›¤µ(qm¶|ùx<´5sÁR-©y¨¹K \+sÞÞ~sûp{usµ›Æ”RꙌÖÖe=ÔÃÇhkѦµ9òR+€ŸK}:›¶$‚ÝŒ½ ω‡~’™É Êr^^?üüåxlµ®ÕJÓcmN誺<Ÿ%¥t³ÝÌ9åÞ/#³™ª©º¯ëúñÃGb¸º¹ÞÌ»! ]•ïÝGDó8|{WdsnµßoœkUA @§”0¡Q y>µ€æî¡î ™~§Ý:˜µz:?þr~~ì**P¤—¥µûôåÙÌ‘!LÛEaæÌ”’ ¢™ºvAW-Ç/Ÿßý|xzªk ˆª¶ºµnŒb@IWWW¯^½¹¿¹ºÚoÝ|šÆ­ÇÇÓËñ´<®'7ç—ù–Ãñøpý0›iœ° jñ|ô_>n—¢Û„£{W@SJŒÎ(ÐÀBks€H$›™¨µHÕ9S*£!x°™#:ŒÑ­*IêØG`èpFGo=¼üf‰0™6Ó×eÕÿóÛÏãfzsÿzµÛ]_>‚ º.€ÁÍ‘™ûêë’Z@H@ÃÀÄIúK uS&ìœÐ<œa-Š"ìêý Ä. 
É‰ä‚ô µjÎî­iîá„hÑ„E†Àó¹2“š¿NÏǵ»´×RK3Øod{#ß¾Ú_ÏâµXkki®jêFÂÌ$Ò*œ ¤a¾½»ùîÕÃíÍÍ8¤qÈÔÚîæDâî=üb©­íôô¸,+1¢ûù\´êÉ`„XKQm_ŸÎÎŽá~t;a<Ô꺞ž¾|úõ§r>© ¶ð³yƒhæiws{uµ§í«‡‡ýv&Ue&aÁ¯(sï JÂ,ǦJ©„þöòåÃóÓ y;ÍcFXªüö²ùrk¤1H  fY ƒ‹‘ô€#2 &’5°F„ƒ¡b h˜¹c@–G'ìatMPúÔÿ"§ ðFÇ K©ÚЫVBDqŒ]MMµÃ—ºü™™‰S×µ˜©5-çÓñéóã»_·£‡§,kÓ“Új€Íy»Ýß?Ü?Üßl7“H’<ä,ˆýî43"ŒtÈy3lÔ•no÷Ï?¾?cBx.ëÇrÈÄ3K~n¯ß—mNÙqX€¾rD‰˜‚1i\ì˜}/ጜ„„¨ÖGªa6°îGØMÔtùüL‡25ˆ’š‘#„Atœ@"‚‡p¿«Iœ¼×÷b’†ÄÏëùo¿üÓ=¶×Û«Û›OïÞ÷a“¤ ˆÃ˜;.ªÃ%PÕúD£ïDÈ<6›¡+gµYÐeð$LIµ™A„š{À9%150ï.@÷`Á$¬Íˆ±ª&áµYOmt‡Öt]µ¶nQX= ª-«zø0$U @Æ»«éz;ΣhY¬®îŠ È1¸›¥<îþ凷ww7»ÝF„“$ffºÜ@­Ç§_þqürÊ9 »‡{yû†rêPeˆðUçQª–Îü¦,¼,¥«’$þ]npÛ¨©wõùtzþòôáÝñå ÁE¤x<×c3ÕJ××7ùËŸîo¯†œRJ€„Ä9åž¼ÃDÝ£qùã!”¶ã¾ZkzØ^Mû›íË—C@ÐÌÄ]ªÆµÑKÉ8Ÿõ«¢P èJ÷¸äS¢(ö%²YA8Æi ËÚ5º=„0#6t3Н‘,èq1&qßF)Ý_Ñ/¶Á êQ…rú²Lû!O’“й•ßýòúúþîÍýãçÇRJ\–6oÆuiÌôu#›íÐA š‡áNL¤æÑãf.ÂØT]›výëñ\¡©Öf9Yß»H„2!skžÍ/ÌnV³ó¹´f§¥ ¢{4 N}o„PK[›ó8¤›«ñÛ7Wc òRZsŒJ±¶Xª©aÐps{ÿöí›ÛÛ»ÍfÓIUÝŽŠ—œ†€Àtƒåe9}zF@X_²™æ»›>ú½T’ˆH´¶fjaˆA\UÏkmͦQ¦qä¯à£nˆ®_jU[YŽÇçïÖ×z^(À ‹ùçózªªŽéÍÃÿüËoÞ¼‡„àDâqÁð›™0áÅ© ½ðw7DœeBºArÓø~zúíÓçZB²»›—NšÓ˜ È<Â<œ Ã5F@—‰#B×C³actÆ—!>y3@¯zŽ`±nÈV¢¬ÈÝž€}3áæ®FÔmFHÃí>sÏÓÜU^äÉ‚BË¡ RÀDÌ´¬ëÇÇÏ×wWwW¿Vl-†H®!‚9‹»çÔ£o{2'%¾`(Õ½©1SB€j_š;€_Hæf^›Nc&"¦!3'J’YÄšv‚S3%1…™›Z)-g @D6‡ã©–j)±šŸ‹BiýŘ{ &I|³ŸþøÍ~3r ¬µ­®U- (½yuÿððfu•s†! _X,L—ÒþR‰w<&—4R¢4]ËÙJ53øjŒ bÆ  ÉÝC£u5ŸÁv3 S¸ý)7Ô—óR–óùåñåóû¶žÀ5"€°h|>¯ÇµL?üá»û×?oæªd!k†„M-ÜqÈèaà]ÄI(ân@´•}N£šÅ=ýáöü׿ýí¿~úétWÄ’Ò8o$gI1_l©Änfààf£¤'‚æaBÝmçA7LD 'µæ¾¸ã˜Ö¢KQ@z¸½Ê)€6íi:­–²,OŸ>?Y=µR„Zø©Ù§óú¼ÔÇçRj|ÿÝý›·4Hƒ(e9•%yºáæNž„ˉê\Ï•D`”H™óÿûïÿ~{{ýŸ?þýçwï[mCñ«Æ×<³ƒô“‡ Lhå"û"æªÆc’9¡z]kUBÌCÙÑ".#!7CBîn{wDh­e ‡¦†Ì<Š›z¢BÈnNoïy‰.=÷å!†ô@ŸœH‹®ŒL2¦Zªž‹lÓù¦Öí%CbHIzé²™²$þ»„ˆáЇŒ€Ôc€¹?t‚’„ÔÔÌÇÌáÆ—® Tm9D,U…¹[=±£ÖÝQÕ7ÛQ—v^j@Ô¦kQóX«@bÊ„ê`=»pÊôp3åÄ«êR}­~\t-–™w#°-íx8œ±‡ÎmÉDW×û«»×Û«;&ÆDØSD…yš7cÙÏHãÈc&Â~†éÒÑöJÌ—¦Å¡x€ëé\΋î÷ûûû»”³Y¸©¶V×óññËáéËéå™ÑÁµ_ËKÓ§õýñü|nÇE[y?æÛáS}ùåçÏaQK{9FIh¤ h;O7ûýwß¼¹Þo¶ÓˆŒˆLáz‚a_  9ç<äy;ŒW¼žÖÍçúöÝš…¹"#EX2öšÑ-.  
É”HH‚` Æ°€—)lr€hÚ«RN”‰íTÉ€§y ZiȈnîä€èÍ"8Ü„RJ4à܉#ŽÖ“~E„!`ÚŽ‚¤«–SÃ1ñÄž‰hfM°ÛZ!ì¦!'ʉÍ\„ªZ3ßl6Öéèá(Aî†èE½»{¢«¥™ÙÃ/^é ‚ÚT¿nÿÜ»öÌ‘0'B ¦ª˪ëÚšZ„#âZ:ºRêÛ‘°f2Çy’ïßìvSnO§ú|VwÜNÃÝý|½›÷»Mfd&–œÕñx^ʺžŽ‡ÓRö/Ç›‡×óþz"†p&œÇÍÝÍôz{I†q¶›¯3T ¸ e|]ªQU…ÚN§F”þô‡o_ÝÝ×R¬6kí|x~|ÿÛáñ‹»#D"\«~>­¿½œ?ËyQ¹þf3óù×§ZpBJ¤ƒ¡  Ùzúr|:Þ>¼úöõÃ~;IRNDx±÷  ™Hc^ÝM` š©jÓ”E™E‹9µc™ó0ã$ÚÒ@=~²kÿ™ˆ€°ºaÂËõЉe>{™–¹X5ƒ4ȼ½ÙËÓÙÌ@qÎûïnjµúR8`© @[):M©Ç¤Öª9õB–“³xĸ‚˜™#7sU—D¿7¬ Ì,=¨;ì°6%¼ü¾Ü£©«ùZÛRÚZ4 Ôâp.K©ÓA7ó’E ßÝmò<`ÎeQ ºÙ ¯®wÛiHLã4óÖÌEˆp"Ú̃¶æfÇózxþRÖeÜÞ¼úöí8͆Ä@áIòÛ×»»+‡Psf!¼øÙ»ý#°Cª¹!©[] CÞ<Üÿå?r9žÎ/Ï/Ÿ?žÏËéìf†µ™|:®_NåéPÏ‹QžU?Ê0ªŒ|µ » Â8sB¨ŸŸ–õüÍëWw7æ„L„®ª3 ¡ffÆHw›k?¾ØË*DÀ”º4*€ÉºãŒ‘²ª) B47‹³Û‹ø’ÃÈ1Ü­*:,š S‹Rn•$ifn—yB8 !Špb¡!IÎÄwá"¢»š«œž×œ¥®‘"‹Ls><ŸN‡µ'ÄI¦|;ÕÒtµ¦ÆMÕ-j»Äâ®k5µaHµµ¾­éÒ3KÂ¥P €ªÓ¤üô¡æõd)1D" SÿàÝÀÝ×ÒT½Vf<¯z<—è?îÐc)<‚˜†¯÷Óºª“÷»<σ 䔑˜˜Ì£ÔŠ€@­3 ‘ARÚΔÄJÑÏï~V·×oߎãµ2‹ôN†pË {IgevurD¨<ŸÖcmÍàíÃýÿóýËnšÏÏOÏß-/O­¬Úà Á@¤D<—öTÊã±OzZ3o®¦Íõ(³ £Pgøw2³ªGô‡U˜º„W„…ØAW;ÿøþצííë‡í4а»b¿«¶Òjiå|.åãg9-Äœ »ž¢³…‚ˆ8‚-ÜÔ±î,Ì쨵ÌY3ó”û¤H ¨(ªƒA>ÄØ8¹31B@OrOÀÝ Ø X8‚Ìå ç/“Þ€°p7‰E—s;Ú™7ãf3d‘Ýnf¢ãa-¥ô¥êþõ5µÀ „¨“.ÇAب©UU7ŸÆäÓЛÍ1N§µ»=BMÍ{6¶¦kQblͦ!1ó0ä¦]ðßZ/Y£s$—Ukµffjͽ×`jH㔾ûæf»Ó‡ç³ÚÊ,î.Û)ï6ã@MR’HŸÛ8¤ívf"d‹$€ûŠíåÓË4¦W¯Y±cµ11qOí(""ì’a7žÆ´iúÛ³®¥í7Û¿üñÛyÈŸßýüåݯР†#¢0zPó8¨bõCmOk]Š~y)¸}5ßÞme`d€DŽaapÙEçqÑWýLÇ   J´(ÿüð.%ί_‰°°ô}j‡¯hع,Oǧχ/‡Ï_ÆŸžîjµqœ ùñ3oM‹+g‚Yèì¨äÁÍ;C……x"È\K[Jí©=Z•³¸À“¦Æsä rfÄÖtá½û#bÄî„91ï&±ÀÅÛpÉ;ˆ€›W[÷Î^ÃzjO§CˆPÓæj=§Avóæa{3Zúùÿüôîç_×eíÇûk†]Œƒün}9®ýzO›Ãýb©ÅX‹ºGk>9™I„´9"·fËRÍ£5;/µãWûP³T­Õ™ˆÙ©vªb¨÷,™¦éõ›ãôÏOËZKmM­u2˜ñ»‡Ý~;`Æ ëiéa„Īƙˆ% ì ªš¡µ8?·u—ròîtÄždK¤rÕ7úÚêù/Á£$ IDATðüòå±.ÅÌ< §”ßÞlìüòÏÿü`µ„ëfHˆDQÔU­i…µÙéTÇÌÛÛiÜç†VŠº9¢‡!f¡¾OVs Ê3vyF÷G4³‚‘FÉyæúó‡7××ó4!a|…5UÓ—åðîéýûçÏe]õãñíÓŠA! 
aÞÙ²Þ©X4‰ 1€ƒE8RsÁËtw˜'ciÅÃÇn™wP´fí¤Cãý0í‰@R7p‹PÂ%ëST¿@õ(%,t½¡” «™Ä#:y ¥®‡i§¼ÝN Y›­kµˆõ\`·Ÿ¾½{øãÃ÷™Òë»ûnÿúÿ}|~˜úиûãÔ# ™zU!y8—µ´ní¢µ¶!‹;ä$Áˆ¦ÞÔý´6óeÕÖšöl7r¦†Y’ù×ÕòÊ$¾üããÏŸÏÍ´.UNk©­Ò0v!‹eq¹›@B:;Ø…ÉØßuXtE “™Vs³ „9Kb$ ],«Üæy‡< ‘¹WmÕ vCºŸùÝ›÷㞈²0 Çfët³Û×o³Þ¹„H0P@Y lnö»ÍÌ„j¶ÖÖŠžŸ\â*m§4ä”§aÜm7·w·ÿù¿þúéý']K«Æ„ÐÜÇÜ£8 ÷¡ÇS±pfDG·h`kQˆî²5!w0ó꾬­6óÀA¨´®§†N—eìðí¸p ˜Øƒ…‡y8çUOOg ÊÃp}sõoÿöÛ‡ëŸ>üÿë?O'+­™›¢¢b¬Èë¢?þ¶Íùj3¾º»Ú΃€°’µênàmõ§Ã4#Þ" £w§ƒÓ…ÅgÚt9Ÿ?}||÷n9-UÛ§ÃÚ,^oÓ<Ð ÈLýbíËžª/¥ž‹=ÛçµÍ` Íõ4L ªi[»É‘å!!w2>õ§ˆ<¢‹Ñ‰j5Њ y4 ¿|üíöúêÍý-"¬µ|9<þ×oÿt|jfáîksµŠ˜“ "T:õäB`nn€ìááÔ{—ii ‰ !>¥aâ±P Š@ =5 ÍJ¯Æùd&HUNÂY$‰B ¹™bfJI _Íi·‘œ©oJ{&@σ’”åtXzBa[*LC朄h?O4³n¶¾{à%^;†axûí›Íf~÷óûüíǧÇ'73C-f—>ß̱gº.%"ZšFÉY´ù²Öumˆ˜3ŸÎe­ÖÔ…Ñû¹%Ê"­»ðB€# Žk1 º½¿v×§¥ž×œ®v»««í›×÷úáÛû»½0l&9_~üÉ&_#|-fêˆA,K”rjÏe@AÚä|w5Mƒx3m- 1RY–×w¥,›«Ûi»íÁáàî»Öu=<~~ÿÓß_çõ¸6@¸y8 åÌMlæ¥) SµçSý|(§UÏ îÏŸ„”çœ'I#§‘‘"€ðRˆæÔ=˜ªnjn€ˆ„ÄÂ5‘0¹·ÐVͪÿøÛ/›y¡ÏŸþû×?Ÿ«©«Yi^m 4 O"B„è_«ßK²¡S$!vŒæîj6Œi@¢>Žt†jÍJóbƒñd<î2f¡`*Ïç¶T`f"$ ^«¦€4&bŒ,|»£!ã×  Üþçž%Y $Q>=»q3 ó~ÜÎiæaj® RfÇohž7÷¯>¾ÿøÓ???>ª!#˜yNÝŽ¥ê%Æ‚ÎKS‹’”‰ºÅŽKéÌhΜ…ÔÀ¿æ"bSO¨ó˜­4d“œ§Í^æ+Ìóͼÿã~s}µ»½Þì·Ó~·™æ¹‡ƒÒÕÕŸøn]—§§Çˆ6æN‰B"Àˆ_½ (<-˧ãi3pb$µÝ˜³™—VùóçÃæãÃwÜÜÜsß’»[Y–åðòù·Ÿž¿|)¥­êYhø B'¤¾Ø.ªEýX´y‹V/.)o£µK)áaÑJmEòœ:xÙÔ»kg/3vçfD²¤‘:¹_„.°Ùˆ5ª¯Qš“ÇOŸ§d¿~ùðùù鸬áîꨑ‰GÎsæLDÕÐ/n-‡D !±¯©“DÍÔu@Hĉ+ìò îÍY@iU$‚$žD™E(D†ëbÅ~}¸‡#íÆôê–†äáè2ÐÅ÷_¥òïÿþ‡ææLåÜb ×ó~›gîbÃŒ®ÆDnÚ•ZªÍš&¦›ëíf3>¼º}üòøÛ/ï>}øt:œZ³¦Ú7õ)³; jÔp¢‹,Bûj$"'‰ˆ¦ kUuG‹œØ1 ‰¬†Ö€†ÍÝÕü»zýæÕëW77×{&/Šh`DIò Ì„o^¿¤¿ÿãŸ>~+ÃÆ«ÚZ´Fæ$ýK‚‚8jÔz†ô¦òrÎÂÌ<.ëõ4Þpªëy¨{¤QYu]O_ž?}xyz,M¥%¢1Ë»ß(, Vk§¥=ŸÚóÒ8óîúûo®%Ëáüüøôt®Kñꀡ ÚªåÔÖc½Xå93'f '–ÄÄÄ‚ÞåÔGf&„ÑÌ{8x ?Ÿžžþ÷qÞ¥¦ºœÖRX„î÷ã«›‡*JOÁ¹¬n!1·04°Þ!¤ˆÚ” ˜'@ršB4D``lîaaÄ*Œ/k"é““”èf‹›LÿüBDý qÒ›»|w-¹ÃÖ£ì@Ïν¤'°üëwª K9÷ûðåptóLI €ššicÂ8[¸Z+u]L‡Ô”b}};]ϯÎçåéùø|X–ÒÖ¦mí†;̉;7Óá‚sÕÖZ'emæÞ!²ÈHL)Š Ãf·Ý]íïn¯®v›Íf»ÝlæaÌ"" ")ª:C®¿7Ùnvß¿Í×û›ß>|üû?~:ŸŽÐŽCî.ç`îxj@@×s @50§PFAV§«{6¥¬­ÖåxXŽÇÓÓãéðÒJé*Ûý< ÂDçÒÌ£Yœ«?ô¸š£lv7ÿòçïþüÃw×û- ÏÇ_Þøñ×_~ýðñp>[XʆäÖÜ-d ɦÞÖZŽ…„Iˆ„ò$³ ,‰yàJænÐá}A@77ôu]Õ´x(DC&Ùoç¿üð‡¿ÜÃý -8œ¢ë­ .4ôˆEë¶GÐ"PXf Ej}ähÀ¸¶ Â<ŠÚâÎK…S%M"|yÚ 7³ä_ë.†HÂ÷WãoÓvCÂ_cÞ 
û‚±¬!WÃDLu˜J-UëŠçV–óéëˆáîZÛzëÑjªªÚ‹6bîÁén¦bÞð~¾¢o¯OKýô¼]ÖV«®Í-¼¶ p@ !b€p!ÉÓv{ûð°Ý_í÷óýÍv»§i†Ü#Ä!Â{7@ D‡_w:sÇõ÷bGÊyØl¶÷ïÞùù×_žŸŸÖåèÚª9ª…wH¶:%‘,ÚRÌUÂÜ»u:Óîêêj¿¦<ÏÓ8N˜“ÌÓ0Ž9'îcL)1 ]”iîÄŒá®î|#"u6[÷,Ä%ïa²ðÕ~»ýþíýéðòùÓû/OOç>5ëk‰iòfžIr™”®:>žm]–e]ÎçµG*g¦†~h@Áϵ­UO‹~>Tàáá›×oÞÜnvóýÍõýíín3ñ%I",1nÆœÓív¯¯·»þõ§{y>B5 …P(kUmÎL” G$æð‹°`ìÖãK/D@T¿0Ì„Ò (´›wßï¿ýó÷oOí¨Övãæzš§<5÷yp¾ªì="FlŠ.Šajm’lÐé|Ø£É"´¶žšLÑ¥ƒ5Â3“ˆä¯>ï`05míþÒäÓoŸÞ¦1" é&ãÛ›íÛ{s§ey8^äáá½.!F —ÃÇß°¯utÃV àjAˆ©‡<€;B4³Ò~ïB™¬™{HNèÈÍ@¤iŒIò6ow»{bšÆa&t³¾È…Ç!‹H‡¹æ<ÄEdMÞ#í.¢µŽÚt/ër~yÆpÉ9"jiW÷yí‹Eq™Ã‚! B8ä„ûÍ_ß\Ϧµ•êªe!­êÚèt:÷D®¦ffª™¹ùjVÝ 4;žŠY<Ÿê—ceæÛ ßoèõíæÕ«×»ívž'„@ è±Ø=4™(!^o¶CJ¯oî¾»øßÿçoïßi¥rϪb§ˆaQŠ 3RfDt mÍøƒE—W(¡¹‹3Q8#î§ùÕÍõEbJÉÍ[Ûo|H|²r°ˆn´‘pÓêÕlÒ¨D–„̱)Z¢˜[‡EQ"Y8‰02™™V][+Š0d‹ ¢n2|{sýÃÛ´Ûô©*"v×dÐEvݤKh‰nÚ·&î^ÃzzùКˆt7Î8 1 "HKµãÒŽkóÀiæ4ŒÓ¸¶ûÍf³ßoò0 b÷¨Sf2Õ®œrsIIºo˜XDä+9Lö¹âEL¦Z–óç>ÿòó4ÈöúºæyãîÑÇæýŽPèØÃ €¤Õj½€©k]–õ|nå||yiµ.k!ˆž.Ô[К¶¦=£µ™G0C hx@H€š×ÅNEO‹65÷Xª9B(mýôáãf÷Ó<çc"f–>ÈGaî}¤Ín³Ùóõ¼ù¯¿ýø÷­ÖÑÐ-‚RNiUsI BNÁŒfèýª¹„¸uϬ…77h<àÚÖz÷)Ô½*€LB˜¯w~»Ãs €àÞ¿-€™ ±5Ó=XBAó”ÐWŠ¢`aMM- 9]z3Æ‹]Ö#Â[mK7›»)çÍnÀÇÁê˜ÓÛ›Û¿|·¸M9ÃE"è¿+ŸÁ¡#F @ú½Øƒ•Ã=<šyBÀ{OÊaªjgkørjk³È™w›ñæöæêúvÇaš$efÎCî2±N”í¯Ÿ 0œzöC@ÄÜgν–vwì0ùžLâ¦ëéôòøåão¿-§CF¦¼Ùó¦µÖ¥Æîfªnn¦¦M˜‘ÈjYOåt(kA°VkY›$v³Öj˜“}]˜©»×âËZk³ªæ`á@½™vo$Ý—ÎÈ#““™q¦p€µÙ‡—“ýóçRÎþÃw ßÍ»ä˜U"º§¯7D ûy›ßæ›íÕívÿ׿ýüx~ÆŒ"Ø{&¤té‹ý«h „Ñ=À ÍþMè ç óVîï®6›)%!û: ¿|U 9]oìݘš 9àW8•#HR¥ævjëg".€ ™É‰zø‰ Rê݈ €èW÷AÌ‘˜(m§ØOÇbùöjóúf÷Íý´ß¤1sG¶˜5wf¦ $~÷ D"‰›@«­N˜p3HeaˆhÝàÌtZÛ©ú¹ùRôjN·Ûa;¦”d›‰ÛÑ} 4·S74×ð!×0 ó¸¸ëz¾¦$«XÝ/vd$H½Cbïÿ&´kµœ^ΧåðòbáÛëÛy»7IÙ[;¯Ë‚ÝÍlµkõ|x)çS@¸©2Ek-ÜkS³Hé"' ¨E»±v­¶”jfM}Uí¬ëM©÷z4\#’]°gƒPfnfü»®õˆ¢õ×÷ŸFˆ1‹äœòá_»¿ ÃûçÑ­rwwã0ÜßÝÿÇßþþîã§sY@Ü!$·¦Mé"dërZ »$Ib †ê‰ÓÝÝþ»o_½¹»}{ÿz3„¦¦õ¢”„4Oö殽{ô/6ÁqiT/ª4S[Ì©À ƒ#;b!xJR!;†Ì BÕ‚õ]¥GfBb`ržæëo÷r#Û‰“`·>_ o53„ øµ ¹ì"\×6$F Fµ@UEwkÍqNœrš§(RjuGÏLÄ´1{o¼ñÿ‹7žñØý)²êéEâPí^(:Ô½ÅS8x$ p2á°Jq%óLn= M½‚åÚ8œt÷ë6άóLÓrŽmÓàWOƒÏšÅYتÎsƈf!‡æ¾<óåí|›çY%3};O3 Ò Ûêö…£H-" ]#áÛíz»Ýn·ûõããÃýv¹\öÛõºµœü¦ºSJ$æœo¯¯Jµ__ßÎãøûßÿû—_~>X]&+¨‘,W' 47/VƒÅ”’×õz¹^¯>|þøô·ùé—ß~þù·_ýòüvœ³Ò4Äæ—ëþt¿þôù»‡×q½îW7„|ûú0÷(geÁ 
4ëœÓbßì~µë%>Üòõôc6\)¼“æîHÒ2‹œîYi¨9…wÒŠyL×êŸv‹À¾íŸÜî—øóÇøôß?mhe1Ü— XÎy¾¾œ/_Þ^¾Ìó ujGZì—Æ³’f½ãҠѹ@}Ž#ÿùòúpÛ†ÛkÖ¯ÿóöxß?Þ÷»†£Y›{Žªó$Ü2ë<à^rC»l7Ÿ™Ç9Åd‹°"3ç™1|hôY³ãYj?Ö–•ocøØ6Š|;Ž¨Êªf¯άcNê˜ÇqŽˆ}Û|æ1óõœÇœ™uœéao€¦“¤àVi!Ù1€mcÜn×ëõúôøá‡ÏŸ¾ûîév½lû&…ïó%«leu]•™WUKçyºÛOýóóóKï·«”RÍ-s*`€›{xG1݃գ#6˜¹ÍÛõr»^?}|ú©*‹U•55™™»ìûe¿ŒØ´4JLq©Í\EþåSMuã~Ã_¾·Û%‹çþcþúŒYFƒz6”öý‡»S-#@nU¶ðë~ùôàŸî¸ >Ýðx‡ëöð0ö½êŒØ=†^7@æ”DcΓÌÊ ô„³&²çkT¯Ë– * ¨Ù´}0Ý_3ùvüvž,‡à¯Z3»(ŽQro$%¾ºe÷ˆºŠªÎ*¢¢@–ç™F SÀf¹Û~‰3‹•Îpâœ9Ë|Øùúzδ"Áœ³jQ¿çœ6k#ž³Ž3Ï9¡Ü2‹€ŸYG„Gì—4ai—m\o{xÜn—Ïß}|üp¿\ömŒ± ît¦˜G•`×rï ÌYœóâO‚ľo~Ù·ëíJrŒ£‹Ü"­$'¢ˆèuºæ=Å!ˆÀzúŸ;v˜“jâ#±z¤Ž”#\ÿÐÌ ï’Ÿž™½(¯‡ –s×Ôk†ãOŸýûú_>û?~žÿõ¿ç//u¦’6ÉRÍ ÆL dV0ë.âuÛžîþéƒÿé»íÇOñឨdÚPŸ`1¼4×Ѐ ±BÍ9Ï3IÜŸ.ׇËÃcl»y,[5÷ÿñïÿöv/__Ÿ¿~}ùzÇñõõíàÁ8óÈ—£Üááî|y;PX[|„„É4Ý„°re{fg¡Š8Kz`@ÿ¶¢(Ì³àÆ·Câ4b3 blƒ¯oªfáYg¡Ï¡.|„O"iЦ3ºœ5”Ñ7 /Û6Æv»_I\÷íá¾ßn·ëõ2Æ8Ïܶq¹l—ml#ÄÚb ¨éAšwŸç @“WŽ°Ö ÌŠ)ù3MyNww³Jv."b,‘Ÿ(Ò5clpËn E£“ÖyEV¦úŸ„±èö‡R5lóaÀœ“KÒš„{èÿm·9¶ÎUÔÉ1•‰ïBUEƒ]wÝÌ.»üpùéOóçßì×çüí¹^^¡¶ÙqÖÌð͈z9æÑÒf4`þpý>þüÙŸ¶ÇãzÕj„±–Wz„ÅX¤“^–,ÖÌ·¯_ûåwûáÓOׇûõæ±ÙºýîÄøÛ¿þ$06«4‘ñåË—_~ùõõõõíÔ®ùõí|{{{=Ž——W–<&Ϥ;b˜'4Ç6< n`” ™BM@„R ÌbÃ.æY6Ik@Έ€á•i†8g¹;Ã݃f#< ™m1>>}¸]/î1¶±ï’û6.ûæÕ1¶Ý̪¦#<Æ0ëýD‚MÜ ·ÅMÓ•òX ŸxH¶ZOŒÒeîž™¹G 5ZÜ4R^f ˜¹%bfckAPÉ%N·È².ãqÍø˜IàÅÝ»T¢äÆ8ÆèÅnž¶8ŒéVTÕ¶…Y6ÔÍ÷ÖÁª…hæ±EV²ÊÆðm³ûݼXçÉ9íÎ<¾|yýý¹29+ÿù[½¼‘Ü6Ü/ñùqûáÓxzÜ®·Ø·5åfd ;_”NCe~ çy¼~ùý|}yüüÃõö°ßî.*fi0{Õr*cÛÔt%ðôpûñûï¦öÁÕ1x~y~~yþýùùõíØ¶È™ÇyfeV9|ùözœ3 Æ6´¦B³áò4ûæùzfµŽ¡°Ç0sÀn·Ë¶_¾¾ž—mxøqœ0|¼_/—]«G#¶×ëår¹H6¤öSq˜U¦’ÝÛ%bÌ™çñ –v׊ÇR¡–Iû$[åE  ÚÉRÅ\š% ‹‚2лÂÜÜBnÃÃ…à«æÐ&™ˆñ¾qxÎiUÛv‰.#~ÅAMЯ©²rÂ1`¦Ô±4xxÀz:TíbéÇ@‹å…ç³µ¤¼óBÍžZ"=B†•q®Ñxr|~¼œ³ÎÌó<ÿò5çiUNú>ÆåºÝo—ûmìûRPÑ—t+x=`'ôŒL3±{Äýññéû÷ÛÝc¬Ä¦“m 6î÷;AÆSæyŽ1"â~¿ÃŒ%ûx<ÆJQĉN5~ú'æIDAT¶>þ Áѧ2¶MÕ‹Ûö]&¸_ö Zàñ. 
%FÛ¶÷§÷‰ÑÌçœÒ0{ÿRS%‘YÝ˹GO¢¡Â-b,Ê-¿²9Ž’Lc•D²¨)¡u4ß wªVUK{ÂôxfV4ý€:fp·1B0ðŠ_ígÓm sI&*wjÞŒ~ó*ÃÜ+“Fk̬Ìt§¹oÛîݱÓM±mÛ¦7,Ж‹˜Òi%ÌXððœS@úUÉEÐ?©*ÿ–Àt¢J.B@v<ÒœYh’õÒÑ•àJôµS=Ÿç°å/çïšJ·´>Èz“²é«õa¾P*ý)ö«ris÷±Ýž>‚ÛÎbë`±–õx7Š7­6ƒûÌ4Æ9 @D¨ó!ŒIQÈ=›ÈRºf&%c]Cè“­>GÙkU‚ÜwoÄÊ}Œ‚è]ÑJVК)w—Ej‹±Ø€1+#Ø"ÆÂ.;õ^’íëŃS o¬¡ }Á‰kL]#@V‰vuëÝTgžšÃ˸*wó1Þï¼.ªØ¥Hš7TgZkª>¾»P°ˆÑɱYÈFCº¤6ÏÉLR["èÞ`m“’5ÏWRÚ©V³žS snz´‘Kê]í&BY <úï½ïÒJîûÊ ‘‘¨m¹09Êo÷¤ŠûaÖíѧ…z¤ÛáckÆ‚ZÈTŠ´™×!'bl$Ñ«³-bë'4›çìs^*¸àAÀÍ\Ã?\¢"€,S—ïÛMu¨l:-4¥dZ]¤ ½žLÆ0°J4׆ Ðm:¬Ì²%Š&SnüR‘AR„ÊèõõÑþ¬û¦+¤x'yë¹ÖZ V”è¦_ĨJCóƒ9¸ÜC³%]k³¹ŸmF)Æûûò^øü-š… H˜D4NNwŸ™l½oX,.K•5Ê‹w ¿XѰC¨âg?¶Kü'"–‹é§×Œˆªš™YiïÙ.âÝÅöß­¡—6u%E€ûr^R…[ßJÓu2:¬bú½Ê+«ÌSºU^ÅŽ®rc0ƒ±í£§Å<6©K˜g 亙>Ñ@—çê&%ú`nQÿŸ #o±´µ²¾h×ÂE¨eeM«ZxZG0Ñ3 æ ¯+îb»ºÙÙ9;nº§ £Ž;j¸¾£‡d-W=ÕH…HtàµhûkTH´Fu1 dÄp)Rj_ªb{ÉeL™© ©z³–··e(Ë´ð3‚L# 6b+W“O?о_}7§ÑhU)oU)CèøÛ_±þhz4;q‘D”½‰ÌÊ „€ÀÍ£ŒeSŽ. Ùàú­ûÝÊ@¾9Úøžj®ÄFW?Ç•×Úbá­ŒÑ㟌¡ðÔÞÈ,Üg÷KÝŽžƒihf&`bE~GUf’¥bsêtLf6DF‘5ûÔšçZ#Æê\#Æ0smÐÚìL‹u õɰZÀx_ ·*ÄèZUå§õÍ“œ™ÈmФ]ÁÀJÝ¿oy›ÜŒ¥Qæå¶¯Ù!€w,ëlZ?C²VsÑͬ» kUK[½»EÒ6ñî;”Ï™dʬåÕl¹.´êhßä¬"3ÍÜ´òÊTŒ9ȈMD§žb¹9Æ2&9`Ðú%¨¬öñˆúb·U5^±¥ÃËÊè@À""S“x˜„xu­ÜðÊüÖjÅ€éf#b¬Ó¡™.M?±ÑhÐíïsïËn$4:¢cõ#Ó]*ë²a(â~O½þÑ´=ÓÝ’ý±ÉyyË7̪ىÎ{,­"óžäX1T&¥S~ù^*UU[¶µY’dó›ª3 ´{]Þ²Ä.JÌæÔ“uL‚p t]Òø±ä˜àµ¢V]=ºáÁåxÐðÍJm,è!ˆòU!¼øU*Ý„»ÉDÌŒ* scÑÝH„»¼§:±ü‹ LyÐU›ö“ºGó›|H7S½@¤à9yçÒþ±uåH+ÂÃ"FV2«£N÷xÿ¿– èoË+4BaVW•r!$Í1ÜŒBžåhj ôtñ{TOغî&Ò…÷1­Š£E‹± UúŠ€=>•'ÐAÙà16÷ˆLÕöV¥B.KEgŒªHTÁ±X9›kð±)í⇄êôJ–£QÌtw¬†©AU¹+£ÂŠø‚V¬dAkÖŠF2óôå^2S¨P‹0Á6Ä•)úÒÄ"˜ëU»«5•Þ¥+è`•òÈp«ì„ä[¡ß·ˆ:'6Ì´ V‡ø¯±æFúrªxZ¼ø†P‘€®“¸H¬öÀºo ®UEx–ì £Éåü”$ £ÑÚ‚ ¼ßÒwlKfª–‡(VlôÕWÙ¨ÌÊns˜Yev _kÜŒ•ÝlŸÔêk=ÁWé£×–ùlm;zIfií}à)kïO¹åè¿øŽÌ’Ì’Ì’Ì’Ì’Ì’Ì’Ì’Ì’ÌÒ%™oü̹ÛÑæZIEND®B`‚ocr-1.2.2/doc/html/images/overview.png0000644000175000017500000001344512401076126020437 0ustar dalitzdalitz00000000000000‰PNG  IHDRùýÇ®sRGB®ÎébKGDÿÿÿ ½§“ pHYs M MÒέNtIMEÙ !âT9¥IDATxÚíÝÍ‘ã8š` "͘XªîmåÔÆ:Bú0×9O– s°‡"ÔH¤Hý¤Hêy"Ý¥¤(Š©^€˜R ÷úXû€cB芻†‰M‡”ÒàÀ{ˆ·ôT”Á"¥g~>ð~ÆØ9Õ«E°Âß—F4”ËE¾^Z¡b 'ßÀW ç€Ýúø†çZß¾§BFD®l;¤”†âÛý—÷‡”Ò©Ún(ö[¤ÇÕêY8×”ûN)Å+ûì¯W= 
û‘RZ}¿Q§?_¿íx__m×ÛõÇwÕ}i¼uõ}ço=w~üyßÅý_Žm渺‰cM}N>×ܱº¹¹¹¹¹íéö”ÑsÛÿ>ÕßÂËQ#c/Bßê½hí#ƘZÇÓºͶ3ûèëž„úøu\àòÇ|É0s©¤»òØÓ+OZyÙ£1²Åå „Š'Ëm74büò%½Ûh#Ým!ÜÀ»…ŠÃ‘Œ1ö‚ÀqåËÒÕÜ®qßù‹ð;· ?žüËhtÈ5sÃ'‡%¡dË“j™ð à0N)¥ó­üAuÿÛ_þ~Z¨¨æ]&Ãd¹@¦BIÙ˜?«a/ÞT]ë9Æ µû·xhÿwôPl}ΧOzÑ9P ¡åQÌ´Ùj»òX$¿.ƘbŒ}¾ûªÃJ¹M÷Ä_ÄÅó·tGZ-ç·ÐÛ@ݶnº]øq˺h¬jîyj%¼rm*( ­®¥pYäÙºf5ÔÛZ½e@YqW½†ü|ùØÊa®oÍ}ÖAÅŸÀk­í}xVoÅ^f]¾iž xãƒ4ó÷ZÏBsÖår­•¾ë™ë}]é˜ >ã¼¹àôÃÛžÒ³Ð5ÖÀêcŒ§”Ru`8‡†ò±cØH­Pp%Ptõ$‹åÈÅÜÒ˜Üñ&?üêàáææYšš41ß߇nj$™:†næX£§hí¬Ëãåó „ÐXÆâå!ZEžùy¿,=qk°*à±VϺ\ÕR<:äôKÍ.îš¼ë™óTœGƒ·Þ0IÞÁµ ýƒž¿ËõS£1‹ðSο”nm«Ÿ*3 s/ Þ(ptÒœþàŽý÷Kc,¡˜Ú`S¡ÞÑÚY—«†½œôñžžýî–íîn\¨€Ç[3ër½8åԪ޷̺ÜUÏ}6Å~ú™ã*àÑÊÙ–«{jXæµY—Ï£.ª}tû_<ërk6êêòF¹ÔE×Xã¦bͧϨ95óXqBº”R¬g +&æhÞߨO}2›oˆê—öeíÆ,g¡õó™çîê!<ð.^2¤tl´‡0‚”ë~äQ§¥\tRÍvu¶°:Ôûk„«³ µöW(x;/ë©(Ö˜ cÑý­í&î;Ï£Þ ]cÛ¡*¢¹x|¾.Vïï;–¾=¶p¶°µ=(KgA;ÏB–ƒ„@€P±ss³…-P>fÍ,hå d›Vöj×£?ÊÊÙ³…•ó›—ã‚O3!eRk¢{'+¡âurMÄÕ‚rxMQK±úrE=>¸6ô%´€PqPUÆ©(V΂ÖÔS³íh¬aÕ/ÙîÎçë^ðZ›e­ÚÇF„öìcçå[Ëá£ÕáMÍ=QO4’÷])½¨£P[pØ/¥]«]ªå±­¹¹˜k3¿+XŒ¯!Ü2ïÒÓBE9¹UÕЗ »õþ*,”÷•¡à4.ëzma•Scö¾>îF€¨ƒÇ P¼g¨ÈmÃ=“ ¶æJzÁëM› [|ST_}™ÕL˜¼:TläõÞ*>Þå Q¦¾±7¡žÜ ¸ÃáCE1íws”È+ŠaxEÏøì:W/¹ÝÄõÂc‹ÖÄ¿PŸfŽoÍk)}¹ìÿ=y’ª>Æ8,=ÑðåLÍÍu®ªõ¬B«‘ŸY—ª¹ÆšV}Ýk?³íšçÿ².ÖᇔŽ/ôT„‹n.%ÀÛ òÒû¹ñ-§0¨~>Õ&5G06z(ê¸ËKþõSÛN…£¹Ÿý9†w)Ô€[Ü[¨ù€éºB¿à¹/ɬ{+ò~¯lÛ|þ™PqIÞ.°iݽÛÓ+ܲÏV¬‹õÃï ¸fÉHI¡æ OÚö!z1ŠqXÐø½÷Ø:¡®¨fSî¯4úÞr]ªj´Æp­Ñ/¶i-o±úùóë>/a¡P}ÏâÔX7jrzí{ 5Ïÿe]ªj»\Tzj…¡jHèÅ:#õ¯†±cÙºXB,Í¡˜abí§V#]4Æõý]Õ{0LÌ!1;-Â’Iª¦ö7ÞúÐ^óêúºXBðj*¡*¡@¨„ `Ë>BX6Ÿ÷^XÊ^#÷Tty=_)¼FL)5×ßå‹9Èë€=RSÀv|¬ÙxKQA'<¸ ^SS1.“:àuwj/àÅ¡BA'Т¦*€íøxÆNtpDf ¾r~žQS¡ €ƒ† íÛ+B…‚NŽ*´oÓÔTB°¯|òß/xùßÇøSA'|«cþ¬L<¤”g„½?ßrÀÂKk*>cLñ/)„î—Ú ¶Ýð^ýÀ(>`rC}zñ±§©ãuH) ãv§BÿêãK÷±ÿ¦ XØjOÅQ¾áþyóÀ¿…t!„SýM¾øÙP†‹"X 8öÐ Dùgc È?ÿ¿©í¿û8£áèá=W¶oj*àÍÅ’»W_No{ š w>ΔRL)ýÏÌößé6ÐË/ûìyõå#\68Êëà=E¹]ÝøÍ]rØÒkÛÂå½oû÷ej…{xQ¿wP𢠓Š%Å‹õ%‘Î\tޝ֩ÀÊ¿ÙCcJé´‹ž °ü[Åø¿§µ—0®õpT,CnXËoëS nµM?õóêñ]J)Ö÷Uah¨·_ø¡84êLêcŸâÏóù(ÎÑ9”Uµ(“E®Õ1\œçúÇývã1Üô;eû=‡+èL)ï߮ݽݿ¼ÝÖoGynÛ¼O 7¾Ïòã÷熭k5"¯¥19Kd(ZA¢d†߸›Û4挘­›¨·¯ 1/z'fÂÁÅöŹ:÷ŠLÜ?4æò­×è™ë)7.ƒ°iN.XÄÜ…_L†5Ìô”…“e£uÃIžœ'¯ š»r2¦zÔGX0Tk2§æ¤ZWz5¦¶ÏÇ?´Ž©˜´ª_ÓPOd5õ#Zº:°çjöáá4õ»Q¼ÉìfFMk„À›~H-\˾éýø’ör/çÅå`¶O¨¶ö§« ;çæ­„ €Iç‚ÉñÒ‡Ë 
T¬7$æL—=`GŒþ¶,€ÑS€P—j° åròF pó„hãz-ÿ !üGÈ*à^ BBG1®«Ð9À…Œÿ†þ)d<Ѽâè©xSã•?,`7bŒiÅæ.}p!B¨!B¨€ï5BÜ-¥tr¶Íè@¨„ @¨*¡*¡@¨„ àbŒ½³BÀ#tNwɽ1FÁ„ €‡*à |Ôß(ž&þáTÀñåž kÓÏô—SÇ÷Âò5êcŒÉ)~^ôÕ¿»”’/0p`j*€ï¢®„ €‡„uppk  ¸‘º 8¸µ=®‡7ù0_Ûªž À:_Nà¸ÔTÏ -ê*@¨xup`ÏÚñ’‚Î;ÿp8×þöÍWÁÑßãïìY=>0€)Š5Ù3íÛŒ§ôT,-èüTÐ ïÔU°[,ÌSS|7upPNð(ã<u÷°a¤ T|ß ^à0ÆÌ¡ ii—1,d¿]ûöÒËÑ·Žé-Û·˜ÒökI>cL¿RŠ{?ÙGy°òÛZJÞ÷pè¿ü:j!T±›Ñ :8¢#tâ( S¨ Gz»(Ôvý-L¡&¼ 5€P€P TB TB€P€PB T<ÏgŒ–a¡*¡@¨„ @¨„ ¡*¡`¡”Rt@¨*¡*¡@¨„ @¨*¡*¡@¨žìÃ)žáwŒý­ý™ÒÉ¡xŸÐÐ…ºBHãçĆ©ŸÕÿŒ1Í<~ø™Òà7ÛSJ‡~Ÿ1¦_)E¿j¸;Dô­ðìÆ~ÁóêÙ¡B¨€=‰ï Bl4LŒAbÓ—ò¥˜ò˜… *„ ØP˜ØcÃ|„×B…PÂÄv_ÓIq'<Ÿy*@ èR] aø•R<Ê7ûŸ)~¥cC ¡/‘BðDÝ‘/üLé4Ö… TÏ’/¼AÝÁBèôV€P<'Pœ/{ýµþLiÈ—Aüæáy̨ùÂo‡ùßùC½õŸžØ5ãÞBÖÏ“*û}6´ŸQ 3ð8z*^(†pjÆýLé”oã]Où&™ƒÍ=k4äÇ&׫wë. ä^¿m*ù!^q+>ð†*x(—†º/fF<µößøÀï¦zê0píØçÁöݽO §Æï»k½—9Òbì…Ø]lâÃzÆ0ÓO5ðÃÒ†þÔ?Ôû˜hLæz;µ“ñ¾Sù˜¢D ØaOÆLÏCæôæÂBEåû6Mô,>cæÛòoáÖsóˆ}€P±kf²,>´†…½!¸˜¹b TìÄÚµ–~0Œ!d˜XÛ£+zú‰o”ÇXŠêþæ6SÏÙšžü(“.Á£üLéî0ðˆ}€P±qcãÙ7îën 1„SúS‰Ÿr È wªæ­(z .¶›¨yR£f£¸¿,Ꜻ$òeÞ‹²£ìMIÏ‹`yj¶Ð¨Ÿž¹=QLéØSáƘ\£ßd°ªÉÅlˆ>¤ý}líõ–Å›SïÏz¶ÛV±äD/a] }¾[ÌXÛÜ÷Ô1=bõ¾&¾„•ƒž ^'…Ð7–|$Ø{XîÃß“…5/=¶êˆÆåçûV¨ž¢Þ÷ð+¥˜{½z_ùxsïhqŸ@ž ¶ù{§þöAåïc—=Õvïã<µ}=-}—#ŸZ÷¯Ùöû˜Yÿ§o­=Ä{3¤”פÙNu]Gî–(8ŠÖû¸± âd„ xÀ‡íP¨ºüÁÛ¸¶vÎN¾ÿl*\¼Eˆ(½0o&.Š”ó*[ýÍ—:Š•‹M¥P¯RÎl8®ôù¯Â„Œ÷ y5á½…ÿŒ§¡^2þ2ÞÒ.ë'êú'*@È`;¿ï®5äs‹w± r]Tê½É{† 34r€ñßÂ?}oã3¤_~­™rËmÊú„1P˜§‚¥Ÿ£¹†¢^ÆÞ\¼U¨€­4f+BÄ·]ú*X(nù9ïEM¼:Ù«Ÿ`Ãb¼ÔÓµÞ›&îB¨!–Âß«7ß«z)*àûƒÄ D°7cÑè)‹y?3ûY§¦Þ—šŠû•#7Š·ù­_#ŒPhd½ÞÅÁ¢µÿr¥Oá‚#ûá<×Ï”N1„Sú»>áæGÃä*‹!_I!Ü òì–Î&B€`q*ª`‘#sÙ2£?¾Q ahõ6´BÆÔ,–­•BçBÀÕ ‰ÕÓ]Ƙòl•caçgf­ÃµÇ;Û|—?60ZCQ—ÖOÜûxx=/VC];:äÞǃP°c­ž…¢.â¦y,î}<;2Q¤Ù…º²þá3.Ÿ£ëÞÇ㨩ø&ù²Dü3«æydÇ=CE 5E¨xÃ@Q4þåå‰{ëÔO T¼[ ˆ! 
1„SkíB÷;ÆþwŒ}::>öbZïüïòþ5¡`›¡¯çœ(ocßaâ"PŒë‚äé»Ï‹’•…œù11„Ó¸]?ÎA1¬y<<›UJáY¥x$=€P€PÉÚ%¸½N`Š5áåe¸?cLy´À‘VµÌA"ŠÈ£'€'}¦ýäiž‹Æwf}¼@¨õí~óòD8T¯ ÀáÆVjA„ `Ÿá¢Ù€¾¥#Cþ÷ƒBpGÀ¸øPù8ÂÒÞƒztFšY(KÀ„¥ÁàJ`(  T\‹¹"\º€ãøáR3µFbÅIEND®B`‚ocr-1.2.2/doc/html/images/overview.fig0000644000175000017500000000352612401076126020417 0ustar dalitzdalitz00000000000000#FIG 3.2 Produced by xfig version 3.2.5 Landscape Center Inches Letter 100.00 Single -2 1200 2 6 4650 3300 5850 4500 5 1 0 2 20 7 50 -1 -1 0.000 0 1 0 0 5250.000 3364.286 4800 4275 5235 4380 5700 4275 1 2 0 2 20 7 50 -1 -1 0.000 1 0.0000 5250 3525 450 150 4800 3375 5700 3675 2 1 0 2 20 7 50 -1 -1 0.000 0 0 -1 0 0 2 4800 3525 4800 4275 2 1 0 2 20 7 50 -1 -1 0.000 0 0 -1 0 0 2 5700 3525 5700 4275 -6 2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 975 1500 1725 1500 1725 2475 975 2475 975 1500 2 2 0 2 0 7 40 -1 20 0.000 0 0 -1 0 0 5 1125 1575 1875 1575 1875 2550 1125 2550 1125 1575 2 2 0 2 0 7 30 -1 20 0.000 0 0 -1 0 0 5 1275 1650 2025 1650 2025 2625 1275 2625 1275 1650 2 2 0 2 20 7 10 -1 20 0.000 0 0 -1 0 0 5 1575 1800 2325 1800 2325 2775 1575 2775 1575 1800 2 2 0 2 20 7 20 -1 20 0.000 0 0 -1 0 0 5 1425 1725 2175 1725 2175 2700 1425 2700 1425 1725 2 1 0 2 20 7 50 -1 -1 0.000 0 0 -1 1 0 3 1 1 2.00 120.00 240.00 1800 3000 1800 3900 4500 3900 2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 7875 1650 8625 1650 8625 2625 7875 2625 7875 1650 2 1 0 2 -1 7 50 -1 -1 0.000 0 0 -1 1 0 2 1 1 2.00 120.00 240.00 2850 2175 7650 2175 2 1 0 2 -1 7 50 -1 -1 0.000 0 0 -1 1 0 2 1 1 2.00 120.00 240.00 5175 3150 5175 2400 2 2 0 2 0 7 40 -1 20 0.000 0 0 -1 0 0 5 8025 1725 8775 1725 8775 2700 8025 2700 8025 1725 2 2 0 2 0 7 30 -1 20 0.000 0 0 -1 0 0 5 8175 1800 8925 1800 8925 2775 8175 2775 8175 1800 4 0 0 50 -1 0 20 0.0000 4 300 915 1275 1200 Images\001 4 0 20 50 -1 0 20 0.0000 4 225 645 6000 4200 Data\001 4 0 20 50 -1 0 20 0.0000 4 300 1140 5850 3900 Training\001 4 0 -1 50 -1 0 20 0.0000 4 225 960 7800 1050 Text as\001 4 0 -1 50 -1 0 20 0.0000 4 225 1065 7800 1380 Unicode\001 4 
0 20 50 -1 1 20 0.0000 4 315 1080 2325 3750 Training\001 4 0 0 50 -1 0 20 0.0000 4 225 1350 1125 900 Document\001 4 0 -1 50 -1 1 20 0.0000 4 315 1800 4575 1950 Classification\001 ocr-1.2.2/doc/html/images/OneBit_generic.png0000644000175000017500000000065212401076126021441 0ustar dalitzdalitz00000000000000‰PNG  IHDRFd1{ pHYsêe¤\IDAT(‘ÓÍJ1 à”Y˜ËB}¡>ˆ0¯åAhaŽû¾Ê,{ð1¬xðZØKÅÚ˜¤?+î¸8§–6i’äo0Àv™*\tSš²jB×µÜ7ùÛ¦pÝ·çJ'mšr%q¦¼"t]+kkÚœéˆ-—Cn9Ï©IGyÑÓø•YQ1<+èÕ×À µZlÞ–ú9~¢ÔTaØ*®sÐ뙕ª-²âˆP¥sU0ñF±ü¼N¬eÚ{Evöe×Ù/'•»~ªìæ“ԺʉÔ׊8F}bWº,‰öù§lUþ%l2ES‚¹$§©H]átÍò§påÊ¿Ý÷BúJ™Äþ®l)IEND®B`‚ocr-1.2.2/doc/html/images/GreyScale_generic.png0000644000175000017500000000760112401076126022140 0ustar dalitzdalitz00000000000000‰PNG  IHDRaD´ ú pHYsêe¤3IDATX…5˜ë®]WrǨª¹ÖÚûœÃËZ-“­´Znuì°“6 üyÛ¼A€ #¾ÀIÛpÇ’£Ö”HŠ<—½×ZsVü ]/P¿ê5>þ[©W_f2*÷’cDìiÖ«@XihÔÃŽËý(Ynna÷ç} w‹ÃØGI O"ûH{p@ÈÕòìVHP14$qêÈf€s3:{Ê©ì£4em0çyÜú–ƒ1,-g¢ÌÈrOg– :9Dk苗ԠeY4  j,o±í{ŸåqqÚ"{‡€!ÅJÁÚUi uw¤hVô‘5öšfVï×ÔyžVˆŒ(T§ÃÌÐ×T4ƒ/nuZK!U¤gã*Er”jP¢Ñ±§ ±ÃÏœSy×ÑYûhJá_‡B×T…³ˆ²¥­ûV–«5+ª Gv‹, Cäà\ (I£5A<ÚJSJ°¾9ŠVE€p¾Ë>Ï™=·w‹YªYÁ  À4*TH³OIåÊ,`+-l,IÂŒª )•D Ú7¯–Шý8OG)‘jVETxÉ ¾ €¡’Ò8àS¹ª¨à(,9Æ:rD”*m¯cÞöRExUI§å0`…H9œé’‘ Ê4 ’œ*b J4ª$Ì3º„af“úÞ§XšjŠ#=lÒ”lÒ &r+%HШ2$ÁÊIw‘ˆ)SÀX&ç{ÛïÌ[CÊŒ(z;¸‹TxÈa h½"‚’` `(Alä,ÙE0ú°©Å´ÿÓ7óñúÉ£EZa>¸@‘Ö‹,ît£™Ñ›C©¥*¸“V"ßí°æYŒ@™ÄÌÊI·ÄÍÿûŸ§c|üëO®zŠ”™ÌAaâÔÌ¥pY“— ¹›KƒYTL£±¼ fV\{U©ßŸr|óÅ«C¯|2רaî€8uù„ìt›:hUÕÚ6ÆM‡ó̃«J«Dh>¯£ˆ*TÕ?ôøò¥5çrÙªÁ²È!X0w‡J¤Û^’Y˜é¼i^n†‚„‘æm d*ê€×÷ß¿é¯+ƒþðÉÜI$ÛÁO£t1Öa­@£Ñ:ÔX5ÅÄ=‹ E)²Šªä`e®]±ì[o¼½¹…Ö¼ÁÊVe‚&uЪÑ<%`dŸ\sX»ÛŒHS©2l/©h5tëpûí/¶˜Ç^‡>|“ùîŽî‡‹nÓ–ékb/c)1aUèa" Uí»4 jгlʯþ÷‹©ÆáòyzÜu£s+ì¦é0ï9h$FÁª`‘Â;Š,ÈQk—›+›UÍÑK5T6Ý~óËìW¦ ­ÓÏx?Y´Ú77ݟͪÜ%cf@R6EªÄ’gª F›‡TÎZî• ÜÝâƒë6®³aäíõÏ޳ʘ2³'=Ý]þ•뤽9) ÔÎ1hÞÝXY B\k&p¬ãá/kãÅÛr}ríᨡ.t€‘í°• ô–›`ŒT9M}ÎaÖF Fš3Ñ)¸¯Ãx>\߯߿­ÖO¢ªT^`ZåÛ»Í}:žO}7#¢„ØÖÕÊYµW¶ÉC™ßœŒ±\µ»›N—ç0f¥â<® j½åÅ4wÙ=Þ¼±õ~n* ÀØ&!nÊÊ"‘QDx?<ÿ¾ŽäÁÚÕÐÏ·[Em©^¼Û=GcŽfCÇV°ŽÕ¯ëíÍæJ7ÄL òCh$«rÔùÛoŸ¼ßòõÝA&Ÿo«¦ãØÍ°ÌSåèó… 
Hë+ÂÝa—ÒhÈÖBm.²n$§ID³æY€ˆ×_àéÃZWÄìê›w-AK˜ûñ²v`™y1²p?m%›‡î‚ë~̱TªD°g‡CÖr¸»énÓ›/öOŽc¿Ïåp°—wå{P‚O‡ÜlnPLîcBÚiLæÖÇð¸˜võ¤€=ÐŽã®ÁMFE²æ¶}ñöé³ÖOçxìwKãìá³Ç„5î]1Ï(6ÒÚuNš.F¹3írJŽ˜Úè0É–”J”TëþÝ‹‡?¿>UŽÊ¯¿ynõÁÇW€Ó,j˜s7wÓ >»ÜPâ4µ¡2÷eš–óŒˆûaš°Ò/„Fzno¿;~tyê66‘õæ÷?6“ÍSß-f·À^ó¸Ëy¹h;D÷inWá7s"M :Ô–mOóI1m*ò5?üéžÆ±iܦ[õÅóöÍÓ¨Ö`Óv>ÍÇ9h÷Õð.Çx.ušJi%…ò` £iöò½©Ò/l P[œíý” £c¬ÚÕ·`ìw0tM5ìѦ¹rœOÇù®3IˆžJ¨x°Ï‡Sõ&ŸX~¥8EJm:m[¿{<1޾ímÎm=nôöp&rD˱k§Õ‰>Ú«ÜEºÊ)Àâf±ÒÙ¾B,óZ ¥×ýÙír;Ëví§ˆùAez7»|dšO§#½-‡ûnä1œ1¶A'¤½$XDsìôiŸ[;¢ª©Ç©ïÓ‚íäËígÏÎ7¶‡iÒrýòËu÷t °M…ë~b™O#Ĭ1ˆÁ°e¢Õn0*Ápשh`sKä>|ûáË—G‹hõôÚ‡˜_=ßd¾ážmŸæм&Íac‹y–0´ŒóÜö2À€•è´,³jm>JëÐþ¼>~í-£ÕÝv>˜»µµ]\úð˜Yu2soc—&ÓEnc·Ž¾šïg4#@E9ÆŽgã¾ n{µ»ïòÃÇwˆá‘müxóÞƒþã^n°Â¤1Eõ°¶hëéshfcSṫ>Ð|z8˜GözŒoϾØY²eña>8…këî¢]C±õ¡ç7—OÞóµFúH4¨úÖG–lŒa7_üðð}¾†ÅaìåÚÖ­®/>ï[³¸=ÎëâQ}Í:WŽ>@cUŽ|µÞÞ sz+Þ¿zå6N7ZNóq…ªÕÌÂà ±í)míæË¿ó_>¸~3]?ågŸ?n»{ìc%×–”8è÷¿ýÝü häܳÄ~ûãäZŽ6åýz±U ~ô T)ßöq÷u{Ì›7ýúâå¶þp‹ÔÖb÷©b¯9ÒæuŸ—þöw¿yñôuûÉÚ‰0·÷¸ œ#¹?œ[ËJVŸ²Då(н'ë´mO~¶õ}Õ}?t÷€Ï{ ft1òÌÇþý?ýí®ýñû  ÏóéûG¿¸}~˜µ›åPc€²¸tËÓ|µK@ÍñÖæû“Åt¸yþÛ/ñ“Ëó™aµë8Ù}FÔÎåòÓÿþãOûÕßàÍGÇ‹mº¬ù‡¼äÚúÊ‘^}úóŸãžËñ ™c åW·¿ß›.ìÅËOÿêÓóë–°ÈÜÒîÞøzãìQ¾üËÿõG4?}ûã×ÿã‹?þÕ·ö`Ÿ_}Þ_]^ÞFŸŒËþY~t­>øèO®q·_]ÌcCTÛ>/_,¿ÿê«Ïýç_?‹=Ì¢m·­þá¹+Kãââõ/Ÿ<¸½É'ö·—õùaÞ˜U¼éãóƒ÷ÓÓîõ¢æ‚úöã¦ûK ‰->ø§xsuýÍsïÏÿÓ/â\N®ùòb6À÷žüéáäå食Çvu¹öÕ—6D C{¸­«ìY`ŽÞâ²y†åèë£>¿ÿ¼»ÿùü¤nãšùéÞ×mÛ;HÆ6Å«ªÚ* S0_N[£½3}M}niF Ú¥Ž1ʉ%Tr/‡Í4ƒaL-‡{ÈÐñ—–ÙÖ/ƒIŠwE—1G “±•„w_>Áw}¿¡'hƒÖhMpópfÑB¦ªÑqm{ri-º˜ tj²Éc%#ˆ¤“~è÷Ý °fI”éSÀb>¬{‹6zÁœ%îÙæØ,Űe% ˆ[ïló4O{/¾ 3ÄòpÝá>DÐhÙi¦jËl {ZóæªUï:¡a£M‡èkHÑ`lFæyºèšIs”Ì'ãUߨf™kÈ}Žm.’F›ŒÀ²ÜP`(WRƒ©¼TÑ*y€0µ²¹‚½hØ»Ï9M¶å²œ“4NóÅrŸ6ÐÜçÔTU†¢›d—" @a¯æîT0¹CœrcL÷dD5 ãÁ“þ/JÄZ„ ¾j4ÜЪdVbξ‘CV!'TÙ0MF$@æ“KÔ^[Í f!ª·³‘"£J ÉÜŠ¨w–ÅZÉ xHˆL08…¶ 'd̰¬®ÓÛº4NÒ"'R•æôR´ý¾Y ÈúÛ&SqOWEÊ5/0³y©ÁÌ1Ìz•€}Øl­¹-E´˜ºP¥ÛT¹†fÏ’H#¡f¸ŘËYá]]!`Ø^^’Ì-ÍÜD·‘éWû}•Bé6µ4p²@Ñ#U0PÀžNô°Š&&J‘€Å*LÆÌQ«Å„„­»Íi ³z iVT+IV…]cŒ­†C '¯¹Ÿ/›I‘(Ò5¤£kµ«¶ŽZ³Á R‘ š¦•iUÌÖ“bUÁÌ#Gˆ +ÓÚæW„+ú(˜ïi„d±Œ{+³\Ï»?Â@f²à €A˜²TÕI2‘›-3Åʬøÿ]wþdŃÏIEND®B`‚ocr-1.2.2/doc/html/usermanual.html0000644000175000017500000004260412401041767017662 0ustar dalitzdalitz00000000000000 OCR Toolkit User's Manual

OCR Toolkit User's Manual

Last modified: September 01, 2014

This documentation is for those who want to use the toolkit for OCR, but are not interested in extending the toolkit itself.

Overview

The toolkit provides the functionality to segment an image page into text lines, words and characters, to sort them in reading-order, and to generate an output string.

Before you can use the OCR toolkit, you must first train characters from sample pages, which will then be used by the toolkit for classifying characters:

images/overview.png

Hence the proper use of this toolkit requires the following two steps:

  • training of sample characters on representative document images. This step is interactive and is done with the Gamera GUI, as described in the Gamera training tutorial
  • recognition of documents with the aid of this training data. This step usually runs automatically without user interaction. For this purpose, the tools from the present toolkit can be used.

There are two options to use this toolkit: you can either use the script ocr4gamera.py as provided by the toolkit, or you can build your own recognition scripts with the aid of the python library functions provided by the toolkit. Both alternatives are described below.

Using the script ocr4gamera.py

The ocr4gamera.py script takes an image and already trained data and segments the picture into single glyphs. The training data is used to classify those glyphs and convert them into strings. The final text is written to standard output or can optionally be stored in a text file. A word-by-word correction can also be performed on the recognized text.

The end user application ocr4gamera.py will be installed to /usr/bin unless you have explicitly chosen a different location. It can either be applied to a single image with the following typical call:

ocr4gamera.py -x <traindata> --deskew --automatic_group -o <outfile> <imagefile>

or simultaneously on multiple images with the following typical call:

ocr4gamera.py -x <traindata> --deskew --automatic_group -od <outdir> <imagefile1> <imagefile2> ...

Note that in the latter case an output directory must be given, into which the recognised texts will be written for each <imagefile> as <outdir>/`basename <imagefile>`.txt. Strictly speaking, the call mode for multiple image files is redundant, because the same result can be achieved by calling ocr4gamera.py for each image file separately, but it can speed up the recognition because the training data only needs to be loaded once.

The options --deskew and --automatic_group in the above examples are optional, but useful in most cases (see below). The complete synopsis of the script is:

ocr4gamera.py -x <trainingdata> [options] <imagefile>

Options can be in short (one dash, few characters) or long form (two dashes, string). When called with -h, --help or an invalid option, a usage message will be printed. The other options are:

-x trainingdata, --xmlfile=trainingdata
This option is required. trainingdata must be an xml file created with Gamera's training dialog.
-k k, --k=k
Number of neighbors used by kNN classifier (default is k = 1).
-o outfile, --output=outfile
Writes the output text to outfile. When not given, the result is printed to stdout. Note that this option can only be used when a single image file is processed.
-od outdir, --output_directory=outdir
Writes for each input image imgfile the recognized text to outdir/imgfile.txt. Note that this option cannot be used in combination with -o (--output).
-a, --automatic_group
Uses Gamera's automatic grouping algorithm during classification. This can be helpful when glyphs are broken.
-d, --deskew
Does a skew correction before page segmentation.
-mf windowsize, --median_filter=windowsize
Smooth the input image with a median filter with window size windowsize. Default is windowsize = 0, which means no smoothing.
-ds size, --despeckle=size
Remove all speckles with size <= size. Default is size = 0, which means no despeckling.
-f, --filter
Filter out connected components that are very big or very small.
-D, --dictionary_correction
Post-processing step called dictionary-check can be enabled here. For using this, you must have a unix spell tool installed: by default aspell is used; when this is not found, ispell is tried instead. Do not forget to install the needed language and turn it on by changing the LANG environment variable or set it with the -L option.
-L language, --dictionary_language=language
Sets the dictionary for the correcting-process. Otherwise the locale-settings language (aspell) or the default language (ispell) is used.
-e number, --edit_distance=number
Sets the maximum distance between two words, the recognized and the corrected word. The actual distance is calculated by the Gamera built-in function edit_distance. It has to be an integer. The default value is 2.
-c csv_file, --extra_chars_csvfile=csv_file

Use a user defined translation table of class names to character strings. The csv_file must contain a list of comma separated pairs (classname, output) one pair per line as in the following example (the output string after the comma can be any string consisting of unicode characters):

latin.small.ligature.st,st
latin.small.ligature.ft,ft
latin.small.letter.long.s,s
-R rules, --heuristic_rules=rules
Apply the heuristic rules rules for the disambiguation of some characters. rules can be roman (default) or none (for no rules).
-v level, --verbosity=level
Set verbosity level to level. When one, debug information is printed to stdout. When two, additionally three images are written to the current directory: debug_lines.png has the detected textlines marked, debug_chars.png has all segmented characters marked, and debug_words.png has all words marked. This can be useful to identify segmentation errors.

Using hOCR format as input or output

In addition to plaintext output it is also possible to use the hOCR format to also save segmentation information with the recognized text. If the ''-ho'' option is selected, you have to make sure that there is an output file or directory assigned in either the ''-o'' or ''-od'' option. In addition to the text data, the hOCR file will contain the bounding box information of the entire image, the textlines and words. The file extension ''.html'' will be automatically added.

If you want to use another textline algorithm that saves its data in the hOCR format, you can read the textline bounding box information by using the hOCR-input option ''-hi''. Even if there is more information given in the hOCR file, only the information stored in the title of the class ''ocr_line'' will be used. This option only works on single images.

-ho changes output to hOCR format

-hi hocrfile uses textline information of the given hOCR file

Writing custom scripts

If you want to write your own scripts for recognition, you can use ocr4gamera.py as a good starting point.

In order to access the OCR Toolkit classes and functions, you must import them at the beginning of your script:

from gamera.toolkits.ocr.ocr_toolkit import *
from gamera.toolkits.ocr.classes import Textline,Page,ClassifyCCs

After that you can segment an image with the Page class and its method segment():

img = load_image("image.png")
if img.data.pixel_type != ONEBIT:
   img = img.to_onebit()
result_page = Page(img)
result_page.segment()

The Page object result_page now contains all segment information like textlines, words and characters in reading order. You can then classify the characters line-per-line with a knn classifier and print the document text:

# load training data into classifier
cknn = knn.kNNInteractive([], \
          ["aspect_ratio", "moments", "volume64regions"], 0)
cknn.from_xml_filename("trainingdata.xml")

# classify characters and create output text
for line in result_page.textlines:
    line.glyphs = \
           cknn.classify_and_update_list_automatic(line.glyphs)
    line.sort_glyphs()
    print "Text of line", textline_to_string(line)

Note that the function textline_to_string is global and not bound to a class instance. This function requires that class names for characters have been chosen according to the standard unicode character names, as in the examples of the following table:

Character Unicode Name Class Name
! EXCLAMATION MARK exclamation.mark
2 DIGIT TWO digit.two
A LATIN CAPITAL LETTER A latin.capital.letter.a
a LATIN SMALL LETTER A latin.small.letter.a

For more information on how to fine control the segmentation process, see the developer's manual.

ocr-1.2.2/doc/html/developermanual.html0000644000175000017500000002270211716171512020666 0ustar dalitzdalitz00000000000000 OCR Toolkit Developer's Manual

OCR Toolkit Developer's Manual

Last modified: May 21, 2010

This documentation is for those who want to extend the functionality of the OCR toolkit, or who want to customize specific steps of the recognition process. For a comprehensive overview of the architecture of this toolkit, see section 3 of

C. Dalitz, R. Baston: Optical Character Recognition with the Gamera Framework. In C. Dalitz (Ed.): "Document Image Analysis with the Gamera Framework." Schriftenreihe des Fachbereichs Elektrotechnik und Informatik, Hochschule Niederrhein, vol. 8, pp. 53-65, Shaker Verlag (2009)

Overview

The core functionality of this toolkit is implemented in the Page class. This class provides a method segment, which segments the page into lines, and the lines into characters and words. The segmentation result is stored in the property textlines, which is a list of objects from type Textline.

To customize the page segmentation process, you can derive a custom class from Page, and overwrite some methods. While it is theoretically possible to directly overwrite the segment method, it is in most cases more desirable to only overwrite one of the methods called in segment, so that only a specific part of the segmentation process is replaced. See the documentation of Page.segment for information on which other methods are called in this method.

In the subsequent sections, we describe two typical use cases:

  • the replacement of the standard page segmentation algorithm with something else
  • the replacement of the rule based character segmentation with Gamera's classification based grouping algorithm.

Replacing the page segmentation method

Let us assume you want to use the Gamera core plugin projection_cutting for segmenting the page into text lines. To do so, simply derive a custom class MyPage from Page and overwrite the page_to_lines method:

class MyPage(Page):
    def page_to_lines(self):
        self.ccs_lines = self.img.projection_cutting()

This example is obviously very basic; in practice you might want to experiment with the input arguments of projection_cutting. You can then use MyPage just like Page, and the following code does the same segmentation as Page.segment, but with only page_to_lines replaced:

result = MyPage(image)
result.segment()

Let Gamera's training based grouping attach diacritics

Now let us assume that you want to let Gamera's classification based grouping algorithm join connected components to characters, rather than the rule based method built into Page.lines_to_chars. To do so, derive a custom class MyPage from Page, that segments the line into characters only by a connected component analysis, without any joining of CCs to characters (this will be done at a later point):

# segment lines into chars only by CC analysis
class MyPage(Page):
    def lines_to_chars(self):
        dummy, subccs = self.img.sub_cc_analysis(self.ccs_lines)
        self.textlines = []
        for i,segment in enumerate(self.ccs_lines):
            self.textlines.append(Textline(segment, subccs[i]))

Then you must make sure that a classification with grouping is done during Page.segment. This is done by passing a callable class derived from ClassifyCCs to the constructor of MyPage. As the default definition of ClassifyCCs already does what we need, we simply need to create an instance thereof:

# create an instance of ClassifyCCs ...
cknn = knn.kNNInteractive([], \
           ["aspect_ratio", "moments", "volume64regions"], 0)
cknn.from_xml_filename("trainingdata.xml")
classify = ClassifyCCs(cknn)
# ... and set its property parts_to_group such that the
#     grouping algorithm will be used during classification
classify.parts_to_group = 4

# pass the ClassifyCCs instance to the constructor of MyPage
page = MyPage(image, classify_ccs=classify)
page.segment()  # will call classify
ocr-1.2.2/doc/gendoc.py0000644000175000017500000000273012401075507015460 0ustar dalitzdalitz00000000000000#!/usr/bin/env python from gamera import gendoc if __name__ == '__main__': # Step 1: # Import all of the plugins to document. # Be careful not to load the core plugins, or they # will be documented here, too. # If the plugins are not already installed, we'll just ignore # them and generate the narrative documentation. try: from gamera.toolkits.ocr.plugins import bbox_merging_mcmillan except ImportError: print "WARNING:" print "This `ocr` toolkit must be installed before generating" print "the documentation. For now, the system will skip generating" print "documentation for the plugins." print # Step 2: # Generate documentation for this toolkit # This will handle any commandline arguments if necessary gendoc.gendoc(classes=[("gamera.toolkits.ocr.classes", "Textline", "__init__ add_glyph add_glyphs sort_glyphs"), ("gamera.toolkits.ocr.classes", "Page", "__init__ segment page_to_lines order_lines lines_to_chars chars_to_words show_lines show_glyphs show_words"), ("gamera.toolkits.ocr.classes", "hocrPage", "__init__"), ("gamera.toolkits.ocr.classes", "ClassifyCCs", "__init__ __call__")], plugins=["PageSegmentation"], sourceforge_logo=False) ocr-1.2.2/doc/src/0000775000175000017500000000000012433405322014433 5ustar dalitzdalitz00000000000000ocr-1.2.2/doc/src/ocr_toolkit.txt0000644000175000017500000001071011716171512017525 0ustar dalitzdalitz00000000000000================== ocr_toolkit_py ================== Functions '''''''''' return_char ------------ Returns a result character for building the string. Signature: ``return_char(unicode_str)`` with **unicode_str**: This expeted string has to be in unicode format, e.g. ``latin.small.letter.a`` will return the character ``a`` The returned character should be used for creating the result string. 
chars_make_words ----------------- Splits the amount of grouped characters which has been detected in a textline to single words. Signature: ``chars_make_words(lines_glyphs,threshold=None)`` with *lines_glyphs*: ``lines_glyphs`` has to be the list of connected-components which represents the amount of characters for a textline. *threshold*: For splitting the amount of characters to single words is a ``threshold`` needed. If two characters have more or equal empty space between each other there is a white space detected. The default value for this parameter is ``None`` which will make the function calculate a threshold value automatic The ``Textline`` Objects keep a word list as an attribute which expects a list like this functions return value. textline_to_string ------------------ Gives a full text string as result. Signature: ``textline_to_string(line, heuristic_rules="roman")`` with *line*: The ``Textline`` Object which keeps the characters. *heuristic_rules*: Some classified characters need some further heuristic classification rules. Take the apostroph as an example which might get classified as a comma but is placed at the top of a textline. Therefore the apostroph can be classified \"manual\" as a comma. On default this function includes some rules for often noticed classification errors for _roman_ alphabet. The result of this function can be used as a final result as this is the full text string. check_upper_neighbors ---------------------- Check two glyphs for beeing grouped to one single character. This function is for unit connected-components like quotation marks. Signature: ``check_upper_neighbors(item,glyph,line)`` with *item*: Some connected-component. *glyph*: Some connected-component. *line*: The ``Textline`` Object which includes ``item`` and ``glyph`` There is returned an array with two elements. 
The first element keeps a list of characters (images that has been united to a single image) and the second image is a list of characters which has to be removed as these have been united to a single character. check_glyph_accent ------------------- Check two glyphs for beeing grouped to one single character. This function is for unit connected-components like i, j or colon. Signature: ``check_glyph_accent(item,glyph)`` with *item*: Some connected-component. *glyph*: Some connected-component. There is returned an array with two elements. The first element keeps a list of characters (images that has been united to a single image) and the second image is a list of characters which has to be removed as these have been united to a single character. get_line_glyphs ---------------- Segmentates the glyphs which are included in every single textline with simple rules. Signature: ``get_line_glyphs(image,textlines)`` with *image*: The textdocument image which is beeing segmentated. *textlines*: A list of connected-components which keeps every textline of the document. A ``Page`` Object has a list named ``textlines`` which should include ``Textline`` Objects. This list is filled within this function as it is called in a ``Page`` method. show_bboxes ------------ Draws hollow rects in an copy of image based on the rects in the ``glyphs`` list. If the ``save`` parameter is set to ``1`` a file with the name of ``filename`` will be created. Signature: ``show_bboxes(image,glyphs,filename=\"segmenated_glyphs.PNG\",save=1)`` with: *image*: An image of the textdokument which has to be segmentated. *glyphs*: A list of rects which will be drawn on ``image`` as hallow rects. *filename*: A filename for the image file that might be created. *save*: On default ``save`` is set to ``1`` which will cause the function to create a new image file named ``filename``. If ``save`` is set to ``0`` the function will try to display the image on-the-fly in a box. 
This function is usefull for debugging or for getting information about the segmentation process. The ``Page`` object useses this function in three methods for displaying the segmentation of textlines, all single characters and all detected words. ocr-1.2.2/doc/src/plugins.txt0000644000175000017500000000040612401076126016654 0ustar dalitzdalitz00000000000000======= Plugins ======= By categories ------------- - PageSegmentation_ - bbox_mcmillan_ Alphabetical ------------- **B** bbox_mcmillan_ .. _PageSegmentation: pagesegmentation.html#pagesegmentation .. _bbox_mcmillan: pagesegmentation.html#bbox-mcmillanocr-1.2.2/doc/src/usermanual.txt0000644000175000017500000002400412401041724017343 0ustar dalitzdalitz00000000000000========================= OCR Toolkit User's Manual ========================= This documentation is for those who want to use the toolkit for OCR, but are not interested in extending the toolkit itself. Overview '''''''' The toolkit provides the functionality to segment an image page into text lines, words and characters, to sort them in reading-order, and to generate an output string. Before you can use the OCR toolkit, you must first train characters from sample pages, which will then be used by the toolkit for classifying characters: .. image:: images/overview.png Hence the proper use of this toolkit requires the following two steps: - training of sample characters on representative document images. This step is interactive and is done with the Gamera GUI, as described in the `Gamera training tutorial`__ - recognition of documents with the aid of this training data. This step usually runs automatically without user interaction. For this purpose, the tools from the present toolkit can be used. .. 
__: http://gamera.sourceforge.net/doc/html/training_tutorial.html There are two options to use this toolkit: you can either use the script ``ocr4gamera.py`` as provided by the toolkit, or you can build your own recognition scripts with the aid of the python library functions provided by the toolkit. Both alternatives are described below. Using the script ``ocr4gamera.py`` '''''''''''''''''''''''''''''''''' The *ocr4gamera.py* script takes an image and already trained data and segments the picture into single glyphs. The training-data is used to classify those glyphs and converts them into strings. The final text is written to standard-out or can optionally be stored in a textfile. Also a word by word correction can be performed on the recognized text. The end user application *ocr4gamera.py* will be installed to ``/usr/bin`` unless you habe explicitly chosen a different location. It can be either be applied to a *single* image with the following typical call:: ocr4gamera.py x --deskew --automatic_group -o or simultaneously on *multiple* images with the following typical call:: ocr4gamera.py x --deskew --automatic_group -od ... Note that in the latter case an output *directory* must be given, into which the recognised texts will be written for each ** as */`basename `.txt*. Strictly speaking, the call modus for multiple image files is redundant, because the same result can be achieved by calling *ocr4gamera.py* for each image file separately, but it can speed up the recognition because the training data only needs to be loaded once. The options *--deskew* and *--automatic_group* in the above examples are optional, but useful in most cases (see below). The complete synopsis of the script is:: ocr4gamera.py -x [options] Options can be in short (one dash, few characters) or long form (two dashes, string). When called with ``-h``, ``--help`` or an invalid option, a usage message will be printed. 
The other options are: ``-x`` *trainingdata*, ``--xml-file``\ =\ *trainingdata* This option is required. *trainingdata* must be an xml file created with `Gamera's training dialog`__. .. __: http://gamera.sourceforge.net/doc/html/training_tutorial.html ``-k`` *k*, ``--k=``\ =\ *k* Number of neighbors used by kNN classifier (default is *k* = 1). ``-o`` *outfile*, ``--output``\ =\ *outfile* Writes the output text to *outfile*. When not given, the result is printed to stdout. Note that this option can anly be used when a *single* image file is processed. ``-od`` *outdir*, ``--output_directory``\ =\ *outdir* Writes for each input image *imgfile* the recognized text to *outdir*/*imgfile*.txt. Note that this option cannot be used in combination with ``-o`` (``--outfile``). ``-a``, ``--automatic_group`` Uses Gamera's automatic grouping algorithm during classification. This can be helpful when glyphs are broken. ``-d``, ``--deskew`` Does a skew correction before page segmentation. ``-mf`` *windowsize*, ``--median_filter``\ =\ *windowsize* Smooth the input image with a median filter with window size *windowsize*. Default is *windowsize* = 0, which means no smoothing. ``-ds`` *size*, ``--despeckle``\ =\ *size* Remove all speckles with size <= *size*. Default is *size* = 0, which means no despeckling. ``-f``, ``--filter`` Filter out connected components that are very big or very small. ``-D``, ``--dictionary_correction`` Post-processing step called dictionary-check can be enabled here. For using this, you must have a unix ``spell`` tool installed: by default ``aspell`` is used; when this is not found, ``ispell`` is tried instead. Do not forget to install the needed language and turn it on by changing the ``LANG`` environment variable or set it with the ``-L`` option. ``-L`` *language*, ``--dictionary_language``\ =\ *language* Sets the dictionary for the correcting-process. Otherwise the locale-settings language (aspell) or the default language (ispell) is used. 
``-e`` *number*, ``--edit_distance``\ =\ *number* Sets the max. distance between two words, the recognized and the corrected word. The actual distance is calculated by the gamera built in function edit_distance. It has to be integer. The default value is 2. ``-c`` *csv-file*, ``--extra_chars_csvfile``\ =\ *csv_file* Use a user defined translation table of class names to character strings. The *csv_file* must contain a list of comma separated pairs (classname, output) one pair per line as in the following example (the output string after the comma can be any string consisting of unicode characters): :: latin.small.ligature.st,st latin.small.ligature.ft,ft latin.small.letter.long.s,s ``-R`` *rules*, ``--heuristic_rules``\ =\ *rules* apply heuristic rules *rules* for disambiguation of some chars *rules* can be ``roman`` (default) or ``none`` (for no rules) ``-v`` *level*, ``--information``\ =\ *level* Set verbosity level to *level*. When one, debug information is printed to stdout. When two, additionally three images are written to the current directory: ``debug_lines.png`` has the detected textlines marked, ``debug_chars.png`` has all segmentated characters marked, and ``debug_words.png`` has all words marked. This can be usefull to identify segmentation errors. Using hOCR format as input or output ''''''''''''''''''''''''''''''''''''' In addition to plaintext output it is also possible to use the hOCR format to also save segmentation information with the recognized text. If the ''-ho'' option is selected, you have to make sure that their is an output file or directory asigned in either the ''-o'' or ''-od'' option. In addition to the text data, the hOCR file will contain the bounding box information of the entire image, the textlines and words. The file extension ''.html'' will be automaticly added. 
If you want to use another textline algorithm that saves its data in the hOCR format you can read the textline bounding box information by using the hOCR-input optin ''-hi''. Even if there is more information given in the hOCR file, only the information stored in the title of the class ''ocr_line'' will be used. This option only works on single images. ``-ho`` changes output to hOCR format ``-hi`` *hocrfile* uses textline information of the given hOCR file Writing custom scripts '''''''''''''''''''''' If you want to write your own scripts for recognition, you can use ``ocr4gamera.py`` as a good starting point. In order to access the *OCR Toolkit* classes and functions, you must import them at the beginning of your script: .. code:: Python from gamera.toolkits.ocr.ocr_toolkit import * from gamera.toolkits.ocr.classes import Textline,Page,ClassifyCCs After that you can segment an image with the Page__ class and its method *segment()*: .. __: gamera.toolkits.ocr.classes.Page.html .. code:: Python img = load_image("image.png") if img.data.pixel_type != ONEBIT: img = img.to_onebit() result_page = Page(img) result_page.segment() The ``Page`` object *result_page* now contains all segment information like textlines, words and characters in reading order. You can then classify the characters line-per-line with a knn classifier and print the document text: .. code:: Python # load training data into classifier cknn = knn.kNNInteractive([], \ ["aspect_ratio", "moments", "volume64regions"], 0) cknn.from_xml_filename("trainingdata.xml") # classify characters and create output text for line in page.textlines: line.glyphs = \ cknn.classify_and_update_list_automatic(line.glyphs) line.sort_glyphs() print "Text of line", textline_to_string(line) Note that the function `textline_to_string`_ is global and not bound to a class instance. 
This function requires that class names for characters have been chosen according to the `standard unicode character names`_, as in the examples of the following table: .. _`textline_to_string`: functions.html#textline-to-string .. _`standard unicode character names`: http://www.unicode.org/charts/ +-----------+----------------------------+----------------------------+ | Character | Unicode Name | Class Name | +===========+============================+============================+ | ``!`` | ``EXCLAMATION MARK`` | ``exclamation.mark`` | +-----------+----------------------------+----------------------------+ | ``2`` | ``DIGIT TWO`` | ``digit.two`` | +-----------+----------------------------+----------------------------+ | ``A`` | ``LATIN CAPITAL LETTER A`` | ``latin.capital.letter.a`` | +-----------+----------------------------+----------------------------+ | ``a`` | ``LATIN SMALL LETTER A`` | ``latin.small.letter.a`` | +-----------+----------------------------+----------------------------+ For more information on how to fine control the segmentation process, see the `developer's manual`__. .. __: developermanual.html ocr-1.2.2/doc/src/developermanual.txt0000644000175000017500000001060211716171512020360 0ustar dalitzdalitz00000000000000============================== OCR Toolkit Developer's Manual ============================== This documentation is for those who want to extend the functionality of the OCR toolkit, or who want to customize specific steps of the recognition process. For a comprehensive overview over the architecture of this toolkit, see section 3 of C. Dalitz, R. Baston: `Optical Character Recognition with the Gamera Framework.`__ In C. Dalitz (Ed.): "Document Image Analysis with the Gamera Framework." Schriftenreihe des Fachbereichs Elektrotechnik und Informatik, Hochschule Niederrhein, vol. 8, pp. 53-65, Shaker Verlag (2009) .. 
__: http://lionel.kr.hsnr.de/~dalitz/data/publications/sr09-ocr-gamera.pdf Overview ''''''''' The core functionality of this toolkit is implemented in the Page_ class. This class provides a method *segment*, which segments the page into lines, and the lines into characters and words. The segmentation result is stored in the property *textlines*, which is a list of objects of type Textline_. .. _Page: gamera.toolkits.ocr.classes.Page.html .. _Textline: gamera.toolkits.ocr.classes.Textline.html To customize the page segmentation process, you can derive a custom class from ``Page``, and override some methods. While it is theoretically possible to directly override the *segment* method, it is in most cases more desirable to only override one of the methods called in *segment*, so that only a specific part of the segmentation process is replaced. See the `documentation of Page.segment`__ for information on which other methods are called in this method. .. __: gamera.toolkits.ocr.classes.Page.html#segment In the subsequent sections, we describe two typical use cases: - the replacement of the standard page segmentation algorithm with something else - the replacement of the rule based character segmentation with Gamera's classification based grouping algorithm. Replacing the page segmentation method '''''''''''''''''''''''''''''''''''''' Let us assume you want to use the Gamera core plugin *projection_cutting* for segmenting the page into text lines. To do so, simply derive a custom class *MyPage* from *Page* and override the *page_to_lines* method: .. code:: Python class MyPage(Page): def page_to_lines(self): self.ccs_lines = self.img.projection_cutting() This example is obviously very basic; in practice you might want to experiment with the input arguments of *projection_cutting*. You can then use *MyPage* just like *Page*, and the following code does the same segmentation as *Page.segment*, but with only *page_to_lines* replaced: ..
code:: Python result = MyPage(image) result.segment() Let Gamera's training based grouping attach diacritics '''''''''''''''''''''''''''''''''''''''''''''''''''''' Now let us assume that you want to let Gamera's classification based grouping algorithm join connected components to characters, rather than the rule based method built into ``Page.lines_to_chars``. To do so, derive a custom class *MyPage* from *Page*, that segments the line into characters only by a connected component analysis, without any joining of CCs to characters (this will be done at a later point): .. code:: Python # segment lines into chars only by CC analysis class MyPage(Page): def lines_to_chars(self): dummy, subccs = self.img.sub_cc_analysis(self.ccs_lines) self.textlines = [] for i,segment in enumerate(self.ccs_lines): self.textlines.append(Textline(segment, subccs[i])) Then you must make sure that a classification with grouping is done during ``Page.segment``. This is done by passing a callable class derived from ClassifyCCs_ to the constructor of *MyPage*. As the default definition of *ClassifyCCs* already does what we need, we simply need to create an instance thereof: .. _ClassifyCCs: gamera.toolkits.ocr.classes.ClassifyCCs.html .. code:: Python # create an instance of ClassifyCCs ... cknn = knn.kNNInteractive([], \ ["aspect_ratio", "moments", "volume64regions"], 0) cknn.from_xml_filename("trainingdata.xml") classify = ClassifyCCs(cknn) # ... and set its property parts_to_group such that the # grouping algorithm will be used during classification classify.parts_to_group = 4 # pass the ClassifyCCs instance to the constructor of MyPage page = MyPage(image, classify_ccs=classify) page.segment() # will call classify ocr-1.2.2/doc/src/gamera.toolkits.ocr.classes.ClassifyCCs.txt0000644000175000017500000000041112401076126024655 0ustar dalitzdalitz00000000000000class ``ClassifyCCs`` ===================== ``ClassifyCCs`` --------------- In module ``gamera.toolkits.ocr.classes`` ..
docstring:: gamera.toolkits.ocr.classes ClassifyCCs :no_title: .. docstring:: gamera.toolkits.ocr.classes ClassifyCCs __init__ __call__ ocr-1.2.2/doc/src/index.txt0000644000175000017500000002400512353014640016302 0ustar dalitzdalitz00000000000000====================== OCR toolkit for Gamera ====================== :Editor: Rene Baston, Christoph Dalitz :Version: 1.1.0 Use the 'Addons' section on the `Gamera home page`__ for access to file releases of this toolkit. .. __: http://gamera.informatik.hsnr.de/addons/ Overview ''''''''' The purpose of the *OCR Toolkit* is to help building optical character recognition (OCR) systems for standard text documents. Even though it can be used as is, it is specifically designed to make individual steps of the recognition system customizable and replacable. The toolkit is based on and requires the `Gamera framework`__ for document analysis and recognition. As an addon package for Gamera, it provides .. __: http://gamera.sf.net/ - python library functions for building a custom OCR system - a ready-to-run python script ``ocr4gamera`` which acts as a basic OCR-system A comprehensive overview of design, usage and customization of the OCR toolkit can be found in the paper C. Dalitz, R. Baston: `Optical Character Recognition with the Gamera Framework.`__ In C. Dalitz (Ed.): "Document Image Analysis with the Gamera Framework." Schriftenreihe des Fachbereichs Elektrotechnik und Informatik, Hochschule Niederrhein, vol. 8, pp. 53-65, Shaker Verlag (2009) .. __: http://lionel.kr.hsnr.de/~dalitz/data/publications/sr09-ocr-gamera.pdf The recognition process ----------------------- *Optical character recognition* (OCR) means the extraction of a machine readable text code from bitmap images of text documents. 
This process typically consists of the following steps: **Preprocessing:** Includes binarization, skew correction, image enhancement, text/graphics separation **Segmentation:** Segmentation of the page in text lines (page segmentation) and characters (character segmentation) **Classification:** Identification of the individual characters **Postprocessing:** Includes the generation of the output string and maybe detection and correction of possible errors The OCR toolkit only covers the process from segmentation to postprocessing. For preprocessing, the standard routines shipped with Gamera must be used beforehand, e.g. *rotation_angle_projections* for skew correction, or *despeckle* for noise removal. For classification, the kNN classifier shipped with Gamera must be used. This means in particular, that you must train some sample pages before doing the classification. At present, the toolkit does not include training databases for common fonts. Provided Components -------------------- The toolkit consists of two python modules, a plugin image function and one end user application. The modules are - *classes* which contains all class definitions - *ocr_toolkit* for global functions used across the classes The end user application is - *ocr4gamera.py* is a script that acts as a basic OCR-system There is also one image plugin *bbox_seg* for textline segmentation which is simply a wrapper around the Gamera core plugin ``bbox_segmentation``. Limitations ----------- As the segmentation of the individual characters is based on a connected component analysis, the toolkit cannot deal with touching characters, unless they have been trained as ligaturae. It is therefore in general only applicable to printed documents, rather than handwritten documents. From a user's perspective, there are some points to beware in this toolkit: - It does not provide methods for text/graphics separation. 
Hopefully, some generic methods for this purpose will be added at some point in the Gamera core. - It does not provide prototypes of latin characters. This means that characters must be trained on sample pages before using the toolkit. - The standard page segmentation algorithm for textline separation is currently very basic. User's Manual '''''''''''''' This documentation is written for those who want to use the toolkit for OCR, but are not interested in extending the toolkit itself. - `Using the toolkit`_: gives an explanation on how to use the toolkit. .. _`Using the toolkit`: usermanual.html Developer's Manual ''''''''''''''''''' This documentation is for those who want to extend the functionality of the OCR toolkit, or who want to customize specific steps of the recognition process. - `Developer's manual`_: describes how to customize the recognition process - Classes_: reference for the classes involved in the segmentation process. These are: * Page_ for doing the page segmentation * Textline_ for storing the segmentation result within ``Page`` * ClassifyCCs_ for (optionally) doing the classification during page segmentation - Functions_: the global functions defined by the toolkit - Plugins_: Reference for the plugin functions shipped with this toolkit .. _`Developer's manual`: developermanual.html .. _Functions: functions.html .. _Classes: classes.html .. _Page: gamera.toolkits.ocr.classes.Page.html .. _Textline: gamera.toolkits.ocr.classes.Textline.html .. _ClassifyCCs: gamera.toolkits.ocr.classes.ClassifyCCs.html .. _Plugins: plugins.html Installation '''''''''''' We have only tested the toolkit on Linux and MacOS X, but as the toolkit is written entirely in Python, the following instructions should work for any operating system. Prerequisites ------------- First you will need a working installation of Gamera 3.x. See the `Gamera website`__ for details. It is strongly recommended that you use a recent version, preferably from SVN. .. 
__: http://gamera.sourceforge.net/ If you want to generate the documentation, you will need two additional third-party Python libraries: - docutils_ for handling reStructuredText documents. - pygments_ for colorizing source code. .. _docutils: http://docutils.sourceforge.net/ .. _pygments: http://pygments.org/ .. note:: It is generally not necessary to generate the documentation because it is included in file releases of the toolkit. Building and Installing ----------------------- To build and install this toolkit, go to the base directory of the toolkit distribution and run the ``setup.py`` script as follows:: # 1) compile python setup.py build # 2) install sudo python setup.py install Command 1) compiles the toolkit from the sources and command 2) installs it. As the latter requires root privileges, you need to use ``sudo`` on Linux and MacOS X. On Windows, ``sudo`` is not necessary. Note that the script *ocr4gamera* is installed into ``/usr/bin`` on Linux, but into ``/System/Library/Frameworks/Python.framework/Versions/2.x/bin`` on MacOS X. As the latter directory is not in the standard search path, you could either add it to your search path, or install the scripts additionally into ``/usr/bin`` on MacOS X with:: # install scripts into standard path (MacOS X only) sudo python setup.py install_scripts -d /usr/bin If you want to regenerate the documentation, go to the ``doc`` directory and run the ``gendoc.py`` script. The output will be placed in the ``doc/html/`` directory. The contents of this directory can be placed on a webserver for convenient viewing. .. note:: Before building the documentation you must install the toolkit. Otherwise ``gendoc.py`` will not find the plugin documentation. Installing without root privileges ---------------------------------- The above installation with ``python setup.py install`` will install the toolkit system wide and thus requires root privileges.
If you do not have root access (Linux) or are not a sudoer (MacOS X), you can install the OCR toolkit into your home directory. Note however that this also requires that Gamera is installed into your home directory. It is currently not possible to install Gamera globally and only toolkits locally. Here are the steps to install both Gamera and the OCR toolkit into ``~/python``:: # install Gamera locally mkdir ~/python python setup.py install --prefix=~/python # build and install the OCR toolkit locally export CFLAGS=-I~/python/include/python2.3/gamera python setup.py build python setup.py install --prefix=~/python Moreover you should set the following environment variables in your ``~/.profile``:: # search path for python modules export PYTHONPATH=~/python/lib/python # search path for executables (eg. gamera_gui) export PATH=~/python/bin:$PATH Uninstallation -------------- The installation uses the Python *distutils*, which do not support uninstallation. Thus you need to remove the installed files manually: - the installed Python library files of the toolkit - the installed standalone scripts Python Library Files ```````````````````` All python library files of this toolkit are installed into the ``gamera/toolkits/ocr`` subdirectory of the Python library folder. Thus it is sufficient to remove this directory for an uninstallation. Where the python library folder is depends on your system and python version.
Here are the folders that you need to remove on MacOS X and Debian Linux ("2.3" stands for the python version; replace it with your actual version): - MacOS X: ``/Library/Python/2.3/gamera/toolkits/ocr`` - Debian Linux: ``/usr/lib/python2.3/site-packages/gamera/toolkits/ocr`` Standalone Scripts `````````````````` The standalone scripts are installed into ``/usr/bin`` (linux) or ``/System/Library/Frameworks/Python.framework/Versions/2.3/bin`` (MacOS X), unless you have explicitly chosen a different location with the options ``--prefix`` or ``--home`` during installation. For an uninstall, remove the following script: - ``ocr4gamera.py`` .. note:: In older versions (1.0.0 and 1.0.1) this script was named ``ocr4gamera``. Remove this old script, if you are upgrading from one of these versions. About this documentation '''''''''''''''''''''''' The documentation was written by Rene Baston and Christoph Dalitz. Permission is granted to copy, distribute and/or modify this documentation under the terms of the `Creative Commons Attribution Share-Alike License (CC-BY-SA) v3.0`__. In addition, permission is granted to use and/or modify the code snippets from the documentation without restrictions. .. __: http://creativecommons.org/licenses/by-sa/3.0/ ocr-1.2.2/doc/src/pagesegmentation.txt0000644000175000017500000000355312401076126020533 0ustar dalitzdalitz00000000000000 PageSegmentation ================ ``bbox_mcmillan`` ----------------- [object] **bbox_mcmillan** ([object *glyphs*] = None, float *section_search_size* = 1.00, float *noise_mltplk* = 1.00, float *large_mltplk* = 20.00, float *stdev_mltplk* = 5.00) :Operates on: ``Image`` [OneBit] :Returns: [object] :Category: PageSegmentation :Defined in: bbox_merging_mcmillan.py :Author: Robert Butz, Karl MacMillan Returns the textlines in an image as connected components. The segmentation method is adapted from McMillan's segmentation method in roman_text.py. It allows a more individual segmentation through parameterization. 
Options: *glyphs*: This list can be build out of a ``cc_analysis``. On default, this parameter is blank, which will cause the function to call ``cc_analysis`` itself. *section_search_size* This optional parameter adjusts the calculated avg_glyph_size by multipling its value (default=1). *noise_mltplk* With this optional parameter one can adjust the noise_recognition rate independently from the calculated avg_glyph_size (default = 1). Values greater than 1 let the noise_removal detect bigger noise (but maybe even glyphs). Chose smaller values to avoid assigning small glyphs to noise. *large_mltplk* Analog to noise_mltplk one can set this parameter to manipulate the recognition of very large ccs according to the avg_glyph_size (default=20). Higher values lead to a better acceptance of above-average ccs. Beneficial, for example for big capital initials at the beginning of paragraphs such as seen in bibles. *stdev_mltplk* This parameter affects the line finding algorithm by excluding abnormally tall glyphs (default=5). The standard deviation will be calculated and multiplied by this parameter. ocr-1.2.2/doc/src/gamera.toolkits.ocr.classes.Page.txt0000644000175000017500000000047012401076126023370 0ustar dalitzdalitz00000000000000class ``Page`` ============== ``Page`` -------- In module ``gamera.toolkits.ocr.classes`` .. docstring:: gamera.toolkits.ocr.classes Page :no_title: .. docstring:: gamera.toolkits.ocr.classes Page __init__ segment page_to_lines order_lines lines_to_chars chars_to_words show_lines show_glyphs show_words ocr-1.2.2/doc/src/html4css1.css0000644000175000017500000001301511716171512016771 0ustar dalitzdalitz00000000000000/* :Author: David Goodger :Contact: goodger@users.sourceforge.net :Date: $Date: 2007/08/10 14:44:25 $ :Revision: $Revision: 1.1 $ :Copyright: This stylesheet has been placed in the public domain. Default cascading style sheet for the HTML output of Docutils. 
See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to customize this style sheet. */ /* used to remove borders from tables and images */ .borderless, table.borderless td, table.borderless th { border: 0 } table.borderless td, table.borderless th { /* Override padding for "table.docutils td" with "! important". The right padding separates the table cells. */ padding: 0 0.5em 0 0 ! important } .first { /* Override more specific margin styles with "! important". */ margin-top: 0 ! important } .last, .with-subtitle { margin-bottom: 0 ! important } .hidden { display: none } a.toc-backref { text-decoration: none ; color: black } blockquote.epigraph { margin: 2em 5em ; } dl.docutils dd { margin-bottom: 0.5em } /* Uncomment (and remove this text!) to get bold-faced definition list terms dl.docutils dt { font-weight: bold } */ div.abstract { margin: 2em 5em } div.abstract p.topic-title { font-weight: bold ; text-align: center } div.admonition, div.attention, div.caution, div.danger, div.error, div.hint, div.important, div.note, div.tip, div.warning { margin: 2em ; border: medium outset ; padding: 1em } div.admonition p.admonition-title, div.hint p.admonition-title, div.important p.admonition-title, div.note p.admonition-title, div.tip p.admonition-title { font-weight: bold ; font-family: sans-serif } div.attention p.admonition-title, div.caution p.admonition-title, div.danger p.admonition-title, div.error p.admonition-title, div.warning p.admonition-title { color: red ; font-weight: bold ; font-family: sans-serif } /* Uncomment (and remove this text!) to get reduced vertical space in compound paragraphs. 
div.compound .compound-first, div.compound .compound-middle { margin-bottom: 0.5em } div.compound .compound-last, div.compound .compound-middle { margin-top: 0.5em } */ div.dedication { margin: 2em 5em ; text-align: center ; font-style: italic } div.dedication p.topic-title { font-weight: bold ; font-style: normal } div.figure { margin-left: 2em ; margin-right: 2em } div.footer, div.header { clear: both; font-size: smaller } div.line-block { display: block ; margin-top: 1em ; margin-bottom: 1em } div.line-block div.line-block { margin-top: 0 ; margin-bottom: 0 ; margin-left: 1.5em } div.sidebar { margin-left: 1em ; border: medium outset ; padding: 1em ; background-color: #ffffee ; width: 40% ; float: right ; clear: right } div.sidebar p.rubric { font-family: sans-serif ; font-size: medium } div.system-messages { margin: 5em } div.system-messages h1 { color: red } div.system-message { border: medium outset ; padding: 1em } div.system-message p.system-message-title { color: red ; font-weight: bold } div.topic { margin: 2em } h1.section-subtitle, h2.section-subtitle, h3.section-subtitle, h4.section-subtitle, h5.section-subtitle, h6.section-subtitle { margin-top: 0.4em } h1.title { text-align: center } h2.subtitle { text-align: center } hr.docutils { width: 75% } img.align-left { clear: left } img.align-right { clear: right } ol.simple, ul.simple { margin-bottom: 1em } ol.arabic { list-style: decimal } ol.loweralpha { list-style: lower-alpha } ol.upperalpha { list-style: upper-alpha } ol.lowerroman { list-style: lower-roman } ol.upperroman { list-style: upper-roman } p.attribution { text-align: right ; margin-left: 50% } p.caption { font-style: italic } p.credits { font-style: italic ; font-size: smaller } p.label { white-space: nowrap } p.rubric { font-weight: bold ; font-size: larger ; color: maroon ; text-align: center } p.sidebar-title { font-family: sans-serif ; font-weight: bold ; font-size: larger } p.sidebar-subtitle { font-family: sans-serif ; font-weight: 
bold } p.topic-title { font-weight: bold } pre.address { margin-bottom: 0 ; margin-top: 0 ; font-family: serif ; font-size: 100% } pre.literal-block, pre.doctest-block { margin-left: 2em ; margin-right: 2em ; background-color: #eeeeee } span.classifier { font-family: sans-serif ; font-style: oblique } span.classifier-delimiter { font-family: sans-serif ; font-weight: bold } span.interpreted { font-family: sans-serif } span.option { white-space: nowrap } span.pre { white-space: pre } span.problematic { color: red } span.section-subtitle { /* font-size relative to parent (h1..h6 element) */ font-size: 80% } table.citation { border-left: solid 1px gray; margin-left: 1px } table.docinfo { margin: 2em 4em } table.docutils { margin-top: 0.5em ; margin-bottom: 0.5em; background-color: #f7fffd; border-color: #72ada8; border: solid thin #aaaaaa; } table.footnote { border-left: solid 1px black; margin-left: 1px } table.docutils td, table.docutils th, table.docinfo td, table.docinfo th { padding-left: 0.5em ; padding-right: 0.5em ; vertical-align: top } table.docutils th.field-name, table.docinfo th.docinfo-name { font-weight: bold ; text-align: left ; white-space: nowrap ; } td.field-body, th.field-name { padding: 0.5em; border: solid thin #aaaaaa; } h1 tt.docutils, h2 tt.docutils, h3 tt.docutils, h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { font-size: 100% } tt.docutils { } ul.auto-toc { list-style-type: none } ocr-1.2.2/doc/src/gamera.toolkits.ocr.classes.Textline.txt0000644000175000017500000000041712401076126024311 0ustar dalitzdalitz00000000000000class ``Textline`` ================== ``Textline`` ------------ In module ``gamera.toolkits.ocr.classes`` .. docstring:: gamera.toolkits.ocr.classes Textline :no_title: .. 
docstring:: gamera.toolkits.ocr.classes Textline __init__ add_glyph add_glyphs sort_glyphs ocr-1.2.2/doc/src/functions.txt0000644000175000017500000000240611716171512017210 0ustar dalitzdalitz00000000000000============================= OCR Toolkit: Global Functions ============================= The toolkit defines a number of free function which are not image methods. These are defined in *ocr_toolkit.py* and can be imported in a python script with .. code:: Python from gamera.toolkits.ocr.ocr_toolkit import * Output text generation '''''''''''''''''''''' While the class Page_ splits the image into Textline_ objects and possibly classifies the characters, it does not generate an output string. For this purpose, you can use the function `textline_to_string`_. .. _Page: gamera.toolkits.ocr.classes.Page.html .. _Textline: gamera.toolkits.ocr.classes.Textline.html .. _`textline_to_string`: #textline-to-string .. docstring:: gamera.toolkits.ocr.ocr_toolkit textline_to_string .. docstring:: gamera.toolkits.ocr.ocr_toolkit return_char .. docstring:: gamera.toolkits.ocr.ocr_toolkit chars_make_words Segmentation '''''''''''' These functions are used in the segmentation methods of class Page_. You will generally not need to call them, unless you are implementing a custom segmentation method. .. _Page: gamera.toolkits.ocr.classes.Page.html .. docstring:: gamera.toolkits.ocr.ocr_toolkit get_line_glyphs .. 
docstring:: gamera.toolkits.ocr.ocr_toolkit show_bboxes ocr-1.2.2/doc/src/default.css0000644000175000017500000001104411716171512016573 0ustar dalitzdalitz00000000000000@import url(html4css1.css); @import url(pygments.css); body { margin: 2em 2em 2em 2em; background-color: #effffd; } a.toc-backref { text-decoration: none ; color: black } h1 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 4px } h2 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 2px } h3 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 1px } h4 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 1px } h5 { background-color: #e1f0ee; color: #29493c; border-top-color: #72ada8; border-top-style: solid; border-top-width: 0.5px } div.code-block, div.highlight { margin-left: 2em; margin-right: 2em; background-color: #f0f0e0; font-family: "Andale Mono", "Bitstream Vera Sans Mono", monospace; border-color: #e0e0d0; border-style: solid; border-width: 1px; font-size: 10pt; padding: 1em; } /* The following is for SilverCity syntax highlighting */ .code_default { FONT-FAMILY: "Andale Mono", "Bitstream Vera Sans Mono", monospace; FONT-SIZE: 10pt; } .c_character { color: olive; } .c_comment { color: green; font-style: italic; } .c_commentdoc { color: green; font-style: italic; } .c_commentdockeyword { color: navy; font-weight: bold; } .c_commentdockeyworderror { color: red; font-weight: bold; } .c_commentline { color: green; font-style: italic; } .c_commentlinedoc { color: green; font-style: italic; } .c_default { } .c_identifier { color: black; } .c_number { color: #009999; } .c_operator { color: black; } .c_preprocessor { color: navy; font-weight: bold; } .c_regex { color: olive; } .c_string { color: olive; } .c_stringeol { color: olive; } .c_uuid { 
color: olive; } .c_verbatim { color: olive; } .c_word { color: navy; font-weight: bold; } .c_word2 { color: navy; font-weight: bold; } .h_asp { color: #ffff00; } .h_aspat { color: #ffdf00; } .h_attribute { color: #008080; } .h_attributeunknown { color: #ff0000; } .h_cdata { color: #ffdf00; } .h_comment { color: #808000; } .h_default { } .h_doublestring { color: olive; } .h_entity { color: #800080; } .h_number { color: #009999; } .h_other { color: #800080; } .h_script { color: #000080; } .h_singlestring { color: olive; } .h_tag { color: #000080; } .h_tagend { color: #000080; } .h_tagunknown { color: #ff0000; } .h_xmlend { color: #0000ff; } .h_xmlstart { color: #0000ff; } .pl_array { color: black; } .pl_backticks { color: olive; } .pl_character { color: olive; } .pl_commentline { color: green; font-style: italic; } .pl_datasection { color: olive; } .pl_default { } .pl_error { color: red; font-weight: bold; } .pl_hash { color: black; } .pl_here_delim { color: olive; } .pl_here_q { color: olive; } .pl_here_qq { color: olive; } .pl_here_qx { color: olive; } .pl_identifier { color: black; } .pl_longquote { color: olive; } .pl_number { color: #009999; } .pl_operator { color: black; } .pl_pod { color: black; font-style: italic; } .pl_preprocessor { color: navy; font-weight: bold; } .pl_punctuation { color: black; } .pl_regex { color: olive; } .pl_regsubst { color: olive; } .pl_scalar { color: black; } .pl_string { color: olive; } .pl_string_q { color: olive; } .pl_string_qq { color: olive; } .pl_string_qr { color: olive; } .pl_string_qw { color: olive; } .pl_string_qx { color: olive; } .pl_symboltable { color: black; } .pl_word { color: navy; font-weight: bold; } .p_character { color: olive; } .p_classname { color: blue; font-weight: bold; } .p_commentblock { color: gray; font-style: italic; } .p_commentline { color: green; font-style: italic; } .p_default { } .p_defname { color: #009999; font-weight: bold; } .p_identifier { color: black; } .p_number { color: #009999; } 
.p_operator { color: black; } .p_string { color: olive; } .p_stringeol { color: olive; } .p_triple { color: olive; } .p_tripledouble { color: olive; } .p_word { color: navy; font-weight: bold; } .yaml_comment { color: #008800; font-style: italic; } .yaml_default { } .yaml_document { color: #808080; font-style: italic; } .yaml_identifier { color: navy; font-weight: bold; } .yaml_keyword { color: #880088; } .yaml_number { color: #880000; } .yaml_reference { color: #008888; } ocr-1.2.2/doc/src/classes.txt0000644000175000017500000000076512401076126016640 0ustar dalitzdalitz00000000000000======= Classes ======= Alphabetical ------------- **C** ClassifyCCs_ (gamera.toolkits.ocr.classes.ClassifyCCs) **H** hocrPage_ (gamera.toolkits.ocr.classes.hocrPage) **P** Page_ (gamera.toolkits.ocr.classes.Page) **T** Textline_ (gamera.toolkits.ocr.classes.Textline) .. _ClassifyCCs: gamera.toolkits.ocr.classes.ClassifyCCs.html .. _hocrPage: gamera.toolkits.ocr.classes.hocrPage.html .. _Page: gamera.toolkits.ocr.classes.Page.html .. _Textline: gamera.toolkits.ocr.classes.Textline.htmlocr-1.2.2/doc/src/gamera.toolkits.ocr.classes.hocrPage.txt0000664000175000017500000000035612401076126024251 0ustar dalitzdalitz00000000000000class ``hocrPage`` ================== ``hocrPage`` ------------ In module ``gamera.toolkits.ocr.classes`` .. docstring:: gamera.toolkits.ocr.classes hocrPage :no_title: .. 
docstring:: gamera.toolkits.ocr.classes hocrPage __init__ ocr-1.2.2/doc/src/images/0000775000175000017500000000000012433405322015700 5ustar dalitzdalitz00000000000000ocr-1.2.2/doc/src/images/overview.png0000644000175000017500000001344511716171512020265 0ustar dalitzdalitz00000000000000‰PNG  IHDRùýÇ®sRGB®ÎébKGDÿÿÿ ½§“ pHYs M MÒέNtIMEÙ !âT9¥IDATxÚíÝÍ‘ã8š` "͘XªîmåÔÆ:Bú0×9O– s°‡"ÔH¤Hý¤Hêy"Ý¥¤(Š©^€˜R ÷úXû€cB芻†‰M‡”ÒàÀ{ˆ·ôT”Á"¥g~>ð~ÆØ9Õ«E°Âß—F4”ËE¾^Z¡b 'ßÀW ç€Ýúø†çZß¾§BFD®l;¤”†âÛý—÷‡”Ò©Ún(ö[¤ÇÕêY8×”ûN)Å+ûì¯W= û‘RZ}¿Q§?_¿íx__m×ÛõÇwÕ}i¼uõ}ço=w~üyßÅý_Žm渺‰cM}N>×ܱº¹¹¹¹¹íéö”ÑsÛÿ>ÕßÂËQ#c/Bßê½hí#ƘZÇÓºͶ3ûèëž„úøu\àòÇ|É0s©¤»òØÓ+OZyÙ£1²Åå „Š'Ëm74büò%½Ûh#Ým!ÜÀ»…ŠÃ‘Œ1ö‚ÀqåËÒÕÜ®qßù‹ð;· ?žüËhtÈ5sÃ'‡%¡dË“j™ð à0N)¥ó­üAuÿÛ_þ~Z¨¨æ]&Ãd¹@¦BIÙ˜?«a/ÞT]ë9Æ µû·xhÿwôPl}ΧOzÑ9P ¡åQÌ´Ùj»òX$¿.ƘbŒ}¾ûªÃJ¹M÷Ä_ÄÅó·tGZ-ç·ÐÛ@ݶnº]øq˺h¬jîyj%¼rm*( ­®¥pYäÙºf5ÔÛZ½e@YqW½†ü|ùØÊa®oÍ}ÖAÅŸÀk­í}xVoÅ^f]¾iž xãƒ4ó÷ZÏBsÖår­•¾ë™ë}]é˜ >ã¼¹àôÃÛžÒ³Ð5ÖÀêcŒ§”Ru`8‡†ò±cØH­Pp%Ptõ$‹åÈÅÜÒ˜Üñ&?üêàáææYšš41ß߇nj$™:†næX£§hí¬Ëãåó „ÐXÆâå!ZEžùy¿,=qk°*à±VϺ\ÕR<:äôKÍ.îš¼ë™óTœGƒ·Þ0IÞÁµ ýƒž¿ËõS£1‹ðSο”nm«Ÿ*3 s/ Þ(ptÒœþàŽý÷Kc,¡˜Ú`S¡ÞÑÚY—«†½œôñžžýî–íîn\¨€Ç[3ër½8åԪ޷̺ÜUÏ}6Å~ú™ã*àÑÊÙ–«{jXæµY—Ï£.ª}tû_<ërk6êêòF¹ÔE×Xã¦bͧϨ95óXqBº”R¬g +&æhÞߨO}2›oˆê—öeíÆ,g¡õó™çîê!<ð.^2¤tl´‡0‚”ë~äQ§¥\tRÍvu¶°:Ôûk„«³ µöW(x;/ë©(Ö˜ cÑý­í&î;Ï£Þ ]cÛ¡*¢¹x|¾.Vïï;–¾=¶p¶°µ=(KgA;ÏB–ƒ„@€P±ss³…-P>fÍ,hå d›Vöj×£?ÊÊÙ³…•ó›—ã‚O3!eRk¢{'+¡âurMÄÕ‚rxMQK±úrE=>¸6ô%´€PqPUÆ©(V΂ÖÔS³íh¬aÕ/ÙîÎçë^ðZ›e­ÚÇF„öìcçå[Ëá£ÕáMÍ=QO4’÷])½¨£P[pØ/¥]«]ªå±­¹¹˜k3¿+XŒ¯!Ü2ïÒÓBE9¹UÕЗ »õþ*,”÷•¡à4.ëzma•Scö¾>îF€¨ƒÇ P¼g¨ÈmÃ=“ ¶æJzÁëM› [|ST_}™ÕL˜¼:TläõÞ*>Þå Q¦¾±7¡žÜ ¸ÃáCE1íws”È+ŠaxEÏøì:W/¹ÝÄõÂc‹ÖÄ¿PŸfŽoÍk)}¹ìÿ=y’ª>Æ8,=ÑðåLÍÍu®ªõ¬B«‘ŸY—ª¹ÆšV}Ýk?³íšçÿ².ÖᇔŽ/ôT„‹n.%ÀÛ òÒû¹ñ-§0¨~>Õ&5G06z(ê¸ËKþõSÛN…£¹Ÿý9†w)Ô€[Ü[¨ù€éºB¿à¹/ɬ{+ò~¯lÛ|þ™PqIÞ.°iݽÛÓ+ܲÏV¬‹õÃï ¸fÉHI¡æ OÚö!z1ŠqXÐø½÷Ø:¡®¨fSî¯4úÞr]ªj´Æp­Ñ/¶i-o±úùóë>/a¡P}ÏâÔX7jrzí{ 5Ïÿe]ªj»\Tzj…¡jHèÅ:#õ¯†±cÙºXB,Í¡˜abí§V#]4Æõý]Õ{0LÌ!1;-Â’Iª¦ö7ÞúÐ^óêúºXBðj*¡*¡@¨„ `Ë>BX6Ÿ÷^XÊ^#÷Tty=_)¼FL)5×ßå‹9Èë€=RSÀv|¬ÙxKQA'<¸ ^SS1.“:àuwj/àÅ¡BA'Т¦*€íøxÆNtpDf ¾r~žQS¡ €ƒ† 
íÛ+B…‚NŽ*´oÓÔTB°¯|òß/xùßÇøSA'|«cþ¬L<¤”g„½?ßrÀÂKk*>cLñ/)„î—Ú ¶Ýð^ýÀ(>`rC}zñ±§©ãuH) ãv§BÿêãK÷±ÿ¦ XØjOÅQ¾áþyóÀ¿…t!„SýM¾øÙP†‹"X 8öÐ Dùgc È?ÿ¿©í¿û8£áèá=W¶oj*àÍÅ’»W_No{ š w>ΔRL)ýÏÌößé6ÐË/ûìyõå#\68Êëà=E¹]ÝøÍ]rØÒkÛÂå½oû÷ej…{xQ¿wP𢠓Š%Å‹õ%‘Î\tޝ֩ÀÊ¿ÙCcJé´‹ž °ü[Åø¿§µ—0®õpT,CnXËoëS nµM?õóêñ]J)Ö÷Uah¨·_ø¡84êLêcŸâÏóù(ÎÑ9”Uµ(“E®Õ1\œçúÇývã1Üô;eû=‡+èL)ï߮ݽݿ¼ÝÖoGynÛ¼O 7¾Ïòã÷熭k5"¯¥19Kd(ZA¢d†߸›Û4挘­›¨·¯ 1/z'fÂÁÅöŹ:÷ŠLÜ?4æò­×è™ë)7.ƒ°iN.XÄÜ…_L†5Ìô”…“e£uÃIžœ'¯ š»r2¦zÔGX0Tk2§æ¤ZWz5¦¶ÏÇ?´Ž©˜´ª_ÓPOd5õ#Zº:°çjöáá4õ»Q¼ÉìfFMk„À›~H-\˾éýø’ör/çÅå`¶O¨¶ö§« ;çæ­„ €Iç‚ÉñÒ‡Ë T¬7$æL—=`GŒþ¶,€ÑS€P—j° åròF pó„hãz-ÿ !üGÈ*à^ BBG1®«Ð9À…Œÿ†þ)d<Ѽâè©xSã•?,`7bŒiÅæ.}p!B¨!B¨€ï5BÜ-¥tr¶Íè@¨„ @¨*¡*¡@¨„ àbŒ½³BÀ#tNwɽ1FÁ„ €‡*à |Ôß(ž&þáTÀñåž kÓÏô—SÇ÷Âò5êcŒÉ)~^ôÕ¿»”’/0p`j*€ï¢®„ €‡„uppk  ¸‘º 8¸µ=®‡7ù0_Ûªž À:_Nà¸ÔTÏ -ê*@¨xup`ÏÚñ’‚Î;ÿp8×þöÍWÁÑßãïìY=>0€)Š5Ù3íÛŒ§ôT,-èüTÐ ïÔU°[,ÌSS|7upPNð(ã<u÷°a¤ T|ß ^à0ÆÌ¡ ii—1,d¿]ûöÒËÑ·Žé-Û·˜ÒökI>cL¿RŠ{?ÙGy°òÛZJÞ÷pè¿ü:j!T±›Ñ :8¢#tâ( S¨ Gz»(Ôvý-L¡&¼ 5€P€P TB TB€P€PB T<ÏgŒ–a¡*¡@¨„ @¨„ ¡*¡`¡”Rt@¨*¡*¡@¨„ @¨*¡*¡@¨žìÃ)žáwŒý­ý™ÒÉ¡xŸÐÐ…ºBHãçĆ©ŸÕÿŒ1Í<~ø™Òà7ÛSJ‡~Ÿ1¦_)E¿j¸;Dô­ðìÆ~ÁóêÙ¡B¨€=‰ï Bl4LŒAbÓ—ò¥˜ò˜… *„ ØP˜ØcÃ|„×B…PÂÄv_ÓIq'<Ÿy*@ èR] aø•R<Ê7ûŸ)~¥cC ¡/‘BðDÝ‘/üLé4Ö… TÏ’/¼AÝÁBèôV€P<'Pœ/{ýµþLiÈ—Aüæáy̨ùÂo‡ùßùC½õŸžØ5ãÞBÖÏ“*û}6´ŸQ 3ð8z*^(†pjÆýLé”oã]Où&™ƒÍ=k4äÇ&׫wë. 
ä^¿m*ù!^q+>ð†*x(—†º/fF<µößøÀï¦zê0píØçÁöݽO §Æï»k½—9Òbì…Ø]lâÃzÆ0ÓO5ðÃÒ†þÔ?Ôû˜hLæz;µ“ñ¾Sù˜¢D ØaOÆLÏCæôæÂBEåû6Mô,>cæÛòoáÖsóˆ}€P±kf²,>´†…½!¸˜¹b TìÄÚµ–~0Œ!d˜XÛ£+zú‰o”ÇXŠêþæ6SÏÙšžü(“.Á£üLéî0ðˆ}€P±qcãÙ7îën 1„SúS‰Ÿr È wªæ­(z .¶›¨yR£f£¸¿,Ꜻ$òeÞ‹²£ìMIÏ‹`yj¶Ð¨Ÿž¹=QLéØSáƘ\£ßd°ªÉÅlˆ>¤ý}líõ–Å›SïÏz¶ÛV±äD/a] }¾[ÌXÛÜ÷Ô1=bõ¾&¾„•ƒž ^'…Ð7–|$Ø{XîÃß“…5/=¶êˆÆåçûV¨ž¢Þ÷ð+¥˜{½z_ùxsïhqŸ@ž ¶ù{§þöAåïc—=Õvïã<µ}=-}—#ŸZ÷¯Ùöû˜Yÿ§o­=Ä{3¤”פÙNu]Gî–(8ŠÖû¸± âd„ xÀ‡íP¨ºüÁÛ¸¶vÎN¾ÿl*\¼Eˆ(½0o&.Š”ó*[ýÍ—:Š•‹M¥P¯RÎl8®ôù¯Â„Œ÷ y5á½…ÿŒ§¡^2þ2ÞÒ.ë'êú'*@È`;¿ï®5äs‹w± r]Tê½É{† 34r€ñßÂ?}oã3¤_~­™rËmÊú„1P˜§‚¥Ÿ£¹†¢^ÆÞ\¼U¨€­4f+BÄ·]ú*X(nù9ïEM¼:Ù«Ÿ`Ãb¼ÔÓµÞ›&îB¨!–Âß«7ß«z)*àûƒÄ D°7cÑè)‹y?3ûY§¦Þ—šŠû•#7Š·ù­_#ŒPhd½ÞÅÁ¢µÿr¥Oá‚#ûá<×Ï”N1„Sú»>áæGÃä*‹!_I!Ü òì–Î&B€`q*ª`‘#sÙ2£?¾Q ahõ6´BÆÔ,–­•BçBÀÕ ‰ÕÓ]Ƙòl•caçgf­ÃµÇ;Û|—?60ZCQ—ÖOÜûxx=/VC];:äÞǃP°c­ž…¢.â¦y,î}<;2Q¤Ù…º²þá3.Ÿ£ëÞÇ㨩ø&ù²Dü3«æydÇ=CE 5E¨xÃ@Q4þåå‰{ëÔO T¼[ ˆ! 1„SkíB÷;ÆþwŒ}::>öbZïüïòþ5¡`›¡¯çœ(ocßaâ"PŒë‚äé»Ï‹’•…œù11„Ó¸]?ÎA1¬y<<›UJáY¥x$=€P€PÉÚ%¸½N`Š5áåe¸?cLy´À‘VµÌA"ŠÈ£'€'}¦ýäiž‹Æwf}¼@¨õí~óòD8T¯ ÀáÆVjA„ `Ÿá¢Ù€¾¥#Cþ÷ƒBpGÀ¸øPù8ÂÒÞƒztFšY(KÀ„¥ÁàJ`(  T\‹¹"\º€ãøáR3µFbÅIEND®B`‚ocr-1.2.2/doc/src/images/overview.fig0000644000175000017500000000352611716171512020245 0ustar dalitzdalitz00000000000000#FIG 3.2 Produced by xfig version 3.2.5 Landscape Center Inches Letter 100.00 Single -2 1200 2 6 4650 3300 5850 4500 5 1 0 2 20 7 50 -1 -1 0.000 0 1 0 0 5250.000 3364.286 4800 4275 5235 4380 5700 4275 1 2 0 2 20 7 50 -1 -1 0.000 1 0.0000 5250 3525 450 150 4800 3375 5700 3675 2 1 0 2 20 7 50 -1 -1 0.000 0 0 -1 0 0 2 4800 3525 4800 4275 2 1 0 2 20 7 50 -1 -1 0.000 0 0 -1 0 0 2 5700 3525 5700 4275 -6 2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 975 1500 1725 1500 1725 2475 975 2475 975 1500 2 2 0 2 0 7 40 -1 20 0.000 0 0 -1 0 0 5 1125 1575 1875 1575 1875 2550 1125 2550 1125 1575 2 2 0 2 0 7 30 -1 20 0.000 0 0 -1 0 0 5 1275 1650 2025 1650 2025 2625 1275 2625 1275 1650 2 2 0 2 20 7 10 -1 20 0.000 0 0 -1 0 0 5 1575 1800 2325 1800 2325 2775 1575 2775 1575 1800 2 2 0 2 20 7 20 -1 20 0.000 0 0 -1 0 0 5 1425 1725 2175 
1725 2175 2700 1425 2700 1425 1725 2 1 0 2 20 7 50 -1 -1 0.000 0 0 -1 1 0 3 1 1 2.00 120.00 240.00 1800 3000 1800 3900 4500 3900 2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 7875 1650 8625 1650 8625 2625 7875 2625 7875 1650 2 1 0 2 -1 7 50 -1 -1 0.000 0 0 -1 1 0 2 1 1 2.00 120.00 240.00 2850 2175 7650 2175 2 1 0 2 -1 7 50 -1 -1 0.000 0 0 -1 1 0 2 1 1 2.00 120.00 240.00 5175 3150 5175 2400 2 2 0 2 0 7 40 -1 20 0.000 0 0 -1 0 0 5 8025 1725 8775 1725 8775 2700 8025 2700 8025 1725 2 2 0 2 0 7 30 -1 20 0.000 0 0 -1 0 0 5 8175 1800 8925 1800 8925 2775 8175 2775 8175 1800 4 0 0 50 -1 0 20 0.0000 4 300 915 1275 1200 Images\001 4 0 20 50 -1 0 20 0.0000 4 225 645 6000 4200 Data\001 4 0 20 50 -1 0 20 0.0000 4 300 1140 5850 3900 Training\001 4 0 -1 50 -1 0 20 0.0000 4 225 960 7800 1050 Text as\001 4 0 -1 50 -1 0 20 0.0000 4 225 1065 7800 1380 Unicode\001 4 0 20 50 -1 1 20 0.0000 4 315 1080 2325 3750 Training\001 4 0 0 50 -1 0 20 0.0000 4 225 1350 1125 900 Document\001 4 0 -1 50 -1 1 20 0.0000 4 315 1800 4575 1950 Classification\001 ocr-1.2.2/doc/src/ocr.txt0000644000175000017500000000353711716171512015771 0ustar dalitzdalitz00000000000000 OCR === segmentation ------------ ``bbox_mcmill`` ``````````````` [object] **bbox_mcmill** ([object *glyphs*], float *section_search_size* = 1.00, float *noise_mltplk* = 1.00, float *large_mltplk* = 20.00, float *stdev_mltplk* = 5.00) :Operates on: ``Image`` [OneBit] :Returns: [object] :Category: OCR/segmentation :Defined in: bbox_merging_mcmillan.py :Author: Robert Butz, Karl MacMillan Returns the textlines from image as connected components. The segmentation method is adapted from McMillan's segmentation method in roman_text.py. It allows a more individual segmentation through parameterization. Options: *glyphs*: This list can be build out of a ``cc_analysis``. On default, this parameter is blank, which will cause the function to call ``cc_analysis`` itself. 
*section_search_size* This optional parameter adjusts the calculated avg_glyph_size by multiplying its value (default=1). *noise_mltplk* With this optional parameter one can adjust the noise_recognition rate independently from the calculated avg_glyph_size (default = 1). Values greater than 1 let the noise_removal detect bigger noise (but maybe even glyphs). Choose smaller values to avoid assigning small glyphs to noise. *large_mltplk* Analog to noise_mltplk one can set this parameter to manipulate the recognition of very large ccs according to the avg_glyph_size (default=20). Higher values lead to a better acceptance of above-average ccs. Beneficial, for example for big capital initials at the beginning of paragraphs such as seen in bibles. *stdev_mltplk* This parameter affects the line finding algorithm by excluding abnormally tall glyphs (default=5). The standard deviation will be calculated and multiplied by this parameter. ocr-1.2.2/CHANGES0000644000175000017500000000536112433405246014102 0ustar dalitzdalitz00000000000000Changelog of the OCR Toolkit for Gamera ======================================= Version 1.2.2, Nov 20, 2014 --------------------------- - handled hOCR bounding box data outside the image boundaries more gracefully Version 1.2.1, Oct 27, 2014 --------------------------- - fixed a possible exception when reading hOCR files (thanks to Georg Drees) Version 1.2.0, Sep 01, 2014 --------------------------- - hOCR support (thanks to Fabian Schmitt): o new option -hi for reading text line segmentation from hOCR file o new option -ho for writing hOCR output Version 1.1.0, Jun 26, 2014 --------------------------- - significant speed up of recognition process - can now batch process several files in combination with new option -od (--output_directory) to avoid loading the training data for each image file Version 1.0.6, Feb 14, 2012 --------------------------- - fixed some minor errors reported by Jakub Wilk - extrachars file can contain arbitrary unicode 
texts - heuristic rule for disambiguating roman letters somewhat improved Version 1.0.5, Sep 19, 2011 --------------------------- - FSF address corrected Version 1.0.4, Jul 27, 2010 --------------------------- - grouping of quotation marks somewhat corrected (should this be removed some day?) - more heuristic rules for disambiguation of roman characters - heuristic rules are now choosable in ocr4gamera.py with option "-R" to switch them off, use "-R none" Version 1.0.3, Jun 15, 2010 --------------------------- - Option "-o" now works again Version 1.0.2, Jun 08, 2010 --------------------------- - changes in recognition script "ocr4gamera.py": o renamed from "ocr4gamera" to "ocr4gamera.py" (for Windows compatibility) o changes in dictionary correction + language choosable with option "-L" + edit distance threshold choosable with option "-e" + when aspell not found, ispell is tried o additional translation table class_name -> character can be provided in a CSV file with option "-c" o option "-i" removed; replaced with verbosity level in option "-v" o option "-a" for grouping of broken characters now works - new option extra_chars_dict in functions return_char() and textline_to_string() - new plugin bbox_merging_mcmillan in analogy to roman_text - plugin bbox_seg removed (now in the Gamera core) - replaced the non standard module "pexpect" with "popen" Version 1.0.1, Feb 25, 2010 --------------------------- - 'ocr4gamera -a' now uses the provided training file - ocr4gamera now behaves properly when no image file given - GPL copyright information added to all source files; documentation licensed under CC-BY Version 1.0.0, Oct 16, 2009 --------------------------- - first creation ocr-1.2.2/PKG-INFO0000664000175000017500000000047012433405322014175 0ustar dalitzdalitz00000000000000Metadata-Version: 1.0 Name: ocr Version: 1.2.2 Summary: An addon OCR toolkit for the Gamera framework for document analysis and recognition. 
Home-page: http://gamera.sourceforge.net/ Author: Rene Baston and Christoph Dalitz Author-email: UNKNOWN License: GNU GPL version 2 Description: UNKNOWN Platform: UNKNOWN ocr-1.2.2/MANIFEST.in0000644000175000017500000000072511716171512014643 0ustar dalitzdalitz00000000000000recursive-include src *.cpp *.c *.h makefile.* *.hpp *.hxx *.cxx *.txt ANNOUNCE CHANGES INSTALL KNOWN_BUGS LICENSE README TODO recursive-include include *.cpp *.c *.h makefile.* *.hpp *.hxx *.cxx *.txt ANNOUNCE CHANGES INSTALL KNOWN_BUGS LICENSE README TODO recursive-include scripts ocr4gamera include ACKNOWLEDGEMENTS CHANGES TODO INSTALL LICENSE README KNOWN_BUGS MANIFEST.in version recursive-include doc *.txt *.html *.css *.py *.jpg *.jpeg *.png *.gif *.fig ocr-1.2.2/setup.py0000644000175000017500000000277411716171512014625 0ustar dalitzdalitz00000000000000#!/usr/bin/env python from distutils.core import setup, Extension from gamera import gamera_setup # Some meta data of the toolkit TOOLKIT_NAME = "ocr" VERSION = open("version", 'r').readlines()[0].strip() AUTHOR = "Rene Baston and Christoph Dalitz" HOMEPAGE = "http://gamera.sourceforge.net/" DESCRIPTION = "An addon OCR toolkit for the Gamera framework for document analysis and recognition." LICENSE = "GNU GPL version 2" # ---------------------------------------------------------------------------- # You should not usually have to edit anything below, but it is # implemented here and not in the Gamera core so that you can edit it # if you need to do something more complicated (for example, building # and linking to a third- party library). # ---------------------------------------------------------------------------- PLUGIN_PATH = 'gamera/toolkits/%s/plugins/' % TOOLKIT_NAME PACKAGE = 'gamera.toolkits.%s' % TOOLKIT_NAME PLUGIN_PACKAGE = PACKAGE + ".plugins" plugins = gamera_setup.get_plugin_filenames(PLUGIN_PATH) plugin_extensions = gamera_setup.generate_plugins(plugins, PLUGIN_PACKAGE) # This is a standard distutils setup initializer. 
If you need to do # anything more complex here, refer to the Python distutils documentation. setup(name=TOOLKIT_NAME, version=VERSION, license=LICENSE, url=HOMEPAGE, author=AUTHOR, description=DESCRIPTION, ext_modules = plugin_extensions, packages = [PACKAGE, PLUGIN_PACKAGE], scripts = ['scripts/ocr4gamera.py']) ocr-1.2.2/gamera/0000775000175000017500000000000012433405322014333 5ustar dalitzdalitz00000000000000ocr-1.2.2/gamera/toolkits/0000775000175000017500000000000012433405322016203 5ustar dalitzdalitz00000000000000ocr-1.2.2/gamera/toolkits/ocr/0000775000175000017500000000000012433405322016766 5ustar dalitzdalitz00000000000000ocr-1.2.2/gamera/toolkits/ocr/__init__.py0000644000175000017500000000201111716411750021074 0ustar dalitzdalitz00000000000000""" Toolkit setup This file is run on importing anything within this directory. Its purpose is only to help with the Gamera GUI shell, and may be omitted if you are not concerned with that. """ from gamera import toolkit import plugins #import wx # You can inherit from toolkit.CustomMenu to create a menu # for your toolkit. Create a list of menu option in the # member _items, and a series of callback functions that # correspond to them. The name of the callback function # should be the same as the menu item, prefixed by '_On' # and with all spaces converted to underscores. 
class bbox_mcmillan(PluginFunction):
    # overwrite find_tall_glyphs to adjust deviation
    """Returns the textlines in an image as connected components.

    The segmentation method is adapted from McMillan's segmentation method
    in roman_text.py. It allows a more individual segmentation through
    parameterization.

    Options:

      *glyphs*:
        This list can be built out of a ``cc_analysis``. By default, this
        parameter is blank, which will cause the function to call
        ``cc_analysis`` itself.

      *section_search_size*
        This optional parameter adjusts the calculated avg_glyph_size by
        multiplying its value (default=1).

      *noise_mltplk*
        With this optional parameter one can adjust the noise_recognition
        rate independently from the calculated avg_glyph_size (default = 1).
        Values greater than 1 let the noise_removal detect bigger noise (but
        maybe even glyphs). Choose smaller values to avoid assigning small
        glyphs to noise.

      *large_mltplk*
        Analogous to noise_mltplk one can set this parameter to manipulate
        the recognition of very large ccs according to the avg_glyph_size
        (default=20). Higher values lead to a better acceptance of
        above-average ccs. Beneficial, for example for big capital initials
        at the beginning of paragraphs such as seen in bibles.

      *stdev_mltplk*
        This parameter affects the line finding algorithm by excluding
        abnormally tall glyphs (default=5). The standard deviation will be
        calculated and multiplied by this parameter.
    """
    pure_python = True
    category="PageSegmentation"
    self_type = ImageType([ONEBIT])
    args = Args([ImageList("glyphs", default=NoneDefault),
                 Float("section_search_size", default=1.0),
                 Float("noise_mltplk", default=1.0),
                 Float("large_mltplk", default=20.0),
                 Float("stdev_mltplk", default=5.0)])
    return_type = ImageList("line_cc_list")
    author = "Robert Butz, Karl MacMillan"

    # NOTE(review): the statement nesting below was reconstructed from a
    # source whose indentation was lost -- confirm against the upstream file.
    def __call__(self, glyphs=None, section_search_size=1, noise_mltplk=1, large_mltplk=20, stdev_mltplk=5):
        # ``__call__`` is wrapped with ``staticmethod`` at the end of the
        # class, so ``self`` is an ordinary first argument here; the body
        # uses it as the image (``self.cc_analysis()``, ``self.highlight``).
        from gamera import core
        from gamera.roman_text import Section as Roman_Section
        #from gamera.plugins.image_utilities import union_images

        def find_sections(image, glyphs, section_search_size=1, noise_mltplk=1, large_mltplk=20, stdev_mltplk=5):
            """Find the sections within an image - this finds large blocks
            of text making it possible to find the lines within complex
            text layouts."""
            # FUDGE is the average glyph size scaled by the caller; it is
            # the margin by which every glyph rectangle is grown below.
            FUDGE = __avg_glyph_size(glyphs) * section_search_size

            # remove noise and large objects
            noise_size = FUDGE * noise_mltplk
            large_size = FUDGE * large_mltplk
            new_glyphs = []
            for g in glyphs:
                if __section_size_test(image, g, noise_size, large_size):
                    new_glyphs.append(g)

            # Sort the glyphs left-to-right and top-to-bottom
            # NOTE(review): ``list.sort(cmpfunc)`` and ``cmp`` exist only in
            # Python 2.
            new_glyphs.sort(lambda x, y: cmp(x.ul_x, y.ul_x))
            new_glyphs.sort(lambda x, y: cmp(x.ul_y, y.ul_y))

            # Create rectangles for each glyph that are bigger by FUDGE
            big_rects = []
            for g in new_glyphs:
                # grow by FUDGE on every side, clamped to the image extent
                ul_y = max(0, g.ul_y - FUDGE)
                ul_x = max(0, g.ul_x - FUDGE)
                lr_y = min(image.lr_y, g.lr_y + FUDGE)
                lr_x = min(image.lr_x, g.lr_x + FUDGE)
                ul_x = int(ul_x); ul_y = int(ul_y)
                nrows = int(lr_y - ul_y + 1)
                ncols = int(lr_x - ul_x + 1)
                big_rects.append(core.Rect(core.Point(ul_x, ul_y), core.Dim(ncols, nrows)))

            # Search for intersecting glyphs and merge them. This is
            # harder than it seems at first because we want everything
            # to merge together that intersects regardless of the order
            # in the list. It ends up being similar to connected-component
            # labeling. This is prone to be kind-of slow.
            current = 0
            rects = big_rects
            # NOTE(review): with an empty ``rects`` list the first call below
            # indexes element 0 -- confirm callers never pass a blank page.
            while(1):
                # Find the indexes of any rects that intersect with current
                inter = __find_intersecting_rects(rects, current)
                # If we found intersecting rectangles merge them with the
                # current rect, remove them from the list, and start the
                # whole process over. We start over to make certain that
                # everything that should be merged is.
                if len(inter):
                    g = rects[current]
                    new_rects = [g]
                    for i in range(len(rects)):
                        if i == current:
                            continue
                        if i in inter:
                            g.union(rects[i])
                        else:
                            new_rects.append(rects[i])
                    rects = new_rects
                    current = 0
                # If we didn't find anything that intersected move on to the
                # next rectangle.
                else:
                    current += 1
                # Bail when we are done.
                if current >= len(rects):
                    break

            # Create the sections
            sections = []
            for rect in rects:
                sections.append(Section(rect, stdev_mltplk))

            # Place the original (small) glyphs into the sections
            for g in glyphs:
                if __section_size_test(image, g, noise_size, large_size):
                    for s in sections:
                        if s.bbox.intersects(g):
                            s.add_glyph(g)
                            # each glyph goes into the first matching section only
                            break

            # Fix up the bounding boxes
            for s in sections:
                s.calculate_bbox()

            return sections

        def __avg_glyph_size(glyphs):
            """Compute the average glyph size for the page"""
            # mean over heights and widths together, hence the factor 2
            # NOTE(review): divides by zero when ``glyphs`` is empty.
            total = 0.0
            for g in glyphs:
                total += g.nrows
                total += g.ncols
            return total / (2 * len(glyphs))

        def __section_size_test(image, glyph, noise_size, large_size):
            """Filter for section finding - removes very small and
            very large glyphs"""
            # returns 1 (keep) or 0 (drop); ``image`` is not used
            black_area = glyph.black_area()[0]
            if black_area > noise_size and \
               glyph.nrows < large_size and \
               glyph.ncols < large_size:
                return 1
            else:
                return 0

        def __find_intersecting_rects(glyphs, index):
            """For section finding - return the index of glyphs intersecting
            the glyph and the index passed in."""
            g = glyphs[index]
            inter = []
            for i in range(len(glyphs)):
                if i == index:
                    continue
                if g.intersects(glyphs[i]):
                    inter.append(i)
            return inter

        # overwrite find_tall_glyphs to adjust deviation
        class Section(Roman_Section):
            def __init__(self, bbox, stdev_mltplk=5):
                # the base-class constructor is not called; all state read
                # by this subclass is initialised here
                self.bbox = core.Rect(bbox)
                self.lines = []
                self.glyphs = []
                # stats
                self.avg_glyph_area = 0
                self.avg_glyph_height = 0
                self.avg_glyph_width = 0
                self.avg_line_height = 0
                self.agv_line_width = 0
                self.stdev = 0
                self.stdev_mltplk = stdev_mltplk

            def find_tall_glyphs(self):
                # Returns the indexes (into self.glyphs) of glyphs whose
                # height exceeds the average by more than
                # stdev * stdev_mltplk.
                from gamera import stats
                # the standard deviation is computed once and then reused
                if self.stdev == 0:
                    self.stdev = stats.samplestdev([g.nrows for g in self.glyphs])
                tall = []
                for i in range(len(self.glyphs)):
                    g = self.glyphs[i]
                    if (g.nrows - self.avg_glyph_height) > self.stdev*self.stdev_mltplk:
                        tall.append(i)
                return tall

        # this is the actual beginning of the __call__-method
        if glyphs == None:
            glyphs = self.cc_analysis()

        sections = find_sections(self, glyphs, section_search_size, noise_mltplk, large_mltplk, stdev_mltplk)
        for s in sections:
            s.find_lines()

        # create a Cc for each line
        lines = []
        label = 1
        for s in sections:
            for l in s.lines:
                if len(l.glyphs) == 0:
                    continue
                # label the lines in input image
                # (incremented before use, so the first label written is 2)
                label += 1
                for g in l.glyphs:
                    self.highlight(g, label)
                line_rect = l.glyphs[0].union_rects(l.glyphs)
                lines.append(core.Cc(self, label, line_rect))
        return lines

    __call__ = staticmethod(__call__)


class BboxModule(PluginModule):
    # Plugin module descriptor listing the functions exported by this file.
    category = "OCR"
    functions = [bbox_mcmillan]
    author = "Robert Butz, Karl MacMillan"
    url = "http://gamera.sourceforge.net/"

module = BboxModule()
def return_char(unicode_str, extra_chars_dict={}):
    """Converts a unicode character name to a unicode symbol.

    Signature:

      ``return_char (classname, extra_chars_dict={})``

    with

      *classname*:
        A class name derived from a unicode character name.
        Example: ``latin.small.letter.a`` returns the character ``a``.

      *extra_chars_dict*
        A dictionary of additional translations of classnames to character
        codes. This is necessary when you use class names that are not
        unicode names. The character 'code' does not need to be an actual
        code, but can be any string. This can be useful, e.g. for ligatures:

        .. code:: Python

           return_char(glyph.get_main_id(), {'latin.small.ligature.st':'st'})

    When *classname* is not listed in *extra_chars_dict*, it must correspond
    to a `standard unicode character name`_, as in the examples of the
    following table:

    .. _`standard unicode character name`: http://www.unicode.org/charts/

       +-----------+----------------------------+----------------------------+
       | Character | Unicode Name               | Class Name                 |
       +===========+============================+============================+
       | ``!``     | ``EXCLAMATION MARK``       | ``exclamation.mark``       |
       +-----------+----------------------------+----------------------------+
       | ``2``     | ``DIGIT TWO``              | ``digit.two``              |
       +-----------+----------------------------+----------------------------+
       | ``A``     | ``LATIN CAPITAL LETTER A`` | ``latin.capital.letter.a`` |
       +-----------+----------------------------+----------------------------+
       | ``a``     | ``LATIN SMALL LETTER A``   | ``latin.small.letter.a``   |
       +-----------+----------------------------+----------------------------+

    Class names of the form ``collated.<text>`` yield ``<text>``, and a
    leading ``cursive.`` is stripped before the lookup is retried. When no
    translation is found, an error line is written to stdout and the empty
    string is returned.
    """
    # The shared default dict is only ever read here, never modified.
    try:
        return extra_chars_dict[unicode_str]
    except KeyError:
        pass

    name = unicode_str.upper()
    # some xml-files might be corrupted due to wrong grouping
    if name.startswith('_GROUP.'):
        name = name[len('_GROUP.'):]
    if name.startswith('_PART.'):
        name = name[len('_PART.'):]
    name = name.replace(".", " ")

    try:
        return unicodedata.lookup(name)
    except KeyError:
        pass

    parts = unicode_str.split(".")
    # guard against a bare "collated" without any payload
    if parts[0] == "collated" and len(parts) > 1:
        return parts[1]
    if parts[0] == "cursive":
        # retry without the "cursive." prefix, keeping the user translations
        return return_char(unicode_str[len("cursive."):], extra_chars_dict)
    # sys.stdout.write gives the same output on Python 2 and Python 3
    sys.stdout.write("ERROR: Name not found: %s\n" % name)
    return ""


def chars_make_words(lines_glyphs, threshold=None):
    """Groups the given glyphs to words based upon the horizontal distance
    between adjacent glyphs.

    Signature:

      ``chars_make_words (glyphs, threshold=None)``

    with

      *glyphs*:
        A list of ``Cc`` data types, each of which representing a character.
        All glyphs must stem from the same single line of text and are
        expected in reading order.

      *threshold*:
        Horizontal white space greater than *threshold* will be considered
        a word separating gap. When ``None``, the threshold value is
        calculated automatically as 2.5 times the median white space
        between adjacent glyphs.

    The result is a nested list of glyphs with each sublist representing
    a word. This is the same data structure as used in `Textline.words`_

    .. _`Textline.words`: gamera.toolkits.ocr.classes.Textline.html
    """
    glyphs = lines_glyphs[:]

    if threshold is None:
        # white space between each glyph and its right neighbour
        spacelist = [right.ul_x - left.lr_x
                     for left, right in zip(glyphs, glyphs[1:])]
        if spacelist:
            threshold = median(spacelist) * 2.5
        else:
            threshold = 0

    wordlist = []
    word = []
    for i, glyph in enumerate(glyphs):
        # a gap wider than the threshold closes the current word
        if i > 0 and (glyph.ul_x - glyphs[i - 1].lr_x) > threshold:
            wordlist.append(word)
            word = []
        word.append(glyph)
    if word:
        wordlist.append(word)
    return wordlist


def __char_touches_top(glyph, line):
    """Returns true when the top of the character is close to the top of the line."""
    # "close" means within the upper fifth of the line's bounding box
    return glyph.ul_y <= line.bbox.ul_y + (line.bbox.nrows/5)
def _disambiguate_roman(char, glyph, line):
    """Apply the latin-alphabet heuristics to one recognized character.

    Reclassifies *glyph* (via ``classify_heuristic``) where the rules apply
    and returns the possibly corrected character.
    """
    # Letters whose capital and small forms differ only in size: the form is
    # chosen by whether the glyph reaches the top of the line. The checks run
    # one after the other, each seeing the character left by the previous one.
    for letter in ("x", "p", "o", "w", "v", "z", "s"):
        if char != letter and char != letter.upper():
            continue
        # not for long s
        if letter == "s" and glyph.get_main_id().upper() == "LATIN.SMALL.LETTER.LONG.S":
            continue
        if __char_touches_top(glyph, line):
            glyph.classify_heuristic("latin.capital.letter." + letter)
        else:
            glyph.classify_heuristic("latin.small.letter." + letter)
        char = return_char(glyph.get_main_id())

    # Apostrophe and comma share a shape; the vertical position decides.
    if char == "'" or char == ",":
        if glyph.ul_y < line.bbox.center_y:
            glyph.classify_heuristic("APOSTROPHE")
            char = "'"
        else:
            glyph.classify_heuristic("COMMA")
            char = ","
    return char


def textline_to_string(line, heuristic_rules="roman", extra_chars_dict={}):
    """Returns a unicode string of the text in the given ``Textline``.

    Signature:

      ``textline_to_string (textline, heuristic_rules="roman", extra_chars_dict={})``

    with

      *textline*:
        A ``Textline`` object containing the glyphs. The glyphs must already
        be classified.

      *heuristic_rules*:
        Depending on the alphabet, some characters can be very similar and
        need further heuristic rules for disambiguation, like apostrophe and
        comma, which have the same shape and only differ in their position
        relative to the baseline.

        When set to \"roman\", several rules specific for latin alphabets
        are applied.

      *extra_chars_dict*
        A dictionary of additional translations of classnames to character
        codes. This is necessary when you use class names that are not
        unicode names. Will be passed to `return_char`_.

    As this function uses `return_char`_, the class names of the glyphs in
    *textline* must correspond to unicode character names, as described in
    the documentation of `return_char`_.

    .. _`return_char`: #return-char
    """
    words = []
    for word_glyphs in line.words:
        chars = []
        for glyph in word_glyphs:
            char = return_char(glyph.get_main_id(), extra_chars_dict)
            if heuristic_rules == "roman":
                char = _disambiguate_roman(char, glyph, line)
            chars.append(char)
        words.append("".join(chars))
    # words are separated by exactly one blank
    return " ".join(words)


def check_upper_neighbors(item, glyph, line):
    """Check for small signs grouped beside each other like quotation marks.

    Signature:

      ``check_upper_neighbors(item,glyph,line)``

    with

      *item*:
        Some connected-component.

      *glyph*:
        Some connected-component.

      *line*:
        The bounding box of the text line which includes ``item`` and
        ``glyph``; its ``center_y`` and ``lr_y`` are read.

    Returns a list with two elements. The first element is a list of
    characters (images that have been united to a single image) and the
    second element is a list of characters which have to be removed as
    these have been united to a single character.
    """
    remove = []
    add = []
    # glyphs must be small, of similar size and on the same height
    on_baseline = (glyph.lr_y >= line.center_y
                   and glyph.lr_y - (glyph.nrows/3) <= line.lr_y)
    if not on_baseline:
        if glyph.contains_y(item.center_y) and item.contains_y(glyph.center_y):
            minwidth = min([item.ncols, glyph.ncols])
            distance = item.lr_x - glyph.lr_x
            # item must lie to the right of glyph, at most three widths away
            if distance > 0 and distance <= minwidth*3:
                remove.append(item)
                remove.append(glyph)
                add.append(union_images([item, glyph]))
    # result[0] == ADD, result[1] == REMOVE
    return [add, remove]
def check_glyph_accent(item,glyph):
    """Check two glyphs for being grouped to one single character. This
    function is for uniting connected-components like i, j or colon.

    Signature:

      ``check_glyph_accent(item,glyph)``

    with

      *item*:
        Some connected-component.

      *glyph*:
        Some connected-component.

    A list with two elements is returned. The first element is a list of
    characters (images that have been united to a single image) and the
    second element is a list of characters which have to be removed as
    these have been united to a single character.
    """
    remove = []
    add = []
    result = []
    # x-extents overlap? (original note: "side by side?")
    if(glyph.contains_x(item.ul_x) or glyph.contains_x(item.lr_x) or glyph.contains_x(item.center_x)):
        # y-extents must not overlap
        if(not(item.contains_y(glyph.ul_y) or item.contains_y(glyph.lr_y) or item.contains_y(glyph.center_y))):
            remove.append(item)
            remove.append(glyph)
            new = union_images([item,glyph])
            add.append(new)
    result.append(add) #result[0] == ADD
    result.append(remove) #result[1] == REMOVE
    return result


# NOTE(review): the statement nesting of get_line_glyphs was reconstructed
# from a source whose indentation was lost -- confirm against upstream.
def get_line_glyphs(image,textlines):
    """Splits image regions representing text lines into characters.

    Signature:

      ``get_line_glyphs (image, segments)``

    with

      *image*:
        The document image that is to be further segmented. It must contain
        the same underlying image data as the second argument *segments*

      *segments*:
        A list of ``Cc`` data types, each of which represents a text line
        region. The image views must correspond to *image*, i.e. each pixel
        has a value that is the unique label of the text line it belongs to.
        This is the interface used by the plugins in the
        \"PageSegmentation\" section of the Gamera core.

    The result is returned as a list of Textline_ objects.

    .. _Textline: gamera.toolkits.ocr.classes.Textline.html
    """
    i=0
    show = []
    lines = []
    ret,sub_ccs = image.sub_cc_analysis(textlines)

    for ccs in sub_ccs:
        # textlines[i] is the line region that the components in ccs came from
        line_bbox = Rect(textlines[i])
        i = i + 1
        glyphs = ccs[:]
        newlist = []

        remove = []
        add = []
        result = []
        # NOTE(review): ``list.sort(cmpfunc)`` and ``cmp`` exist only in Python 2.
        glyphs.sort(lambda x,y: cmp(x.ul_x, y.ul_x))
        for position, item in enumerate(glyphs):
            if(True):
            #if(not(glyph.lr_y >= line_bbox.center_y and glyph.lr_y-(glyph.nrows/3) <= line_bbox.lr_y)): ## is this part of glyph higher then line.center_y ?

                # candidate partners: up to two glyphs to the left of item
                # and, because the slice end is exclusive, one to the right
                # NOTE(review): asymmetric window -- confirm it is intended.
                left = position - 2
                if(left < 0):
                    left = 0
                right = position + 2
                if(right > len(glyphs)):
                    right = len(glyphs)
                checklist = glyphs[left:right]

                for glyph in checklist:
                    if (item == glyph):
                        continue

                    result = check_upper_neighbors(glyph,item,line_bbox)
                    if(len(result[0]) > 0): #something has been joined...
                        joind_upper_connection = result[0][0] #joined glyph
                        add.append(joind_upper_connection)
                        remove.append(result[1][0]) #first part of joined one
                        remove.append(result[1][1]) #second part of joined one
                        for glyph2 in checklist: #maybe the upper joined glyphs fit to a glyph below...
                            # NOTE(review): compares the whole ``glyphs`` list
                            # with an image; presumably ``glyph2`` was meant.
                            if(glyphs == joind_upper_connection):
                                continue
                            if(joind_upper_connection.contains_x(glyph2.center_x)): #fits for example on ae, oe, ue in german alph
                                new = union_images([glyph2,joind_upper_connection])
                                add.append(new)
                                remove.append(glyph2)
                                add.remove(joind_upper_connection)
                                break
                # already merged components are not offered to the accent check
                for elem in remove:
                    if (elem in checklist):
                        checklist.remove(elem)

                for glyph in checklist:
                    if(item == glyph):
                        continue

                    result = check_glyph_accent(item,glyph)
                    if(len(result[0]) > 0): #something has been joined...
                        add.append(result[0][0]) #joined glyph
                        remove.append(result[1][0]) #first part of joined one
                        remove.append(result[1][1]) #second part of joined one

        # replace the merged parts by their unions
        for elem in remove:
            if(elem in glyphs):
                glyphs.remove(elem)
        for elem in add:
            glyphs.append(elem)

        new_line = Textline(line_bbox)
        final = []
        if(len(glyphs) > 0):
            for glyph in glyphs:
                final.append(glyph)

        # extend=False: the line keeps the bounding box of its region
        new_line.add_glyphs(final,False)
        new_line.sort_glyphs() #reading order -- from left to right
        lines.append(new_line)

        for glyph in glyphs:
            show.append(glyph)

    return lines
class Textline:
    """The ``Textline`` object stores information about a text line in its
    following properties:

      **bbox**
        A ``Rect`` object representing the bounding box of the text line.

      **glyphs**
        A list of ``Cc`` objects, each representing a character in the line.

      **words**
        A nested list of ``Cc`` objects, where each sublist represents
        the characters of a single word.

      **text**
        A string that is initialized to the empty string.
    """
    # Class-level defaults are kept for backward compatibility only;
    # the constructor gives every instance its own objects.
    bbox = []
    glyphs = []
    words = []
    text = ""

    def __init__(self,bbox,glyphs = None):
        """Signature:

          ``init (bbox, glyphs=None)``

        with

          *bbox*:
            ``Rect`` object representing position and size of the text line

          *glyphs*:
            A list of ``Cc`` objects representing the characters in the
            text line. The list is stored as given, not copied.
        """
        self.bbox = Rect(bbox)
        if glyphs is None:
            self.glyphs = []
        else:
            self.glyphs = glyphs
        # per-instance list, so that lines do not share the class-level one
        self.words = []
        self.text = ""

    def add_glyph(self,glyph,extend=True):
        """Adds the given *glyph* to the Textline. Signature:

          ``add_glyph (glyph, extend=True)``

        When *extend* is ``True``, the text line bounding box *bbox* is
        extended by the glyph's bounding box.
        """
        self.glyphs.append(glyph)
        if extend:
            self.bbox.union(glyph)

    def add_glyphs(self,glyphs,extend=True):
        """Adds the given *glyphs* to the Textline. Signature:

          ``add_glyphs (glyphs, extend=True)``

        When *extend* is ``True``, the text line bounding box *bbox* is
        extended by the union of the glyphs' bounding boxes.
        """
        for glyph in glyphs:
            self.add_glyph(glyph, extend)

    def sort_glyphs(self):
        """Sorts the characters in *Textline.glyphs* from left to right,
        i.e. by their upper left x coordinate.
        """
        # key based sorting works in Python 2 and 3 and, like the former
        # cmp based version, keeps the order of glyphs with equal ul_x
        self.glyphs.sort(key=lambda glyph: glyph.ul_x)
For fine tuning the classification, the follwoing attributes can be used: **knn** The knn classifier; this is passed in the constructor **parts_to_group** Corresponds to *max_parts_per_group* in *kNNInteractive.group_list_automatic*. Default value is 3. **grouping_distance** Corresponds to the *distance* argument of the *grouping_function* in *kNNInteractive.group_list_automatic*. Only CCs closer than this distance are considered for grouping. Default value is -1, which means that it will be calculated automatically as in `__call__`__. .. __: #call """ ############################################################################# # constructor # def __init__(self,knn): """Signature: ``__init__ (knn)`` where *knn* is a kNN classifier which has already loaded training data. """ self.knn = knn self.grouping_distance = -1 self.parts_to_group = 3 def __call__(self,ccs): """This method will be called in `Page.segment`_. Signature: .. _`Page.segment`: gamera.toolkits.ocr.classes.Page.html#segment ``__call__ (ccs)`` where *ccs* is the list of glyphs that is to be classified. See the documentation of Gamera's classifier API how the classification result is stored in the glpyhs. How the classification is done is controled by the following attributes of ``ClassifyCCs``: - When *parts_to_group* > 1, the classification is done with Gamera's grouping algorithm; otherwise no grouping of broken characters is done. - In case of grouping, the property *distance* is passed to the grouping function. When it is -1 (default), it is set to the median height of the *ccs*. 
""" from gamera.classify import ShapedGroupingFunction, BoundingBoxGroupingFunction distance = self.grouping_distance if (self.parts_to_group > 1 and distance < 0): distance = int(median([c.nrows for c in ccs])) if (self.parts_to_group > 1): ccs = self.knn.group_and_update_list_automatic(ccs,grouping_function=ShapedGroupingFunction(distance),max_parts_per_group=self.parts_to_group) #ccs = self.knn.group_and_update_list_automatic(ccs,grouping_function=BoundingBoxGroupingFunction(distance),max_parts_per_group=self.parts_to_group) else: ccs = self.knn.classify_and_update_list_automatic(ccs) return ccs class Page: ##################################################################################### """The ``Page`` object offers the page segmentation functionality by providing a ``segment`` method. See `its documentation`__ for more information on how to overwrite specific steps of the segmentation process. .. __: #segment After the call of ``segment``, the segmentation results are stored in the following attributes of ``Page``: **textlines** List of Textline_ objects representing all text lines **img** The image to which Ccs in the *textlines* refer. .. _Textline: gamera.toolkits.ocr.classes.Textline.html """ ccs_glyphs = [] ccs_lines = [] textlines = [] img = None classify_ccs = None #################################################################################### # constructor # def __init__(self, image, glyphs=None, classify_ccs=None): """The only required argument in the constructor is the image that is to be segmented. Note that the constructor does *not* do the segmentation; for this, you must call the segment__ method. .. __: #segment Signature: ``init (image, glyphs=None, classify_ccs=None)`` with *image*: The image to be segmented. *glyphs*: An optional list of connected components representing the characters in the image. In general, this is not needed, but it can be useful for bottom up methods starting from already detected characters (e.g. 
by Gamera's classification based character grouping. *classify_ccs*: A callable class with the same interface as ClassifyCCs_. If given, it will be called during the segmentation process, right after the splitting of lines to characters. .. _ClassifyCCs: gamera.toolkits.ocr.classes.ClassifyCCs.html """ self.img = image self.textlines = [] if (classify_ccs != None): self.classify_ccs = classify_ccs else: self.classify_ccs = None if (glyphs != None): self.ccs_glyphs = glyphs else: self.ccs_glyphs = [] def segment(self): """Segments *Page.img* and stores the result in *Page.textlines*. This method has no arguments. It calls the following methods in the given order: - page_to_lines_ for splitting the page into segments representing text lines - order_lines_ for sorting the lines into reading order - lines_to_chars_ for splitting all lines into characters - *Page.classify_ccs* when it is set, i.e., has been passed to the constructor (default is that it is not set) - chars_to_words_ for grouping the characters to words .. _page_to_lines: #page-to-lines .. _order_lines: #order-lines .. _lines_to_chars: #lines-to-chars .. _chars_to_words: #chars-to-words By overwriting one (or several) of the above functions, you can replace specific steps of the segmentation process with custom algorithms. """ self.page_to_lines() self.order_lines() self.lines_to_chars() if(self.classify_ccs != None): for line in self.textlines: line.glyphs = self.classify_ccs(line.glyphs) # grouping in classification may change glyph order line.sort_glyphs() self.chars_to_words() def page_to_lines(self): """Splits the image into segments representing text lines. This method has no arguments. The current implementation simply calls the *bbox_merging* plugin from the Gamera core with *Ey=0*, such that the page is not split into paragraphs, but into lines. 
The segmentation result is stored in the variable *Page.ccs_lines*, which is a list of the data type ``Cc``, i.e., with each segment (line) represented by a different label in the image. This is the interface used by all page segmentation plugins in the Gamera core. .. note:: When you overwrite this method, make sure that write the segmentation result to *self.ccs_lines*. This member variable will then be further processed by lines_to_chars_. .. _lines_to_chars: #lines-to-chars """ self.ccs_lines = self.img.bbox_merging(Ey=0) def order_lines(self): """Sorts the segments in *Page.ccs_lines* into reading order. This method has no arguments. The current implementation uses the plugin *textline_reading_order* from the Gamera core. """ from gamera.plugins.pagesegmentation import textline_reading_order self.ccs_lines = textline_reading_order(self.ccs_lines) def lines_to_chars(self, lines=None): """Splits text lines into characters. Signature: ``lines_to_chars (lines=None)`` *lines* must be a list of ``Cc`` data types, each of them representing a text line. When not given (default), *Page.ccs_lines* is used instead. The current implementation calls *get_line_glyphs* as defined in the module ocr_toolkit_. .. _ocr_toolkit: functions.html The result is stored in *Page.textlines*; the characters are stored for each textline in *Textline.glyphs*. """ from gamera.toolkits.ocr.ocr_toolkit import get_line_glyphs if(lines != None): seg_lines = lines else: seg_lines = self.ccs_lines self.textlines = get_line_glyphs(self.img, seg_lines) def chars_to_words(self, lines=None): """Groups the characters in each ``Textline`` from *Page.textlines* to words and stores the result for each ``Textline`` in the property *Textline.words*. This method has an optional but generally useless argument for the list of textlines. It is therefore usually called without arguments. The current implementation calls *chars_make_words* as defined in the module ocr_toolkit_. .. 
class hocrPage(Page):
    """A class derived from Page__ that overrides the *page_to_lines*
    method. Instead of bbox_merging, *page_to_lines* reads the segmentation
    information from a hOCR file for textline detection.

    .. __: gamera.toolkits.ocr.classes.Page.html
    """

    def __init__(self, image, hocr_in_path, glyphs=None, classify_ccs=None):
        """Like `Page.__init__`_, but with the additional obligatory
        argument *hocr_in_path* for the name of a hOCR file from which the
        textline segmentation is read. Note that the constructor does *not*
        do the segmentation; for this, you must call the *segment* method.

        Signature:

          ``init (image, hocr_in_path, glyphs=None, classify_ccs=None)``

        .. _Page.__init__: gamera.toolkits.ocr.classes.Page.html#init
        """
        self.hocr_path = hocr_in_path
        # the remaining attributes are set up exactly as in the base class
        Page.__init__(self, image, glyphs, classify_ccs)

    def bbox_from_hocr(self, hocr_path, maxrect):
        """Extracts the bounding boxes of all text lines from a hOCR file.

        *hocr_path* is the name of the hOCR file; *maxrect* is an object
        with the attributes *nrows* and *ncols*, to which all coordinates
        are clipped. Returns a list of ``Rect`` objects, one for each line
        of the file containing an element of class ``ocr_line``, in file
        order.

        The file is scanned line by line without an HTML parser. A matching
        line is expected to have the class name in single quotes, followed
        by an ``id=`` attribute and a ``title=`` attribute in double
        quotes. The title value is split at blanks, and its fields 1 to 4
        are read as the integer coordinates of the upper left and lower
        right corner. Lines that deviate from this layout raise an
        ``IndexError`` or ``ValueError``.
        """
        maxy = maxrect.nrows - 1
        maxx = maxrect.ncols - 1
        bboxes = []
        # the context manager closes the file even when parsing fails;
        # the original never closed it
        with open(hocr_path, "r") as hocr:
            for l in hocr:
                pcl = l.split("class=", 1)
                if len(pcl) < 2:
                    continue
                cl = pcl[1].split("'", 2)[1]
                if cl != "ocr_line":
                    continue
                bbox_t = l.split("id=")[1].split("title=")[1].split('"', 2)[1]
                val = bbox_t.split(" ")
                # a trailing ';' may separate the last coordinate from
                # further title properties
                ul = Point(min(int(val[1]), maxx),
                           min(int(val[2]), maxy))
                lr = Point(min(int(val[3]), maxx),
                           min(int(val[4].rstrip(';')), maxy))
                bboxes.append(Rect(ul, lr))
        return bboxes

    def page_to_lines(self):
        """Creates the text line segments from the bounding boxes stored in
        the hOCR file *hocr_path* and stores them in *ccs_lines*. Each
        segment is a ``Cc`` on the page image with label 1.
        """
        bboxes = self.bbox_from_hocr(self.hocr_path, self.img)
        self.ccs_lines = [Cc(self.img, 1, bbox) for bbox in bboxes]
By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. 
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS ocr-1.2.2/version0000644000175000017500000000000612433405257014510 0ustar dalitzdalitz000000000000001.2.2