From a92af7a968cc8df6ae8fe8f9432b4451a3d7ba9d Mon Sep 17 00:00:00 2001 From: Philipp Zumstein Date: Thu, 9 Mar 2017 21:44:08 +0100 Subject: [PATCH] Adjust reading order when there are no columns When there are no white column separators and no black column separators, the reading order is determined simply by checking which line lies above which other line. --- ocrolib/psegutils.py | 4 ++-- ocropus-gpageseg | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ocrolib/psegutils.py b/ocrolib/psegutils.py index 77592353..0b6844e8 100644 --- a/ocrolib/psegutils.py +++ b/ocrolib/psegutils.py @@ -121,7 +121,7 @@ def extract_masked(image,linedesc,pad=5,expand=0): line = where(mask,line,amax(line)) return line -def reading_order(lines,highlight=None,debug=0): +def reading_order(lines,nocolumns=False,highlight=None,debug=0): """Given the list of lines (a list of 2D slices), computes the partial reading order. The output is a binary 2D array such that order[i,j] is true if line i comes before line j @@ -141,7 +141,7 @@ def separates(w,u,v): clf(); title("highlight"); imshow(binary); ginput(1,debug) for i,u in enumerate(lines): for j,v in enumerate(lines): - if x_overlaps(u,v): + if x_overlaps(u,v) or nocolumns: if above(u,v): order[i,j] = 1 else: diff --git a/ocropus-gpageseg b/ocropus-gpageseg index f5eedc6d..4a0036f2 100755 --- a/ocropus-gpageseg +++ b/ocropus-gpageseg @@ -388,7 +388,10 @@ def process1(job): if not args.quiet: print_info("finding reading order") lines = psegutils.compute_lines(segmentation,scale) - order = psegutils.reading_order([l.bounds for l in lines]) + if args.maxcolseps>0 or args.blackseps: + order = psegutils.reading_order([l.bounds for l in lines]) + else: + order = psegutils.reading_order([l.bounds for l in lines], nocolumns=True) lsort = psegutils.topsort(order) # renumber the labels so that they conform to the specs