Skip to content

Commit

Permalink
Additional cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
VikParuchuri committed Jan 8, 2025
1 parent bdc244e commit bf2f693
Show file tree
Hide file tree
Showing 21 changed files with 64 additions and 214 deletions.
6 changes: 1 addition & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Integration test
name: Unit tests

on: [push]

Expand All @@ -14,10 +14,6 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: 3.11
- name: Install apt dependencies
run: |
sudo apt-get update
sudo apt-get install -y tesseract-ocr tesseract-ocr-eng
- name: Install python dependencies
run: |
pip install poetry
Expand Down
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from tests.conftest import layout_predictorfrom tests.conftest import detection_predictorfrom tests.conftest import recognition_predictor

# Surya

Surya is a document OCR toolkit that does:
Expand Down
4 changes: 2 additions & 2 deletions benchmark/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from benchmark.utils.metrics import precision_recall
from benchmark.utils.tesseract import tesseract_parallel
from surya.input.processing import open_pdf, get_page_images, convert_if_not_rgb
from surya.postprocessing.heatmap import draw_polys_on_image
from surya.postprocessing.util import rescale_bbox
from surya.debug.draw import draw_polys_on_image
from surya.common.util import rescale_bbox
from surya.settings import settings
from surya.detection import DetectionPredictor

Expand Down
2 changes: 1 addition & 1 deletion benchmark/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from benchmark.utils.metrics import precision_recall
from surya.layout import LayoutPredictor
from surya.input.processing import convert_if_not_rgb
from surya.postprocessing.heatmap import draw_bboxes_on_image
from surya.debug.draw import draw_bboxes_on_image
from surya.settings import settings
import os
import time
Expand Down
2 changes: 1 addition & 1 deletion benchmark/recognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from benchmark.utils.scoring import overlap_score
from surya.input.processing import convert_if_not_rgb
from surya.postprocessing.text import draw_text_on_image
from surya.debug.text import draw_text_on_image
from surya.recognition import RecognitionPredictor
from surya.settings import settings
from surya.recognition.languages import CODE_TO_LANGUAGE
Expand Down
20 changes: 19 additions & 1 deletion benchmark/table_recognition.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import argparse
from PIL import ImageDraw
import collections
import json

from surya.debug.draw import draw_bboxes_on_image
from tabulate import tabulate

from surya.input.processing import convert_if_not_rgb
Expand All @@ -19,6 +21,7 @@ def main():
parser.add_argument("--results_dir", type=str, help="Path to JSON file with benchmark results.", default=os.path.join(settings.RESULT_DIR, "benchmark"))
parser.add_argument("--max", type=int, help="Maximum number of images to run benchmark on.", default=None)
parser.add_argument("--tatr", action="store_true", help="Run table transformer.", default=False)
parser.add_argument("--debug", action="store_true", help="Enable debug mode.", default=False)
args = parser.parse_args()

table_rec_predictor = TableRecPredictor()
Expand Down Expand Up @@ -47,7 +50,7 @@ def main():
page_metrics = collections.OrderedDict()
mean_col_iou = 0
mean_row_iou = 0
for idx, pred in enumerate(table_rec_predictions):
for idx, (pred, image) in enumerate(zip(table_rec_predictions, images)):
row = dataset[idx]
pred_row_boxes = [p.bbox for p in pred.rows]
pred_col_bboxes = [p.bbox for p in pred.cols]
Expand All @@ -67,6 +70,21 @@ def main():

page_metrics[idx] = page_results

if args.debug:
# Save debug images
draw_img = image.copy()
draw = ImageDraw.Draw(draw_img)
draw_bboxes_on_image(pred_row_boxes, draw_img, [f"Row {i}" for i in range(len(pred_row_boxes))])
draw_bboxes_on_image(pred_col_bboxes, draw_img, [f"Col {i}" for i in range(len(pred_col_bboxes))], color="blue")
draw_img.save(os.path.join(result_path, f"{idx}_bbox.png"))

actual_draw_image = image.copy()
draw = ImageDraw.Draw(actual_draw_image)
draw_bboxes_on_image(actual_row_bboxes, actual_draw_image, [f"Row {i}" for i in range(len(actual_row_bboxes))])
draw_bboxes_on_image(actual_col_bboxes, actual_draw_image, [f"Col {i}" for i in range(len(actual_col_bboxes))], color="blue")
actual_draw_image.save(os.path.join(result_path, f"{idx}_actual.png"))


mean_col_iou /= len(table_rec_predictions)
mean_row_iou /= len(table_rec_predictions)

Expand Down
2 changes: 1 addition & 1 deletion benchmark/utils/bbox.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import fitz as pymupdf
from surya.postprocessing.util import rescale_bbox
from surya.common.util import rescale_bbox


def get_pdf_lines(pdf_path, img_sizes):
Expand Down
2 changes: 1 addition & 1 deletion detect_layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from surya.input.load import load_from_folder, load_from_file
from surya.layout import LayoutPredictor
from surya.postprocessing.heatmap import draw_polys_on_image
from surya.debug.draw import draw_polys_on_image
from surya.settings import settings
from surya.common.cli.config import CLILoader
import os
Expand Down
2 changes: 1 addition & 1 deletion detect_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from surya.input.load import load_from_folder, load_from_file
from surya.detection import DetectionPredictor
from surya.postprocessing.heatmap import draw_polys_on_image
from surya.debug.draw import draw_polys_on_image
from surya.common.cli.config import CLILoader
from surya.settings import settings
import os
Expand Down
6 changes: 3 additions & 3 deletions ocr_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@

from surya.models import load_predictors

from surya.postprocessing.heatmap import draw_polys_on_image, draw_bboxes_on_image
from surya.debug.draw import draw_polys_on_image, draw_bboxes_on_image

from surya.postprocessing.text import draw_text_on_image
from surya.debug.text import draw_text_on_image
from PIL import Image
from surya.recognition.languages import CODE_TO_LANGUAGE, replace_lang_with_code
from surya.table_rec import TableResult
from surya.detection import TextDetectionResult
from surya.recognition import OCRResult
from surya.layout import LayoutResult
from surya.settings import settings
from surya.postprocessing.util import rescale_bbox
from surya.common.util import rescale_bbox
from pdftext.extraction import plain_text_output


Expand Down
2 changes: 1 addition & 1 deletion ocr_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from surya.detection import DetectionPredictor
from surya.recognition.languages import replace_lang_with_code
from surya.input.load import load_from_folder, load_from_file, load_lang_file
from surya.postprocessing.text import draw_text_on_image
from surya.debug.text import draw_text_on_image
from surya.recognition import RecognitionPredictor
from surya.common.cli.config import CLILoader
from surya.settings import settings
Expand Down
2 changes: 0 additions & 2 deletions surya/common/polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import numpy as np
from pydantic import BaseModel, field_validator, computed_field

from surya.postprocessing.util import rescale_bbox


class PolygonBox(BaseModel):
polygon: List[List[float]]
Expand Down
16 changes: 16 additions & 0 deletions surya/common/util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
from typing import List

from surya.common.polygon import PolygonBox
Expand Down Expand Up @@ -26,3 +27,18 @@ def clean_boxes(boxes: List[PolygonBox]) -> List[PolygonBox]:
if not contained:
new_boxes.append(box_obj)
return new_boxes


def rescale_bbox(bbox, processor_size, image_size):
    """Map a bounding box from processor coordinates to image coordinates.

    Args:
        bbox: Mutable, indexable box whose first four entries are scaled as
            (x, y, x, y). The input itself is not modified.
        processor_size: ``(width, height)`` of the space ``bbox`` is in.
        image_size: ``(width, height)`` of the space to map into.

    Returns:
        A deep copy of ``bbox`` with the first four entries multiplied by the
        matching width/height ratio and truncated with ``int()``.
    """
    src_width, src_height = processor_size
    dst_width, dst_height = image_size

    x_ratio = dst_width / src_width
    y_ratio = dst_height / src_height

    # Work on a copy so the caller's box is left untouched.
    scaled = copy.deepcopy(bbox)
    # Entries alternate x, y, x, y, so the ratios alternate the same way.
    for position, ratio in enumerate((x_ratio, y_ratio, x_ratio, y_ratio)):
        scaled[position] = int(scaled[position] * ratio)
    return scaled
4 changes: 2 additions & 2 deletions surya/postprocessing/heatmap.py → surya/debug/draw.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import List
from PIL import ImageDraw, ImageFont

from surya.postprocessing.fonts import get_font_path
from surya.debug.fonts import get_font_path
from surya.common.polygon import PolygonBox
from surya.postprocessing.text import get_text_size
from surya.debug.text import get_text_size


def draw_bboxes_on_image(bboxes, image, labels=None, label_font_size=10, color: str | list = 'red'):
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion surya/postprocessing/text.py → surya/debug/text.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List, Tuple
from PIL import Image, ImageDraw, ImageFont

from surya.postprocessing.fonts import get_font_path
from surya.debug.fonts import get_font_path


def get_text_size(text, font):
Expand Down
11 changes: 10 additions & 1 deletion surya/detection/affinity.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
import math
from typing import List

import cv2
import numpy as np

from surya.postprocessing.util import get_line_angle
from surya.detection.schema import ColumnLine


def get_line_angle(x1, y1, x2, y2):
    """Return the angle, in degrees, of the line through two points.

    The angle is the arctangent of the slope ``(y2 - y1) / (x2 - x1)``, so it
    lies in the range [-90, 90].

    Args:
        x1, y1: Coordinates of the first point.
        x2, y2: Coordinates of the second point.

    Returns:
        The angle in degrees. A vertical line (``x1 == x2``) has no finite
        slope and yields ``90.0`` or ``-90.0`` following the sign of
        ``y2 - y1``; two coincident points yield ``0.0``.
    """
    run = x2 - x1
    rise = y2 - y1

    if run == 0:
        # Dividing by a zero run would raise ZeroDivisionError; return the
        # limiting angle of the arctangent instead.
        if rise == 0:
            return 0.0
        return math.copysign(90.0, rise)

    return math.degrees(math.atan(rise / run))


def get_detected_lines_sobel(image, vertical=True):
# Apply Sobel operator with a kernel size of 3 to detect vertical edges
if vertical:
Expand Down
136 changes: 0 additions & 136 deletions surya/input/pdflines.py

This file was deleted.

4 changes: 1 addition & 3 deletions surya/input/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

import cv2
import numpy as np
import math
import pypdfium2
from PIL import Image, ImageOps, ImageDraw
import torch
from PIL import Image
from surya.settings import settings


Expand Down
Loading

0 comments on commit bf2f693

Please sign in to comment.