Skip to content
This repository has been archived by the owner on Jul 7, 2020. It is now read-only.

Adding GetPlainText methods, fixing charmap #17

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
79 changes: 77 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,78 @@
go get rsc.io/pdf
# PDF Reader

http://godoc.org/rsc.io/pdf
A simple Go library which enables reading PDF files.

Features
- Get plain text content (without formatting)
- Get content (including all font and formatting information)

## Install:

`go get -u github.com/rsc/pdf`


## Read plain text

```golang
package main

import (
"bytes"
"fmt"

"github.com/rsc/pdf"
)

// main reads the text of test.pdf via readPdf and prints it to standard
// output, panicking if readPdf reports an error.
func main() {
	text, err := readPdf("test.pdf") // path to a local PDF file
	if err != nil {
		panic(err)
	}
	fmt.Println(text)
}

// readPdf opens the PDF file at path and returns its plain-text content as a
// single string. It returns an empty string together with the error when the
// file cannot be opened or its text cannot be read.
func readPdf(path string) (string, error) {
	r, err := pdf.Open(path)
	if err != nil {
		return "", err
	}

	// GetPlainText is called on r, the reader returned by pdf.Open.
	// NOTE(review): this assumes GetPlainText returns a single io.Reader, as
	// the call shape implies — confirm against the package API.
	var buf bytes.Buffer
	if _, err := buf.ReadFrom(r.GetPlainText()); err != nil {
		return "", err
	}
	return buf.String(), nil
}
```

## Read all text with styles from PDF

```golang
// readPdf2 opens the PDF file at path and prints, page by page, each run of
// text together with its font, font size and position. Consecutive text
// fragments for which isSameSentence reports true are merged into one run
// before printing.
//
// The returned string is always empty; only the error is meaningful.
//
// NOTE(review): isSameSentence is not defined in this snippet — the caller
// must supply it (presumably func(a, b pdf.Text) bool; confirm).
func readPdf2(path string) (string, error) {
	r, err := pdf.Open(path)
	if err != nil {
		return "", err
	}

	printRun := func(t pdf.Text) {
		fmt.Printf("Font: %s, Font-size: %f, x: %f, y: %f, content: %s \n", t.Font, t.FontSize, t.X, t.Y, t.S)
	}

	totalPage := r.NumPage()
	for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
		p := r.Page(pageIndex)
		if p.V.IsNull() {
			continue
		}

		var lastTextStyle pdf.Text
		haveRun := false // false until the first fragment of the page is seen
		for _, text := range p.Content().Text {
			if haveRun && isSameSentence(text, lastTextStyle) {
				lastTextStyle.S += text.S
				continue
			}
			// A new run starts: emit the finished one, if any, so no empty
			// zero-value record is printed at the start of a page.
			if haveRun {
				printRun(lastTextStyle)
			}
			lastTextStyle = text
			haveRun = true
		}
		// Flush the final run; without this the last run of every page
		// would never be printed.
		if haveRun {
			printRun(lastTextStyle)
		}
	}
	return "", nil
}
```

## Demo
![Run example](https://i.gyazo.com/01fbc539e9872593e0ff6bac7e954e6d.gif)
Loading