Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of "full processor" for JSON-LD 1.1 #63

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 47 additions & 13 deletions rdflib_jsonld/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,48 @@
assert json # workaround for pyflakes issue #13
except ImportError:
import simplejson as json

from ._compat import IS_PY3 as PY3
from html.parser import HTMLParser

from os import sep
from os.path import normpath
from urllib.parse import urljoin, urlsplit, urlunsplit
from rdflib.parser import create_input_source
from io import StringIO

if PY3:
from urllib.parse import urljoin, urlsplit, urlunsplit
else:
from urlparse import urljoin, urlsplit, urlunsplit

from rdflib.parser import create_input_source
class HTMLJSONParser(HTMLParser):
    """Collect JSON-LD documents embedded in an HTML page.

    Every ``<script type="application/ld+json">`` element fed to the parser
    is decoded with ``json.loads`` and appended, in document order, to
    ``self.json``.  All other markup and text is ignored.

    The text of a script element is buffered and decoded only once the
    element ends.  ``HTMLParser`` may hand the content of one element to
    ``handle_data`` in several pieces, so decoding each piece on its own
    would reject valid JSON.

    Raises:
        json.JSONDecodeError: propagated from ``feed()``/``close()`` when a
            JSON-LD script element does not contain valid JSON.
    """

    def __init__(self):
        super().__init__()
        # Decoded JSON-LD documents, in the order they were encountered.
        self.json = []
        # True only while inside a <script type="application/ld+json">.
        self.contains_json = False
        # Raw text pieces of the script element currently being read.
        self._chunks = []

    def handle_starttag(self, tag, attrs):
        # Any new element starts from a clean state; the flag is only set
        # when the element is a script whose type is 'application/ld+json'.
        self._chunks = []
        self.contains_json = tag == "script" and any(
            attr == "type" and value == "application/ld+json"
            for attr, value in attrs
        )

    def handle_data(self, data):
        # Only keep text that belongs to a JSON-LD script element.
        if self.contains_json:
            self._chunks.append(data)

    def handle_endtag(self, tag):
        # Leaving the element: decode what was buffered and clear the flag
        # so text following the closing tag is not mistaken for JSON.
        self._flush()

    def close(self):
        super().close()
        # Decode text of a JSON-LD script that was still open when the
        # input ended, in case the base class delivered it during close().
        self._flush()

    def get_json(self):
        """Return the list of JSON documents decoded so far."""
        return self.json

    def _flush(self):
        # Decode the buffered script text, if any, and reset the state.
        if not self.contains_json:
            return

        text = "".join(self._chunks)
        self.contains_json = False
        self._chunks = []

        # Skip script elements that are empty or whitespace-only.
        if text.strip():
            self.json.append(json.loads(text))


def source_to_json(source):
Expand All @@ -27,10 +54,17 @@ def source_to_json(source):

stream = source.getByteStream()
try:
if PY3:
return json.load(StringIO(stream.read().decode("utf-8")))
else:
return json.load(stream)
return json.load(StringIO(stream.read().decode('utf-8')))
except json.JSONDecodeError as e:
# The document is not a JSON document, let's see whether we can parse
# it as HTML

# Reset stream pointer to 0
stream.seek(0)
parser = HTMLJSONParser()
parser.feed(stream.read().decode('utf-8'))

return parser.get_json()
finally:
stream.close()

Expand Down