Skip to content

Commit

Permalink
修正 _getVerbStemLIST() 回傳結果, 調整 LawsToolkit re finditer
Browse files Browse the repository at this point in the history
  • Loading branch information
EclairCJ committed May 31, 2021
1 parent aeded03 commit cffc4c2
Showing 1 changed file with 7 additions and 11 deletions.
18 changes: 7 additions & 11 deletions ArticutAPI/Toolkit/toolkits.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
import re

# Regex Pattern
verbPPat = re.compile("(?<=<VerbP>)[^<]*?(?=.</VerbP>)")
verbPat = re.compile("(?<=<ACTION_verb>)[^<]*?(?=</ACTION_verb>)")
verbPat = re.compile("(?<=<VerbP>)[^<]*?(?=.</VerbP>)|(?<=<ACTION_verb>)[^<]*?(?=</ACTION_verb>)")
nounPat = re.compile("(?<=<ENTITY_nounHead>)[^<]*?(?=</ENTITY_nounHead>)|(?<=<ENTITY_nouny>)[^<]*?(?=</ENTITY_nouny>)|(?<=<ENTITY_noun>)[^<]*?(?=</ENTITY_noun>)|(?<=<ENTITY_oov>)[^<]*?(?=</ENTITY_oov>)")
modifierPat = re.compile("(?<=<MODIFIER>)[^<]*?(?=</MODIFIER>)")
modifierPPat = re.compile("(?<=<DegreeP>)[^<]*?(?=</DegreeP>)|(?<=<ModifierP>)[^<]*?(?=</ModifierP>)")
Expand All @@ -30,7 +29,7 @@
wikiDataPat = re.compile("(?<=<KNOWLEDGE_wikiData>)[^<]*?(?=</KNOWLEDGE_wikiData>)")
stripPat = re.compile("(?<=>).*?(?=<)")
clausePat = re.compile("\<CLAUSE_.*?Q\>")
contentPat = re.compile("|".join([verbPPat.pattern, verbPat.pattern, nounPat.pattern, modifierPat.pattern, modifierPPat.pattern, userDefinedPat.pattern]))
contentPat = re.compile("|".join([verbPat.pattern, nounPat.pattern, modifierPat.pattern, modifierPPat.pattern, userDefinedPat.pattern]))


def _segIndexConverter(parseResultDICT, posIndexLIST):
Expand Down Expand Up @@ -139,10 +138,7 @@ def getVerbStemLIST(parseResultDICT, indexWithPOS=True):

for p in parseResultDICT["result_pos"]:
if len(p) > 1:
if "VerbP" in p:
verbLIST.append([(v.start(), v.end(), v.group(0)) for v in list(verbPPat.finditer(p))])
else:
verbLIST.append([(v.start(), v.end(), v.group(0)) for v in list(verbPat.finditer(p))])
verbLIST.append([(v.start(), v.end(), v.group(0)) for v in list(verbPat.finditer(p))])
else:
verbLIST.append([])
if not indexWithPOS:
Expand Down Expand Up @@ -354,7 +350,7 @@ def getLawArticle(self, parseResultDICT={}):
'''
if parseResultDICT:
self.articutResult = parseResultDICT
articleLIST = list(set([self.tagPurger(a.group(0)) for a in re.finditer(self.articlePat, "".join(self.articutResult["result_pos"]))]))
articleLIST = list(set([self.tagPurger(a.group(0)) for a in self.articlePat.finditer("".join(self.articutResult["result_pos"]))]))
return articleLIST

def getCrime(self, parseResultDICT={}):
Expand All @@ -363,7 +359,7 @@ def getCrime(self, parseResultDICT={}):
'''
if parseResultDICT:
self.articutResult = parseResultDICT
crimePosLIST = set([c.group(0) for c in re.finditer(self.crimePat, "".join(self.articutResult["result_pos"]))])
crimePosLIST = set([c.group(0) for c in self.crimePat.finditer("".join(self.articutResult["result_pos"]))])
crimeTextLIST = [self.tagPurger(c) for c in crimePosLIST]
return crimeTextLIST

Expand All @@ -375,7 +371,7 @@ def getCriminalResponsibility(self, parseResultDICT={}):
if parseResultDICT:
self.articutResult = parseResultDICT
try:
crPosLIST = set([c.group(0) for c in re.finditer(self.criminalResponsibilityPat, "".join(self.articutResult["result_pos"]))])
crPosLIST = set([c.group(0) for c in self.criminalResponsibilityPat.finditer("".join(self.articutResult["result_pos"]))])
crTextLIST = [self.tagPurger(c) for c in crPosLIST]
return crTextLIST
except KeyError:
Expand All @@ -394,7 +390,7 @@ def getEventRef(self, parseResultDICT={}):
'''
if parseResultDICT:
self.articutResult = parseResultDICT
erPosLIST = set([e.group(0) for e in re.finditer(self.eventRefPat, "".join(self.articutResult["result_pos"]))])
erPosLIST = set([e.group(0) for e in self.eventRefPat.finditer("".join(self.articutResult["result_pos"]))])
erTextLIST = [self.tagPurger(e) for e in erPosLIST]
return erTextLIST

Expand Down

0 comments on commit cffc4c2

Please sign in to comment.