# active_user.py
# Counts, per Wikisource domain and per user, the pages each user proofread and
# validated in a given month, and writes the result to
# ActiveUserStats/<YYYYMM>.json.
from config import domains, siteData
import toolforge
import json
import sys
from typing import Dict, List, Optional, TypedDict
class UserStats(TypedDict):
    """Per-user activity counters for one domain and one month.

    Both counters are stored as decimal strings, not integers, because that
    is how they are written into the JSON report.
    """

    # Number of pages whose current proofread status is credited to the user.
    proofread: str
    # Number of pages whose current validation status is credited to the user.
    validate: str
class DomainStats(TypedDict):
    """Container for the statistics of a single domain.

    NOTE(review): not referenced by the visible script code; kept for
    compatibility with anything importing it.
    """

    # Maps a user name to that user's counters.
    users: Dict[str, UserStats]
class PageStatus(TypedDict):
    """Shape of the value this script expects from ``WikiSourceApi.pageStatus``.

    Each entry is either ``None`` or a dict from which the script reads the
    ``"user"`` and ``"timestamp"`` keys.
    """

    # Who proofread the page and when; None when there is no such record.
    proofread: Optional[Dict[str, str]]
    # Who validated the page and when; None when there is no such record.
    validate: Optional[Dict[str, str]]
from pywikisource import WikiSourceApi
# Month to report on, given as YYYYMM (e.g. "202301") in the first CLI argument.
# It is validated up front because it is used both as a timestamp prefix in the
# SQL range and as the comparison key for page-status timestamps.
if len(sys.argv) < 2 or not (
    len(sys.argv[1]) == 6 and sys.argv[1].isascii() and sys.argv[1].isdigit()
):
    sys.exit("usage: " + sys.argv[0] + " YYYYMM")
time: str = sys.argv[1]

# Final report: {domain: {user name: {"proofread": "<n>", "validate": "<n>"}}}
statsUser: Dict[str, Dict[str, UserStats]] = {}

# Distinct (user, page) pairs edited by registered users in one namespace
# within an inclusive timestamp range. All values are bound as parameters so
# that nothing from the command line is spliced into the SQL text.
_EDITS_QUERY: str = (
    "select distinct actor_name, page_title "
    "from revision "
    "inner join actor on actor.actor_id = rev_actor "
    "inner join page on page.page_id = rev_page "
    "where rev_timestamp >= %s and rev_timestamp <= %s "
    "and page_namespace = %s and actor_user is not null "
    "order by actor_name"
)


def _fetch_page_edits(domain: str, month: str) -> Dict[str, List[str]]:
    """Return ``{user name: [page titles]}`` for Page-namespace edits in *month*.

    Opens a connection to the ``<domain>wikisource_p`` database, runs the
    query and always closes the connection again, so it is not held open
    while the (slow) per-page API calls run afterwards.
    """
    page_ns: int = siteData[domain]['namespace']['page']
    conn = toolforge.connect(domain + 'wikisource_p')
    try:
        with conn.cursor() as cur:
            # Day "31" is a safe upper bound for every month because the
            # timestamps are compared as fixed-width digit strings.
            cur.execute(
                _EDITS_QUERY,
                (month + "01000000", month + "31235959", page_ns),
            )
            rows = cur.fetchall()
    finally:
        conn.close()

    edits: Dict[str, List[str]] = {}
    for name_raw, title_raw in rows:
        # The columns arrive as bytes; decode once here.
        # Rows are already unique thanks to "select distinct".
        edits.setdefault(name_raw.decode("utf-8"), []).append(
            title_raw.decode("utf-8")
        )
    return edits


def _credited_to(entry: Optional[Dict[str, str]], user: str, month: str) -> bool:
    """Tell whether a status *entry* belongs to *user* and falls in *month*.

    The first seven characters of the timestamp, with '-' removed, are
    compared against *month* (presumably the timestamp starts with
    "YYYY-MM" -- confirm against the API). A missing entry or timestamp
    counts as "no".
    """
    if not entry or entry.get("user") != user:
        return False
    stamp = entry.get("timestamp") or ""
    return stamp[0:7].replace('-', '') == month


def _count_user_actions(
    ws: WikiSourceApi, user: str, pages: List[str], month: str
) -> UserStats:
    """Count pages in *pages* that *user* proofread / validated in *month*."""
    proofread = 0
    validated = 0
    for title in pages:
        status: PageStatus = ws.pageStatus('Page:' + title)
        if _credited_to(status["proofread"], user, month):
            proofread += 1
        if _credited_to(status["validate"], user, month):
            validated += 1
    # Counts are stored as strings in the report.
    return {"proofread": str(proofread), "validate": str(validated)}


for domain in domains:
    WS: WikiSourceApi = WikiSourceApi(domain)
    activity: Dict[str, UserStats] = {}
    for user, pages in _fetch_page_edits(domain, time).items():
        activity[user] = _count_user_actions(WS, user, pages, time)
    statsUser[domain] = activity

# User names are written unescaped (ensure_ascii=False), so the file must be
# opened as UTF-8 explicitly rather than with the locale's default encoding.
with open("ActiveUserStats/" + time + ".json", "w", encoding="utf-8") as f:
    # indent=1: one space per nesting level.
    json.dump(statsUser, f, ensure_ascii=False, sort_keys=True, indent=1)