-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathinitialise-db.mjs
213 lines (180 loc) · 5.78 KB
/
initialise-db.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
import Database from "better-sqlite3";
import fs from "fs";
import path from "path";
const dbPath = path.resolve("articles.db");
const db = new Database(dbPath, {
verbose: console.log,
});
db.pragma("journal_mode = WAL");
function checkIfTableExists(tableName) {
const tableExistsStatement = db.prepare(
`SELECT name FROM sqlite_master WHERE type='table' AND name = ?;`,
);
const tableExistsResult = tableExistsStatement.get(tableName);
return tableExistsResult !== undefined;
}
async function initialise() {
const creatingFeedsTable = !checkIfTableExists("feeds");
if (creatingFeedsTable) {
const createFeedsTable = db.prepare(
`CREATE TABLE IF NOT EXISTS feeds (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL
);`,
);
createFeedsTable.run();
console.log("Creating default feed");
const insertDefaultFeed = db.prepare(
`INSERT INTO feeds (title) VALUES (?)`,
);
insertDefaultFeed.run("Default");
}
const createArticlesTable = db.prepare(
`CREATE TABLE IF NOT EXISTS articles (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
url TEXT NOT NULL,
feed_id INTEGER,
read_at TIMESTAMP NULL,
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
mp3Url TEXT NULL,
mp3Duration TEXT NULL,
mp3Length INTEGER NULL,
status TEXT NULL,
type TEXT NULL,
text_content TEXT NULL,
bullmq_job_id INTEGER NULL,
FOREIGN KEY (feed_id) REFERENCES feeds(id)
)`,
);
console.log("Creating articles table");
createArticlesTable.run();
if (creatingFeedsTable) {
console.log("Setting feed_id for all articles to 1");
// set all articles to the default feed
const updateArticlesFeedId = db.prepare(`UPDATE articles SET feed_id = 1`);
updateArticlesFeedId.run();
}
const createExtractionRulesTable = db.prepare(
`CREATE TABLE IF NOT EXISTS extraction_rules (
id INTEGER PRIMARY KEY AUTOINCREMENT,
is_active INTEGER NOT NULL DEFAULT 1,
title TEXT NOT NULL,
domain TEXT NULL,
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
rule_type TEXT NOT NULL DEFAULT 'delete_selector',
content TEXT NOT NULL
)`,
);
console.log("Creating extraction rules table");
createExtractionRulesTable.run();
const extractionRulesQuery = db.prepare(
`SELECT count(*) FROM extraction_rules`,
);
const extractionRulesCount = extractionRulesQuery.get();
if (extractionRulesCount["count(*)"] === 0) {
console.log("Adding default extraction rules");
addDefaultExtractionRules();
}
const createRelatedLinksTable = db.prepare(
`CREATE TABLE IF NOT EXISTS related_links (
id INTEGER PRIMARY KEY AUTOINCREMENT,
article_id INTEGER NOT NULL,
title TEXT NOT NULL,
url TEXT NOT NULL,
context_quote TEXT NULL,
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
own_article_id INTEGER NULL,
FOREIGN KEY (article_id) REFERENCES articles(id),
FOREIGN KEY (own_article_id) REFERENCES articles(id)
)
`,
);
console.log("Creating related links table");
createRelatedLinksTable.run();
const creatingDocumentsTable = !checkIfTableExists("documents");
if (creatingDocumentsTable) {
const createDocumentsTable = db.prepare(
`CREATE TABLE IF NOT EXISTS documents (
id INTEGER PRIMARY KEY AUTOINCREMENT,
title TEXT NOT NULL,
filename TEXT NOT NULL,
filepath TEXT NOT NULL,
document_type TEXT NOT NULL,
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
bullmq_job_id INTEGER NULL
feed_id INTEGER NULL,
FOREIGN KEY (feed_id) REFERENCES feeds(id)
)
`,
);
console.log("Creating documents table");
createDocumentsTable.run();
} else {
// add feed_id column
const addFeedIdColumn = db.prepare(
`ALTER TABLE documents ADD COLUMN feed_id INTEGER NULL REFERENCES feeds(id)`,
);
console.log("Adding feed_id column to documents table");
addFeedIdColumn.run();
}
const createDocumentChaptersTable = db.prepare(
`CREATE TABLE IF NOT EXISTS document_chapters (
id INTEGER PRIMARY KEY AUTOINCREMENT,
document_id INTEGER NOT NULL,
title TEXT NOT NULL,
chapter_number INTEGER NULL,
text_content TEXT NULL,
added_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
own_article_id INTEGER NULL,
FOREIGN KEY (document_id) REFERENCES documents(id),
FOREIGN KEY (own_article_id) REFERENCES articles(id)
)
`,
);
console.log("Creating document chapters table");
createDocumentChaptersTable.run();
}
function addDefaultExtractionRules() {
const wikipediaSelectors = [
"figure",
"img",
"figcaption",
"sup.reference",
"sup.noprint",
"div.thumb",
"table.infobox",
"ol.references",
".mw-editsection",
];
const createRuleStatement = db.prepare(
"INSERT INTO extraction_rules (domain, is_active, title, rule_type, content) VALUES (?, ?, ?, ?, ?)",
);
for (const selector of wikipediaSelectors) {
createRuleStatement.run(
"en.wikipedia.org",
1,
`Remove ${selector} from Wikipedia`,
"delete_selector",
selector,
);
}
}
async function setMp3Length() {
// get articles with mp3Url
const articles = db
.prepare(`SELECT * FROM articles WHERE mp3Url IS NOT NULL`)
.all();
// for each article, get the mp3 file size
articles.forEach((article) => {
const mp3Path = "./public" + article.mp3Url;
const mp3Length = fs.statSync(mp3Path).size;
// update the article with the mp3 file size
const updateArticle = db.prepare(
`UPDATE articles SET mp3Length = ? WHERE id = ?`,
);
updateArticle.run(mp3Length, article.id);
});
}
initialise();