-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
executable file
·88 lines (74 loc) · 2.54 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
/**
* Entry point for redirection recording lambda function. The function
* takes the following arguments.
*
* Required:
* - url {string}: The url to begin crawling from
*
* Optional:
* - bucket {string}: The S3 bucket to record results to. If not provided,
* then the results will not be written to S3.
* - seconds {int}: The maximum number of seconds to follow redirects for,
* before ending crawling and recording the results to S3.
* If provided, must be > 0 and less <= 300.
* (Defaults to 240).
* - debug (boolean): Whether to run in debug mode. If true, uses local chrome,
* prints logging information to STDOUT, etc. (Defaults to
* false).
*/
const path = require('path')
const utilsLib = require('./lib/utils')
const crawlLib = require('./lib/crawl')
const s3Lib = require('./lib/s3')
const lambdaLib = require('./lib/lambda')
const localResourcesDir = path.join(__dirname, 'resources')
const chromeHeadlessPath = path.join(localResourcesDir, 'headless-chromium')
const handler = (args, _, callback) => {
if (process.env.S3_BUCKET !== undefined) {
args.bucket = process.env.S3_BUCKET
}
if (args.Records !== undefined) {
if (args.Records.length > 0) {
if (args.Records[0].body !== undefined) {
args.url = args.Records[0].body
}
}
}
const validationResult = utilsLib.validateArgs(args)
if (validationResult !== undefined) {
throw validationResult.msg
}
const crawlArgs = {
url: args.url,
profile: args.profile,
seconds: (args.seconds || 240),
debug: true
}
const logger = crawlArgs.debug === true ? console.dir : _ => { }
if (args.chromePath !== undefined) {
crawlArgs.chromePath = args.chromePath
} else {
crawlArgs.chromePath = chromeHeadlessPath
}
logger('Beginning crawl with following settings:')
logger(crawlArgs)
const onCrawlComplete = crawlResults => {
if (!args.bucket) {
logger('no s3 bucket provided; not sending to S3 or lambda.')
callback(crawlResults)
return
}
let s3KeyHandle
s3Lib.record(crawlArgs.url, args.bucket, crawlResults, crawlArgs.debug)
.then(s3Key => {
s3KeyHandle = s3Key
return lambdaLib.trigger(s3Key, crawlArgs.debug)
})
.then(_ => {
logger(`Launched lambda function for ${args.bucket}:${s3KeyHandle}.`)
callback(null, crawlResults)
})
}
crawlLib.crawl(crawlArgs, onCrawlComplete).catch()
}
module.exports.handler = handler