generated from idea2app/Web-file-cache
-
Notifications
You must be signed in to change notification settings - Fork 0
103 lines (91 loc) · 3.31 KB
/
crawler.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
# Workflow: download the file or web page referenced by an issue, upload it to
# an R2 bucket, and commit the result to this repository.
#
# Trigger: an issue receives a label; the job only runs for the `crawler` label.
#
# Security note: the issue title and body are untrusted input. Values derived
# from them are handed to shell steps through environment variables (`$VAR`)
# and are never expanded with `${{ }}` inside a `run:` script, because that
# would let the issue author inject shell code.
name: Download File or Web page
on:
  issues:
    types:
      - labeled
jobs:
  Fetch-and-Save:
    if: github.event.label.name == 'crawler'
    runs-on: ubuntu-latest
    permissions:
      # `contents: write` is needed for the final `git push`,
      # `issues: write` for the comment posted by the first step.
      contents: write
      issues: write
    steps:
      # Post a link to this workflow run on the triggering issue.
      - name: Comment this GitHub action URL
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            const { issue, runId } = context,
              { owner, repo } = context.repo;

            // REST methods live under `github.rest` since github-script v5;
            // awaiting the call makes a failed request fail the step.
            await github.rest.issues.createComment({
              issue_number: issue.number,
              owner,
              repo,
              body: `The GitHub action URL is: https://github.com/${owner}/${repo}/actions/runs/${runId}`
            });

      - uses: pnpm/action-setup@v4
        with:
          version: 9
      - uses: actions/setup-node@v4
        with:
          node-version: 20
      - uses: actions/checkout@v4

      # Presumably parses the issue body against the issue form template;
      # the `issueparser_url` output is consumed by the next step.
      - id: issue-parser
        uses: stefanbuck/github-issue-parser@v3
        with:
          template-path: ".github/ISSUE_TEMPLATE/crawler.yml"

      # Export FILE_URL, FILE_PATH (URL path without host and query string)
      # and MIME_TYPE (from the Content-Type header of a HEAD request) to
      # later steps.
      - name: Set URL & MIME variables
        env:
          URL: ${{ steps.issue-parser.outputs.issueparser_url }}
        run: |
          # Whitespace or newlines in the URL would add arbitrary extra
          # variables to $GITHUB_ENV, so reject them up front.
          case "$URL" in
            '' | *[[:space:]]*)
              echo "::error::The issue does not contain a single valid URL"
              exit 1
              ;;
          esac
          # `..` segments in the path part (query string removed) would make
          # later steps write outside the intended folder.
          case "${URL%%\?*}" in
            */../* | */..)
              echo "::error::The URL path must not contain '..' segments"
              exit 1
              ;;
          esac
          cat >> "$GITHUB_ENV" <<EOF
          FILE_URL=$URL
          FILE_PATH=$(echo "$URL" | grep -oP '^\w+://[^/]+/?\K([^?]*)')
          MIME_TYPE=$(curl -sI "$URL" | grep -oPi 'Content-Type:\s+\K(\w+\/\w+)')
          EOF

      # Append the extension reported by the `mime` CLI for MIME_TYPE to
      # FILE_PATH, export the new FILE_PATH and its parent folder, and
      # create that folder.
      - name: Set Path variables
        run: |
          pnpm i mime -g
          FILE_PATH="$FILE_PATH.$(mime -r "$MIME_TYPE")"
          FOLDER_PATH="$(dirname "$FILE_PATH")"
          cat >> "$GITHUB_ENV" <<EOF
          FILE_PATH=$FILE_PATH
          FOLDER_PATH=$FOLDER_PATH
          EOF
          mkdir -p "$FOLDER_PATH"

      # Anything that is not an HTML page is saved as a single file.
      - name: Download the File
        if: env.MIME_TYPE != 'text/html'
        run: curl -s -o "$FILE_PATH" "$FILE_URL"

      # HTML pages go through `web-fetch` instead; Chrome is installed first
      # (presumably required by `web-fetch` - TODO confirm).
      - uses: browser-actions/setup-chrome@v1
        if: env.MIME_TYPE == 'text/html'
      - name: Download the Web page & its assets
        if: env.MIME_TYPE == 'text/html'
        run: |
          cd "$FOLDER_PATH"
          pnpx web-fetch "$FILE_URL"

      # NOTE(review): `@master` is a mutable ref that receives the R2
      # credentials; consider pinning both upload steps to a release tag or
      # commit SHA.
      - uses: ryand56/r2-upload-action@master
        with:
          r2-account-id: ${{ secrets.R2_ACCOUNT_ID }}
          r2-access-key-id: ${{ secrets.R2_ACCESS_KEY_ID }}
          r2-secret-access-key: ${{ secrets.R2_SECRET_ACCESS_KEY }}
          r2-bucket: ${{ secrets.R2_BUCKET }}
          source-dir: "${{ env.FILE_PATH }}"
          destination-dir: "${{ env.FOLDER_PATH }}"
      # Second upload, only for HTML pages, of the "<FILE_PATH>.html" path.
      - uses: ryand56/r2-upload-action@master
        if: env.MIME_TYPE == 'text/html'
        with:
          r2-account-id: ${{ secrets.R2_ACCOUNT_ID }}
          r2-access-key-id: ${{ secrets.R2_ACCESS_KEY_ID }}
          r2-secret-access-key: ${{ secrets.R2_SECRET_ACCESS_KEY }}
          r2-bucket: ${{ secrets.R2_BUCKET }}
          source-dir: "${{ env.FILE_PATH }}.html"
          destination-dir: "${{ env.FOLDER_PATH }}"

      # Commit everything that was downloaded and push it; the commit message
      # closes the triggering issue.
      - name: Git commit
        env:
          # The issue title is attacker-controlled; pass it as data, not as
          # part of the script text.
          ISSUE_TITLE: ${{ github.event.issue.title }}
          ISSUE_NUMBER: ${{ github.event.issue.number }}
        run: |
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git config --global user.name "github-actions[bot]"
          git add .
          git commit -m "[add] $ISSUE_TITLE (closes #$ISSUE_NUMBER)"
          # Refresh origin/main first; without the fetch the rebase targets
          # the ref that was checked out and is a no-op.
          git fetch origin main
          git rebase origin/main
          git push