-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgnrs_import.sh
executable file
·171 lines (140 loc) · 5.57 KB
/
gnrs_import.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#!/bin/bash
#########################################################################
# Purpose: Imports user data for processing with gnrs
#
# Usage: ./gnrs_import.sh
#
# Warning: Requires database geonames on local filesystem
#
# Authors: Brad Boyle ([email protected])
# Date created: 12 June 2017
#########################################################################
: <<'COMMENT_BLOCK_x'
COMMENT_BLOCK_x
######################################################
# Set basic parameters, functions and options
######################################################
# Assign unique job ID
# Date in nanoseconds plus random integer for good measure
job="job_$(date +%Y%m%d_%H%M%N)_${RANDOM}"
# Get local working directory
#DIR_LOCAL="${BASH_SOURCE%/*}"
DIR_LOCAL="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
if [[ ! -d "$DIR_LOCAL" ]]; then DIR_LOCAL="$PWD"; fi
# $local = name of this file
# $local_basename = name of this file minus '.sh' extension
# $local_basename should be same as containing directory, as
# well as local data subdirectory within main data directory,
local=`basename "${BASH_SOURCE[0]}"`
local_basename="${local/.sh/}"
# Set parent directory if running independently
#if [ -z ${master+x} ]; then
DIR=$DIR_LOCAL
#fi
# Set includes directory path, relative to $DIR
includes_dir=$DIR"/includes"
# Load startup script for local files
# Sets remaining parameters and options, and issues confirmation
# and startup messages
custom_opts="true"
source "$includes_dir/startup_local.sh"
# Pseudo error log, to absorb screen echo during import
tmplog="/tmp/tmplog.txt"
echo "Error log
" > $tmplog
# Set current script as master if not already source by another file
# master = name of this file.
# Tells sourced scripts not to reload general parameters and command line
# options as they are being called by another script. Allows component
# scripts to be called individually if needed
if [ -z ${master+x} ]; then
master=`basename "$0"`
fi
###########################################################
# Get and set custom parameters
###########################################################
# Set defaults
nullval=""
while [ "$1" != "" ]; do
case $1 in
-s | --silent ) silent="true"
;;
-n | --nowarnings ) i="false"
;;
-v | --nullval ) shift
nullval="$1"
;;
* ) echo "invalid option!"; exit 1
esac
shift
done
# Set final value of "NULL AS '<nullval>'"
# Note that omitting this parameter removes this component
# completely from \copy command
if [ "nullval" == "" ]; then
nullas=""
else
nullas="NULL AS '"$nullval"'"
fi
#########################################################################
# Main
#########################################################################
: <<'COMMENT_BLOCK_1'
COMMENT_BLOCK_1
############################################
# Import raw data
#
# Import CSV file from data directory to
# table user_data_raw in GNRS database
############################################
echoi $e "Importing user data:"
echoi $e -n "- Clearing raw table..."
PGOPTIONS='--client-min-messages=warning' psql -d gnrs --set ON_ERROR_STOP=1 -q -c "DELETE FROM user_data_raw WHERE job='$job'"
source "$includes_dir/check_status.sh"
# Data
datafile=$data_raw
echoi $e "- Importing raw data:"
echoi $e "-- '$submitted_filename' --> user_data_raw"
: <<'COMMENT_BLOCK_1'
# The development limit option below need to be updated
# to accommodate use of job # with raw import
# Import the raw data
# Value of $nullas statement set as optional command line parameter
if [ $use_limit = "true" ]; then
# Import subset of records (development only)
head -n $recordlimit $data_dir_local/$submitted_filename | psql -d gnrs -q -c "COPY user_data_raw FROM STDIN DELIMITER ',' CSV $nullas HEADER"
else
# Import full file
sql="\COPY user_data_raw FROM '${data_dir_local}/${submitted_filename}' DELIMITER ',' CSV $nullas HEADER;"
PGOPTIONS='--client-min-messages=warning' psql -d gnrs -q << EOF
\set ON_ERROR_STOP on
$sql
EOF
fi
source "$includes_dir/check_status.sh"
COMMENT_BLOCK_1
# Compose name of temporary, job-specific raw data table
raw_data_tbl_temp="user_data_raw_${job}"
# Create job-specific temp table to hold raw data
echoi $e -n "-- Creating table $raw_data_tbl_temp..."
PGOPTIONS='--client-min-messages=warning' psql -d gnrs --set ON_ERROR_STOP=1 -q -v job=$job -v raw_data_tbl_temp="${raw_data_tbl_temp}" -f $DIR_LOCAL/sql/create_raw_data_temp.sql
source "$DIR/includes/check_status.sh"
# Import the raw data
# $nullas statement set as optional command line parameter
echoi $e -n "-- Importing raw data to temp table..."
metacmd="\COPY $raw_data_tbl_temp FROM '${data_dir_local}/${submitted_filename}' DELIMITER ',' CSV $nullas HEADER;"
PGOPTIONS='--client-min-messages=warning' psql gnrs --set ON_ERROR_STOP=1 -q -c "$metacmd"
eval $cmd
source "$DIR/includes/check_status.sh"
# Import the raw data
echoi $e -n "-- Loading to raw data table with job..."
PGOPTIONS='--client-min-messages=warning' psql -d gnrs --set ON_ERROR_STOP=1 -q -v job=$job -v raw_data_tbl_temp="$raw_data_tbl_temp" -f $DIR_LOCAL/sql/import_user_data.sql
eval $cmd
source "$DIR/includes/check_status.sh"
######################################################
# Report total elapsed time and exit if running solo
######################################################
if [ -z ${master+x} ]; then source "$includes_dir/finish.sh"; fi
######################################################
# End script
######################################################