-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy path04b_cyclestreets_batch_routing.R
142 lines (131 loc) · 9.11 KB
/
04b_cyclestreets_batch_routing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#########################
### PART 2: RUN ROUTES THROUGH CYCLESTREETS
#########################
# LOAD DATA AND RUN BATCHES. FOR QUIET LINES, RESTRICT TO THOSE WITH FAST ROUTE UNDER MAX. VISUALSE LENGTH
lines_cs <- readRDS(file.path(path_inputs, "02_intermediate/02_travel_data", purpose, geography, "lines_cs.Rds"))
if (route_type=="quietest") {
rf_all_data <- readRDS(file.path(path_inputs, "02_intermediate/02_travel_data", purpose, geography, paste0("archive/rf_",file_name,"_data.Rds")))
rf_all_data <- rf_all_data[(rf_all_data$length < (maxdist_scenario * 1000)),]
summary({sel_q_line <- lines_cs$id %in% rf_all_data$id})
lines_cs <- lines_cs[sel_q_line,]
}
size_limit <- 5000 # maximum size of a batch
nbatch <- ceiling(nrow(lines_cs) / size_limit)
# RUN BATCHES (1000 lines 5 min)] (before start national build ask for cycle streets update?)
for(i in 1:nbatch){
l_start <- as.integer(1 + (i - 1) * size_limit)
if (i * size_limit < nrow(lines_cs)) {
l_fin <- as.integer(i * size_limit)
} else {
l_fin <- as.integer(nrow(lines_cs))
}
lines_cs_sub <- lines_cs[c(l_start:l_fin), ]
routes <- line2route(lines_cs_sub, route_fun = route_cyclestreet, plan = route_type, n_processes = 10, base_url = "http://pct.cyclestreets.net/api/")
routes@data <- routes@data[,!names(routes@data) %in% c("plan","start","finish")] # drop fields not wanted
routes@data <- left_join(routes@data, lines_cs_sub@data, by = "id") # merge in data in lines file
saveRDS(routes,file = file.path(path_temp_cs, purpose, geography, paste0("r",substr(route_type, 1, 1),"_",file_name,"_",i,"of",nbatch,".Rds")), version = 2)
print(paste0("Batch ",i," of ",nbatch," finished at ",Sys.time()))
}
#########################
### PART 3: MERGE BATCHES FOR a) DATA AND b) ROUTES [do in 2 stages otherwise LSOA routes files get too big to handle]
#########################
# REJOIN THE FILES FOR **DATA** (ALL LENGTHS, USED FOR SCENARIO BUILDING) & CHECK
file_first <- readRDS(file.path(path_temp_cs, purpose, geography, paste0("r",substr(route_type, 1, 1),"_",file_name,"_",1,"of",nbatch,".Rds")))
rownames(file_first@data) <- sapply(1:length(file_first), function(j) file_first@lines[[j]]@ID) # FORCE DATA ROW NAMES TO BE SAME AS ID IN LINES (in case don't start from '1')
stack_data <- file_first@data
if(nbatch > 1) {
for(i in 2:nbatch){
file_next <- readRDS(file.path(path_temp_cs, purpose, geography, paste0("r",substr(route_type, 1, 1),"_",file_name,"_",i,"of",nbatch,".Rds")))
rownames(file_next@data) <- sapply(1:length(file_next), function(j) file_next@lines[[j]]@ID)
file_next_data <- file_next@data
stack_data <- rbind(stack_data, file_next_data)
#print(paste0("Stack ",i," of ",nbatch," added at ",Sys.time()))
}
}
nrow(stack_data) == nrow(lines_cs)
summary(stack_data$id == lines_cs@data$id) # check route IDS - should all be True
# REDO FAILED LINES IF THERE ARE ANY, AND MERGE INTO STACK DATA
if(any(is.na(stack_data$length))) {
stack_keep <- stack_data[!is.na(stack_data$length) & !is.na(stack_data$av_incline) & !is.na(stack_data$time) & is.na(stack_data$error), ]
id1 <- (stack_data$id[1])
stack_redo <- stack_data[is.na(stack_data$length) | is.na(stack_data$av_incline) | is.na(stack_data$time) | !is.na(stack_data$error) | (stack_data$id==id1), ] # ## put in the top line, assuming it has not failed, so that first re-done line does not fail and cause line2route to give error
summary(stack_redo$error)
# View(stack_redo[c("geo_code1", "geo_code2", "id", "e_dist_km", "error")]) # view errors interactively if needs be
stack_redo_data <- stack_redo[c("geo_code1", "geo_code2", "id", "e_dist_km")]
if (purpose=="commute") {
stack_redo_lines <- od2line2(flow = stack_redo_data, zones = cents_all)
} else if (purpose=="school") {
stack_redo_lines <- od2line(flow = stack_redo_data, zones = cents_o, destinations = cents_d)
} else {
}
row.names(stack_redo_lines) <- row.names(stack_redo_data)
stack_redo <- SpatialLinesDataFrame(sl = stack_redo_lines, data = stack_redo_data)
proj4string(stack_redo) <- proj_4326
routes_redo <- line2route(l = stack_redo, route_fun = route_cyclestreet, plan = route_type, n_processes = 10, base_url = "http://pct.cyclestreets.net/api/")
routes_redo@data <- routes_redo@data[,!names(routes_redo@data) %in% c("plan","start","finish")] # drop fields not wanted
routes_redo@data <- left_join(routes_redo@data, stack_redo@data, by = "id")
routes_redo <- routes_redo[routes_redo@data$id!=id1,] # REMOVE THE FIRST LINE, THAT WAS REPEATED JUST TO MAKE SURE LINE2ROUTE RAN OK
row.names(routes_redo@data) <- sapply(1:length(routes_redo), function(j) routes_redo@lines[[j]]@ID)
saveRDS(routes_redo,file = file.path(path_temp_cs, purpose, geography, paste0("r",substr(route_type, 1, 1),"_",file_name,"_redo_of",nbatch,".Rds")), version = 2)
routes_redo_data <- routes_redo@data
stack_data <- rbind(stack_keep, routes_redo_data)
stack_redo2 <- stack_data[is.na(stack_data$length) | is.na(stack_data$av_incline) | is.na(stack_data$time) | !is.na(stack_data$error), ]
summary(stack_redo2$error) # SHOULD BE ZERO - IF NOT RUN THIS SECTION AGAIN? OR ADD IMPOSSIBLE LINES TO EXCLUDED LIST OF PROBLEM IDS
}
# FOR FASTEST LINE, LIMIT TO LINES UNDER MAXIMUM SCENARIO LENGTH & SAVE
if (route_type == "fastest") {
stack_data <- stack_data[(stack_data$length < (maxdist_scenario * 1000)),] # NB length in metres, maxdist_scenario in km
}
saveRDS(stack_data, file = file.path(path_inputs, "02_intermediate/02_travel_data", purpose, geography, paste0("archive/r",substr(route_type, 1, 1),"_",file_name,"_data.Rds")), version = 2)
# REJOIN THE FILES FOR **ROUTES** (FASTEST ROUTE LENGTH < VISUALISE DISTANCE), MERGE IN ROUTES THAT INITIALLY FAILED, & SAVE SHAPE
rf_data_visualise <- readRDS(file.path(path_inputs, "02_intermediate/02_travel_data", purpose, geography, paste0("archive/rf_",file_name,"_data.Rds")))
rf_data_visualise <- rf_data_visualise[(rf_data_visualise$length < (maxdist_visualise * 1000)),]
size_per_stack <- 50
nbatch_stacks <- ceiling(nbatch/size_per_stack)
if(nbatch_stacks > 1) {
for(j in 1:nbatch_stacks) {
# DEFINE WHICH FILES TO LOAD IN THIS BATCH
numload <- ((j-1) * size_per_stack) + 1
numstart <- numload + 1
if(j==nbatch_stacks){ numend <- nbatch} else {numend <- (j * size_per_stack)}
# MERGE SELECTED BATCH
print(paste0("Starting stack batch ",j," of ",nbatch_stacks," at ",Sys.time()))
stack_next <- readRDS(file.path(path_temp_cs, purpose, geography, paste0("r", substr(route_type, 1, 1), "_", file_name, "_", numload, "of", nbatch, ".Rds")))
rownames(stack_next@data) <- sapply(1:length(stack_next), function(j) stack_next@lines[[j]]@ID) # FORCE DATA ROW NAMES TO BE SAME AS ID IN LINES (in case don't start from '1')
stack_next <- stack_next[((stack_next@data$id %in% rf_data_visualise$id) & is.na(stack_next@data$error)),]
for(i in numstart:numend){
file_next <- readRDS(file.path(path_temp_cs, purpose, geography, paste0("r",substr(route_type, 1, 1),"_",file_name,"_",i,"of",nbatch,".Rds")))
rownames(file_next@data) <- sapply(1:length(file_next), function(j) file_next@lines[[j]]@ID)
file_next <- file_next[((file_next@data$id %in% rf_data_visualise$id) & is.na(file_next@data$error)),]
stack_next <- spRbind(stack_next, file_next)
print(paste0("Stack ",i," of ",nbatch," added at ",Sys.time()))
}
# APPEND TO MAIN BATCH
if(j==1){
stack <- stack_next
} else {
stack <- spRbind(stack, stack_next)
}
}
} else {
stack <- readRDS(file.path(path_temp_cs, purpose, geography, paste0("r", substr(route_type, 1, 1), "_", file_name, "_1of", nbatch, ".Rds")))
rownames(stack@data) <- sapply(1:length(stack), function(j) stack@lines[[j]]@ID) # FORCE DATA ROW NAMES TO BE SAME AS ID IN LINES (in case don't start from '1')
stack <- stack[((stack@data$id %in% rf_data_visualise$id) & is.na(stack@data$error)),]
if (nbatch > 1){
for(i in 2:nbatch){
file_next <- readRDS(file.path(path_temp_cs, purpose, geography, paste0("r",substr(route_type, 1, 1),"_",file_name,"_",i,"of",nbatch,".Rds")))
rownames(file_next@data) <- sapply(1:length(file_next), function(j) file_next@lines[[j]]@ID)
file_next <- file_next[((file_next@data$id %in% rf_data_visualise$id) & is.na(file_next@data$error)),]
stack <- spRbind(stack, file_next)
print(paste0("Stack ",i," of ",nbatch," added at ",Sys.time()))
}
}
}
if(file.exists(file.path(path_temp_cs, purpose, geography, paste0("r", substr(route_type, 1, 1), "_", file_name, "_redo_of", nbatch,".Rds")))) {
routes_redo <- readRDS(file.path(path_temp_cs, purpose, geography, paste0("r", substr(route_type, 1, 1), "_", file_name, "_redo_of", nbatch,".Rds")))
routes_redo <- routes_redo[((routes_redo@data$id %in% rf_data_visualise$id) & is.na(routes_redo@data$error)),]
stack <- spRbind(stack, routes_redo)
}
stack@data <- stack@data["id"]
saveRDS(stack,file = file.path(path_inputs, "02_intermediate/02_travel_data", purpose, geography, paste0("archive/r",substr(route_type, 1, 1),"_",file_name,"_shape.Rds")), version = 2)
saveRDS(stack,file = file.path(path_inputs, "02_intermediate/02_travel_data", purpose, geography, paste0("r",substr(route_type, 1, 1),"_shape.Rds")), version = 2)