# Code to extract lat-long of point projects from capex for abandoned/shelved/Completed # projects from "2015" to "2018" # Author: Sayan Dasgupta # Loading required libraries library(stringr) library(dplyr) library(tidyr) library(lubridate) library(RMySQL) library(data.table) # Connecting to database mydb <- dbConnect(dbDriver("MySQL"), db = "fld", host = "data.mayin.org", port = 3306, user = Sys.getenv("HHD_AND_FLD_USER"), password = Sys.getenv("HHD_AND_FLD_PASSWORD")) # Getting Project event data project_event <- dbGetQuery(mydb,"select COMPANY,PROJECT_NAME,PROJECT_NUMBER, COMPANY_CODE,EVENT,EVENT_DATE from capex_all_project_events") colnames(project_event) <- tolower(colnames(project_event)) # Getting project status data project_status <- dbGetQuery(mydb,"select COMPANY,PROJECT_NAME,PROJECT_NUMBER, COMPANY_CODE,PROJECT_STATUS from capex_all_project_detail") colnames(project_status) <- tolower(colnames(project_status)) # Extracting project location data project_location <- dbGetQuery(mydb,"select * from capex_all_project_location") colnames(project_location) <- tolower(colnames(project_location)) project_event$event_date <- as.Date(project_event$event_date, format = "%d %B %Y") events_data <- project_event %>% filter(event_date >= "2015-01-01" & event_date < "2019-01-01") completed_events_data <- events_data %>% filter(event == "Completed" | event == "Abandoned on") completed_projects <- completed_events_data %>% distinct(project_name,project_number,company_code,company) cost <- dbGetQuery(mydb,"select * from capex_all_project_cost") colnames(cost) <- tolower(colnames(cost)) cost <- cost[rev(order(as.Date(cost$date_of_cost_of_project, format="%d/%B/%Y"))), ] cost <- cost %>% distinct(project_name,project_number,company,company_code,.keep_all = TRUE) completed_comp_cost <- left_join(completed_projects,cost, by = c("project_name", "project_number", "company", "company_code")) cost_1000 <- completed_comp_cost %>% filter(cost >= 10000) cost_status <- left_join(cost_1000,project_status, by = c("project_name", "project_number", "company", "company_code")) cost_1000 <- cost_status %>% filter(project_status == "Completed" | project_status == "Abandoned") completed <- cost_1000 %>% filter(project_status == "Completed") abandoned <- cost_1000 %>% filter(project_status == "Abandoned") comp_ab_1000 <- rbind(completed,abandoned) # Point Projects project_location <- as.data.frame(subset(project_location, select =c("company","project_name", "project_number","company_code", "longitude,latitude"))) colnames(project_location)[which(names(project_location) == "longitude,latitude")] <- "lat_long" project_location <- project_location[!(is.na(project_location$lat_long) | project_location$lat_long==""), ] point_projects <- project_location %>% group_by(company,project_name,project_number,company_code) %>% summarise(number_of_locations = n()) point_projects <- as.data.frame(point_projects) point_projects <- point_projects %>% filter(number_of_locations == 1) point_projects_location <- left_join(point_projects,project_location, by = c("company","project_name", "project_number","company_code")) point_projects_location <- subset(point_projects_location, select = c("company","project_name", "project_number","company_code","lat_long")) point_comp_ab_1000 <- left_join(comp_ab_1000,point_projects_location,by = c("project_name", "project_number", "company", "company_code")) point_comp_ab_1000 <- point_comp_ab_1000 %>% distinct(company,company_code,project_name,project_number,lat_long) point_comp_ab_1000 <- point_comp_ab_1000[ grep(":",point_comp_ab_1000$lat_long, invert = TRUE) , ] point_comp_ab_1000 <- point_comp_ab_1000 %>% drop_na(lat_long) write.csv(point_comp_ab_1000,"../DATA/abandoned_completed_2015_2018_1000cr_projects_lat_long.csv")