# Code to extract lat-long of costliest thermal projects from capex for construction commenced # projects from "2015" to "2018" # Author: Sayan Dasgupta # Loading required libraries library(stringr) library(dplyr) library(tidyr) library(lubridate) library(RMySQL) library(data.table) # Connecting to database mydb <- dbConnect(dbDriver("MySQL"), db = "fld", host = "data.mayin.org", port = 3306, user = Sys.getenv("HHD_AND_FLD_USER"), password = Sys.getenv("HHD_AND_FLD_PASSWORD")) # Getting Project event data project_event <- dbGetQuery(mydb,"select COMPANY,PROJECT_NAME,PROJECT_NUMBER, COMPANY_CODE,EVENT,EVENT_DATE from capex_all_project_events") colnames(project_event) <- tolower(colnames(project_event)) elec_gen_types <- function(elec_gen_src){ sqlquery <- paste("select a.COMPANY,a.PROJECT_NAME,a.PROJECT_NUMBER,a.INDUSTRY from capex_all_project_detail a WHERE a.INDUSTRY IN ('Renewable Electricity','Conventional Electricity') ") elec_industry <- dbGetQuery(mydb,sqlquery) '%notlike%' = Negate('%like%') if(elec_gen_src == "Coal") { coal <- elec_industry %>% filter(PROJECT_NAME %like% "Thermal") %>%filter(PROJECT_NAME %notlike% "Solar") %>% filter(PROJECT_NAME %notlike% "Gas") %>% filter(PROJECT_NAME %notlike% "Biomass") %>% filter(PROJECT_NAME %notlike% "Geo") %>% filter(PROJECT_NAME %notlike% "Flux") return(coal) } else if(elec_gen_src == "Wind") { wind <- elec_industry %>% filter(PROJECT_NAME %like% "Wind") return(wind) } else if(elec_gen_src == "Solar") { solar <- elec_industry %>% filter(PROJECT_NAME %like% "Solar") return(solar) } else if(elec_gen_src == "Hydro") { hydro <- elec_industry %>% filter(PROJECT_NAME %like% "Hydel") return(hydro) } else { print("Incorrect electricity generation source") } } ### thermal <- elec_gen_types("Coal") colnames(thermal) <- tolower(colnames(thermal)) ### Getting project status data project_status <- dbGetQuery(mydb,"select COMPANY,PROJECT_NAME,PROJECT_NUMBER, COMPANY_CODE,PROJECT_STATUS from capex_all_project_detail") colnames(project_status) <- tolower(colnames(project_status)) # Extracting project location data project_location <- dbGetQuery(mydb,"select * from capex_all_project_location") colnames(project_location) <- tolower(colnames(project_location)) project_event$event_date <- as.Date(project_event$event_date, format = "%d %B %Y") events_data <- project_event %>% filter(event_date >= "2015-01-01" & event_date < "2019-01-01") completed_events_data <- events_data %>% filter(event == "Construction commenced" | event == "Implementation started") completed_projects <- completed_events_data %>% distinct(project_name,project_number,company_code,company) thermal_completed_projects <- inner_join(completed_projects,thermal,by = c("project_name","project_number","company")) thermal_completed_projects <- thermal_completed_projects %>% distinct(project_name,project_number,industry,company) ### project location project_location <- as.data.frame(subset(project_location, select =c("company","project_name", "project_number","company_code", "longitude,latitude"))) colnames(project_location)[which(names(project_location) == "longitude,latitude")] <- "lat_long" project_location <- project_location[!(is.na(project_location$lat_long) | project_location$lat_long==""), ] point_projects <- project_location %>% group_by(company,project_name,project_number,company_code) %>% summarise(number_of_locations = n()) point_projects <- as.data.frame(point_projects) point_projects <- point_projects %>% filter(number_of_locations == 1) point_projects_location <- left_join(point_projects,project_location, by = c("company","project_name", "project_number","company_code")) point_projects_location <- subset(point_projects_location, select = c("company","project_name", "project_number","company_code","lat_long")) point_comp_ab_1000 <- left_join(thermal_completed_projects,point_projects_location,by = c("project_name", "project_number", "company")) point_comp_ab_1000 <- point_comp_ab_1000 %>% distinct(company,company_code,project_name,project_number,lat_long) write.csv(point_comp_ab_1000,"../DATA/29_thermal_constr_2015_2018_wrong.csv")