knitr::opts_chunk$set(warning = FALSE, message = FALSE)
library(tidyverse)
library(tidymodels)
library(recipes)
library(rvest)
library(stringr)
library(lubridate)
library(revgeo)
library(jsonlite)
library(httr)
library(kableExtra)
library(FinCal)
library(gridExtra)
library(grid)

Retreive certified buildings data from the Bulgarian Energy Efficiency Information System

URL <- "https://seea.government.bg/documents/SG%20_Final_BG.html"

buildings <-
        read_html(URL)

buildings.list <-
        buildings %>%
        html_nodes("script") %>% 
        html_text() %>%
        last %>%
        str_extract_all("\\['.*\\]") %>%
        purrr::pluck(1) %>%
        as_tibble

buildings.split <-  
        buildings.list$value %>%
        str_replace_all("\\[|\\]", "") %>%
        str_split(",(?=(?:[^']*'[^']*')*[^']*$)")

tbl.buildings <-
        tibble(
                Name = map_chr(buildings.split, 14),
                Location = map_chr(buildings.split, 20),
                PostalCode = as.integer(map_chr(buildings.split, 21)),
                Addess = map_chr(buildings.split, 15),
                InUseSince = as.integer(map_chr(buildings.split, 22)),
                Rating = as_factor(map_chr(buildings.split, 1)),
                Potential.Rating = as_factor(map_chr(buildings.split, 5)),
                Area = as.double(map_chr(buildings.split, 6)),
                Energy.Savings = as.double(map_chr(buildings.split, 18)),
                CO2.Savings = as.double(map_chr(buildings.split, 19)),
                BGN.Savings = as.double(map_chr(buildings.split, 11)),
                Investment = as.double(map_chr(buildings.split, 12)),
                Payback = as.double(map_chr(buildings.split, 13)),
                Building.Type1 = as_factor(map_chr(buildings.split, 16)),
                Building.Type2 = as_factor(map_chr(buildings.split, 7)),
                Lat = as.double(map_chr(buildings.split, 2)),
                Lng = as.double(map_chr(buildings.split, 3))
        )

tbl.buildings <- tbl.buildings %>%
        mutate(
                Name = str_replace_all(Name, "'", ""),
                Location = str_replace_all(Location, "'", ""),
                Addess = str_replace_all(Addess, "'", ""),
                Rating = str_replace_all(Rating, "'", ""),
                Potential.Rating = str_replace_all(Potential.Rating, "'", ""),
                Building.Type1 = str_replace_all(Building.Type1, "'", "")
        ) 
        #%>%
        #write_csv("data_v2\\buildings.csv")
        
tbl.buildings
#read_csv("data_v2\\buildings.csv") %>% tail()

Reverse geocode


# tbl.buildings.bing <-
#         tbl.buildings %>%
#         map2_df(.x = as.list(.$Lng), 
#                 .y = as.list(.$Lat), 
#                 .f = revgeo, 
#                 output = "frame", 
#                 provider = "bing",
#                 API = "...Your access code goes here..."
#         ) %>%
#         bind_cols(tbl.buildings
#         )

#write_csv(tbl.buildings.bing, "data_v2\\buildings-bing.csv")

tbl.buildings.bing <- read_csv("data_v2\\buildings-bing.csv")
tbl.buildings.bing %>% tail()

Transform to SEDA db

state.region <-
        read_csv("data_v2\\state-region.csv") %>%
        mutate(
                region = as_factor(region),
                state = as_factor(state)
        )


seda_db <- read_csv("data_v2\\buildings-bing.csv") %>%
        mutate(
                city = as_factor(city),
                state = as_factor(state),
                Building.Type1 = as_factor(Building.Type1)
        ) %>%
        inner_join(state.region, by = "state") %>%
        mutate(state = as_factor(state))

#write_rds(seda_db, "data_v2\\seda-db.rds")

seda_db
#read_rds("data_v2\\seda-db.rds")

Group by development regions


tbl.buildings.region <-
        state.region %>%
        inner_join(tbl.buildings.bing, by = "state")

tbl.buildings.region %>%
        group_by(region) %>%
        summarise(n = n()) %>%
        ungroup() %>%
        bind_cols(
                lngRed = c(23.431189, 26.946191, 27.403131, 25.439181, 25.464054, 23.115908)
        ) %>%
        bind_cols(
                latRed = c(42.184089, 42.360107, 43.477124, 43.179384, 41.846443, 43.604365)
        ) 
        #%>%
        #write.csv("data_v2\\buildings-red.csv")

#read.csv("data_v2\\buildings-red.csv") %>% select(-X)

Consolidated view of Investments; Savings; Area

tbl.buildings.region %>%
        summarize(
                Investment = sum(Investment, na.rm = TRUE)/1000000,
                Savings = sum(Energy.Savings, na.rm = TRUE)/1000000,
                Area = sum(Area, na.rm = TRUE)/1000000
        ) %>%
        ungroup() 
        #%>%
        #write.csv("data_v2\\buildings-aggreg.csv")

#read.csv("data_v2\\buildings-aggreg.csv") %>% select(-X)
LS0tDQp0aXRsZTogIkNlcnRpZmllZCBidWlsZGluZ3MgQkcgTWFya2V0IC0gRXh0cmFjdC10by1GaWxlIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3Igc2V0dXB9DQprbml0cjo6b3B0c19jaHVuayRzZXQod2FybmluZyA9IEZBTFNFLCBtZXNzYWdlID0gRkFMU0UpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkodGlkeW1vZGVscykNCmxpYnJhcnkocmVjaXBlcykNCmxpYnJhcnkocnZlc3QpDQpsaWJyYXJ5KHN0cmluZ3IpDQpsaWJyYXJ5KGx1YnJpZGF0ZSkNCmxpYnJhcnkocmV2Z2VvKQ0KbGlicmFyeShqc29ubGl0ZSkNCmxpYnJhcnkoaHR0cikNCmxpYnJhcnkoa2FibGVFeHRyYSkNCmxpYnJhcnkoRmluQ2FsKQ0KbGlicmFyeShncmlkRXh0cmEpDQpsaWJyYXJ5KGdyaWQpDQpgYGANCg0KDQojIFJldHJlaXZlIGNlcnRpZmllZCBidWlsZGluZ3MgZGF0YSBmcm9tIHRoZSBCdWxnYXJpYW4gRW5lcmd5IEVmZmljaWVuY3kgSW5mb3JtYXRpb24gU3lzdGVtDQoNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpVUkwgPC0gImh0dHBzOi8vc2VlYS5nb3Zlcm5tZW50LmJnL2RvY3VtZW50cy9TRyUyMF9GaW5hbF9CRy5odG1sIg0KDQpidWlsZGluZ3MgPC0NCiAgICAgICAgcmVhZF9odG1sKFVSTCkNCg0KYnVpbGRpbmdzLmxpc3QgPC0NCiAgICAgICAgYnVpbGRpbmdzICU+JQ0KICAgICAgICBodG1sX25vZGVzKCJzY3JpcHQiKSAlPiUgDQogICAgICAgIGh0bWxfdGV4dCgpICU+JQ0KICAgICAgICBsYXN0ICU+JQ0KICAgICAgICBzdHJfZXh0cmFjdF9hbGwoIlxcWycuKlxcXSIpICU+JQ0KICAgICAgICBwdXJycjo6cGx1Y2soMSkgJT4lDQogICAgICAgIGFzX3RpYmJsZQ0KDQpidWlsZGluZ3Muc3BsaXQgPC0gIA0KICAgICAgICBidWlsZGluZ3MubGlzdCR2YWx1ZSAlPiUNCiAgICAgICAgc3RyX3JlcGxhY2VfYWxsKCJcXFt8XFxdIiwgIiIpICU+JQ0KICAgICAgICBzdHJfc3BsaXQoIiwoPz0oPzpbXiddKidbXiddKicpKlteJ10qJCkiKQ0KDQp0YmwuYnVpbGRpbmdzIDwtDQogICAgICAgIHRpYmJsZSgNCiAgICAgICAgICAgICAgICBOYW1lID0gbWFwX2NocihidWlsZGluZ3Muc3BsaXQsIDE0KSwNCiAgICAgICAgICAgICAgICBMb2NhdGlvbiA9IG1hcF9jaHIoYnVpbGRpbmdzLnNwbGl0LCAyMCksDQogICAgICAgICAgICAgICAgUG9zdGFsQ29kZSA9IGFzLmludGVnZXIobWFwX2NocihidWlsZGluZ3Muc3BsaXQsIDIxKSksDQogICAgICAgICAgICAgICAgQWRkZXNzID0gbWFwX2NocihidWlsZGluZ3Muc3BsaXQsIDE1KSwNCiAgICAgICAgICAgICAgICBJblVzZVNpbmNlID0gYXMuaW50ZWdlcihtYXBfY2hyKGJ1aWxkaW5ncy5zcGxpdCwgMjIpKSwNCiAgICAgICAgICAgICAgICBSYXRpbmcgPSBhc19mYWN0b3IobWFwX2NocihidWlsZGluZ3Muc3BsaXQsIDEpKSwNCiAgICAgICAgICAgICAgICBQb3RlbnRpYWwuUmF0aW5nID0gYXNfZmFjdG9yKG1hcF9jaHIoYnVpbGRpbmdzLnNwbGl0LCA1KSksDQogICAgICAgICAgICAgICAgQXJlYSA9IGFzLmRvdWJsZShtYXBfY2hyKGJ1aWxkaW5ncy5zcGxpdCwgNikpLA0KICAgICAgICAgICAgICAgIEVuZXJneS5TYXZpbmdzID0gYXMuZG91YmxlKG1hcF9jaHIoYnVpbGRpbmdzLnNwbGl0LCAxOCkpLA0KICAgICAgICAgICAgICAgIENPMi5TYXZpbmdzID0gYXMuZG91YmxlKG1hcF9jaHIoYnVpbGRpbmdzLnNwbGl0LCAxOSkpLA0KICAgICAgICAgICAgICAgIEJHTi5TYXZpbmdzID0gYXMuZG91YmxlKG1hcF9jaHIoYnVpbGRpbmdzLnNwbGl0LCAxMSkpLA0KICAgICAgICAgICAgICAgIEludmVzdG1lbnQgPSBhcy5kb3VibGUobWFwX2NocihidWlsZGluZ3Muc3BsaXQsIDEyKSksDQogICAgICAgICAgICAgICAgUGF5YmFjayA9IGFzLmRvdWJsZShtYXBfY2hyKGJ1aWxkaW5ncy5zcGxpdCwgMTMpKSwNCiAgICAgICAgICAgICAgICBCdWlsZGluZy5UeXBlMSA9IGFzX2ZhY3RvcihtYXBfY2hyKGJ1aWxkaW5ncy5zcGxpdCwgMTYpKSwNCiAgICAgICAgICAgICAgICBCdWlsZGluZy5UeXBlMiA9IGFzX2ZhY3RvcihtYXBfY2hyKGJ1aWxkaW5ncy5zcGxpdCwgNykpLA0KICAgICAgICAgICAgICAgIExhdCA9IGFzLmRvdWJsZShtYXBfY2hyKGJ1aWxkaW5ncy5zcGxpdCwgMikpLA0KICAgICAgICAgICAgICAgIExuZyA9IGFzLmRvdWJsZShtYXBfY2hyKGJ1aWxkaW5ncy5zcGxpdCwgMykpDQogICAgICAgICkNCg0KdGJsLmJ1aWxkaW5ncyA8LSB0YmwuYnVpbGRpbmdzICU+JQ0KICAgICAgICBtdXRhdGUoDQogICAgICAgICAgICAgICAgTmFtZSA9IHN0cl9yZXBsYWNlX2FsbChOYW1lLCAiJyIsICIiKSwNCiAgICAgICAgICAgICAgICBMb2NhdGlvbiA9IHN0cl9yZXBsYWNlX2FsbChMb2NhdGlvbiwgIiciLCAiIiksDQogICAgICAgICAgICAgICAgQWRkZXNzID0gc3RyX3JlcGxhY2VfYWxsKEFkZGVzcywgIiciLCAiIiksDQogICAgICAgICAgICAgICAgUmF0aW5nID0gc3RyX3JlcGxhY2VfYWxsKFJhdGluZywgIiciLCAiIiksDQogICAgICAgICAgICAgICAgUG90ZW50aWFsLlJhdGluZyA9IHN0cl9yZXBsYWNlX2FsbChQb3RlbnRpYWwuUmF0aW5nLCAiJyIsICIiKSwNCiAgICAgICAgICAgICAgICBCdWlsZGluZy5UeXBlMSA9IHN0cl9yZXBsYWNlX2FsbChCdWlsZGluZy5UeXBlMSwgIiciLCAiIikNCiAgICAgICAgKSANCiAgICAgICAgIyU+JQ0KICAgICAgICAjd3JpdGVfY3N2KCJkYXRhX3YyXFxidWlsZGluZ3MuY3N2IikNCiAgICAgICAgDQp0YmwuYnVpbGRpbmdzDQojcmVhZF9jc3YoImRhdGFfdjJcXGJ1aWxkaW5ncy5jc3YiKSAlPiUgdGFpbCgpDQoNCmBgYA0KDQoNCiMgUmV2ZXJzZSBnZW9jb2RlIA0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KDQojIHRibC5idWlsZGluZ3MuYmluZyA8LQ0KIyAgICAgICAgIHRibC5idWlsZGluZ3MgJT4lDQojICAgICAgICAgbWFwMl9kZigueCA9IGFzLmxpc3QoLiRMbmcpLCANCiMgICAgICAgICAgICAgICAgIC55ID0gYXMubGlzdCguJExhdCksIA0KIyAgICAgICAgICAgICAgICAgLmYgPSByZXZnZW8sIA0KIyAgICAgICAgICAgICAgICAgb3V0cHV0ID0gImZyYW1lIiwgDQojICAgICAgICAgICAgICAgICBwcm92aWRlciA9ICJiaW5nIiwNCiMgICAgICAgICAgICAgICAgIEFQSSA9ICIuLi5Zb3VyIGFjY2VzcyBjb2RlIGdvZXMgaGVyZS4uLiINCiMgICAgICAgICApICU+JQ0KIyAgICAgICAgIGJpbmRfY29scyh0YmwuYnVpbGRpbmdzDQojICAgICAgICAgKQ0KDQojd3JpdGVfY3N2KHRibC5idWlsZGluZ3MuYmluZywgImRhdGFfdjJcXGJ1aWxkaW5ncy1iaW5nLmNzdiIpDQoNCnRibC5idWlsZGluZ3MuYmluZyA8LSByZWFkX2NzdigiZGF0YV92MlxcYnVpbGRpbmdzLWJpbmcuY3N2IikNCnRibC5idWlsZGluZ3MuYmluZyAlPiUgdGFpbCgpDQpgYGANCg0KIyBUcmFuc2Zvcm0gdG8gU0VEQSBkYg0KDQpgYGB7cn0NCnN0YXRlLnJlZ2lvbiA8LQ0KICAgICAgICByZWFkX2NzdigiZGF0YV92Mlxcc3RhdGUtcmVnaW9uLmNzdiIpICU+JQ0KICAgICAgICBtdXRhdGUoDQogICAgICAgICAgICAgICAgcmVnaW9uID0gYXNfZmFjdG9yKHJlZ2lvbiksDQogICAgICAgICAgICAgICAgc3RhdGUgPSBhc19mYWN0b3Ioc3RhdGUpDQogICAgICAgICkNCg0KDQpzZWRhX2RiIDwtIHJlYWRfY3N2KCJkYXRhX3YyXFxidWlsZGluZ3MtYmluZy5jc3YiKSAlPiUNCiAgICAgICAgbXV0YXRlKA0KICAgICAgICAgICAgICAgIGNpdHkgPSBhc19mYWN0b3IoY2l0eSksDQogICAgICAgICAgICAgICAgc3RhdGUgPSBhc19mYWN0b3Ioc3RhdGUpLA0KICAgICAgICAgICAgICAgIEJ1aWxkaW5nLlR5cGUxID0gYXNfZmFjdG9yKEJ1aWxkaW5nLlR5cGUxKQ0KICAgICAgICApICU+JQ0KICAgICAgICBpbm5lcl9qb2luKHN0YXRlLnJlZ2lvbiwgYnkgPSAic3RhdGUiKSAlPiUNCiAgICAgICAgbXV0YXRlKHN0YXRlID0gYXNfZmFjdG9yKHN0YXRlKSkNCg0KI3dyaXRlX3JkcyhzZWRhX2RiLCAiZGF0YV92Mlxcc2VkYS1kYi5yZHMiKQ0KDQpzZWRhX2RiDQojcmVhZF9yZHMoImRhdGFfdjJcXHNlZGEtZGIucmRzIikNCmBgYA0KDQojIEdyb3VwIGJ5IGRldmVsb3BtZW50IHJlZ2lvbnMNCg0KYGBge3J9DQoNCnRibC5idWlsZGluZ3MucmVnaW9uIDwtDQogICAgICAgIHN0YXRlLnJlZ2lvbiAlPiUNCiAgICAgICAgaW5uZXJfam9pbih0YmwuYnVpbGRpbmdzLmJpbmcsIGJ5ID0gInN0YXRlIikNCg0KdGJsLmJ1aWxkaW5ncy5yZWdpb24gJT4lDQogICAgICAgIGdyb3VwX2J5KHJlZ2lvbikgJT4lDQogICAgICAgIHN1bW1hcmlzZShuID0gbigpKSAlPiUNCiAgICAgICAgdW5ncm91cCgpICU+JQ0KICAgICAgICBiaW5kX2NvbHMoDQogICAgICAgICAgICAgICAgbG5nUmVkID0gYygyMy40MzExODksIDI2Ljk0NjE5MSwgMjcuNDAzMTMxLCAyNS40MzkxODEsIDI1LjQ2NDA1NCwgMjMuMTE1OTA4KQ0KICAgICAgICApICU+JQ0KICAgICAgICBiaW5kX2NvbHMoDQogICAgICAgICAgICAgICAgbGF0UmVkID0gYyg0Mi4xODQwODksIDQyLjM2MDEwNywgNDMuNDc3MTI0LCA0My4xNzkzODQsIDQxLjg0NjQ0MywgNDMuNjA0MzY1KQ0KICAgICAgICApIA0KICAgICAgICAjJT4lDQogICAgICAgICN3cml0ZS5jc3YoImRhdGFfdjJcXGJ1aWxkaW5ncy1yZWQuY3N2IikNCg0KI3JlYWQuY3N2KCJkYXRhX3YyXFxidWlsZGluZ3MtcmVkLmNzdiIpICU+JSBzZWxlY3QoLVgpDQpgYGANCg0KIyBDb25zb2xpZGF0ZWQgdmlldyBvZiAgSW52ZXN0bWVudHM7IFNhdmluZ3M7IEFyZWENCg0KYGBge3J9DQp0YmwuYnVpbGRpbmdzLnJlZ2lvbiAlPiUNCiAgICAgICAgc3VtbWFyaXplKA0KICAgICAgICAgICAgICAgIEludmVzdG1lbnQgPSBzdW0oSW52ZXN0bWVudCwgbmEucm0gPSBUUlVFKS8xMDAwMDAwLA0KICAgICAgICAgICAgICAgIFNhdmluZ3MgPSBzdW0oRW5lcmd5LlNhdmluZ3MsIG5hLnJtID0gVFJVRSkvMTAwMDAwMCwNCiAgICAgICAgICAgICAgICBBcmVhID0gc3VtKEFyZWEsIG5hLnJtID0gVFJVRSkvMTAwMDAwMA0KICAgICAgICApICU+JQ0KICAgICAgICB1bmdyb3VwKCkgDQogICAgICAgICMlPiUNCiAgICAgICAgI3dyaXRlLmNzdigiZGF0YV92MlxcYnVpbGRpbmdzLWFnZ3JlZy5jc3YiKQ0KDQojcmVhZC5jc3YoImRhdGFfdjJcXGJ1aWxkaW5ncy1hZ2dyZWcuY3N2IikgJT4lIHNlbGVjdCgtWCkNCmBgYA0KDQo=