diff --git a/.Rhistory b/.Rhistory index e8e0eb5..4e58f75 100644 --- a/.Rhistory +++ b/.Rhistory @@ -2,8 +2,8 @@ import ibis from ibis import _ import ibis.selectors as s from plotnine import * -sakura_first_bloom_dates = ibis.read_csv("https://raw.githubusercontent.com/christophscheuch/data-viz-japan/main/data/sakura_first_bloom_dates.csv") -sakura_full_bloom_dates = ibis.read_csv("https://raw.githubusercontent.com/christophscheuch/data-viz-japan/main/data/sakura_full_bloom_dates.csv") +sakura_first_bloom_dates = ibis.read_csv("data/sakura_first_bloom_dates.csv") +sakura_full_bloom_dates = ibis.read_csv("data/sakura_full_bloom_dates.csv") first_bloom_long = (sakura_first_bloom_dates .drop("30 Year Average 1981-2010", "Notes") .rename(location = "Site Name", is_currently_observed = "Currently Being Observed") @@ -17,23 +17,22 @@ full_bloom_long = (sakura_full_bloom_dates sakura_dates = (first_bloom_long.outer_join(full_bloom_long, ["location", "year", "is_currently_observed"]) .select(~s.contains("_right")) .mutate(year = _.year.cast("int32")) -.mutate(full_bloom_day = _.full_bloom - ibis.date(_.year.cast('string') + '-01-01'), -time_to_full_bloom = _.full_bloom - _.first_bloom) +.mutate(days_to_full_bloom = (_.full_bloom - ibis.date(_.year.cast('string') + '-01-01')).cast('interval("D")').cast("int32"), +days_from_first_to_full_bloom = _.full_bloom - _.first_bloom) ) -locations_regions = ibis.read_csv("https://raw.githubusercontent.com/christophscheuch/data-viz-japan/main/data/locations_region.csv") +locations_regions = ibis.read_csv("data/locations_region.csv") southern_islands = ["Naze", "Ishigaki Island", "Miyakojima", "Naha", "Minami Daito Island"] -locations_regions = locations_regions.mutate( -region=ibis.case() -.when(_.location.isin(southern_islands), "Ryukyu Islands") -.else_(_.region) -.end() +locations_regions = (locations_regions +.mutate( +region = ibis.case().when(_.location.isin(southern_islands), "Ryukyu Islands").else_(_.region).end() +) ) sakura_data = (sakura_dates .left_join(locations_regions, "location") .filter([_.is_currently_observed == True, _.year >= 1954, -_.full_bloom_day.notnull(), -_.time_to_full_bloom.notnull()]) +_.days_to_full_bloom.notnull(), +_.days_from_first_to_full_bloom.notnull()]) ) theme_set(theme_classic(base_size = 16, base_family = "Palatino") + theme(figure_size = (12, 8))) theme_update( @@ -48,80 +47,141 @@ axis_line = element_blank() colors = ["#ffb7c5", "#b7c5ff", "#c5ffb7"] breaks_year = range(1950, 2031, 20) (ggplot(sakura_data, -aes(x = "year", y = "full_bloom_day")) +aes(x = "year", y = "days_to_full_bloom")) + geom_point(color = colors[0], alpha = 0.5, size = 2) + geom_smooth(method = "lm", se = False, color = colors[1], size = 2) + facet_wrap("~region", nrow = 1) -+ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1953", ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", x = None, y = None) + scale_x_continuous(breaks = breaks_year) + scale_y_continuous(breaks = range(30, 151, 30)) ) (ggplot(sakura_data, -aes(x = "year", y = "time_to_full_bloom")) -+ geom_point(color = colors[0], alpha = 0.5) -+ geom_smooth(method = "lm", se = False, +aes(x = "year", y = "days_to_full_bloom")) ++ geom_point(color = colors[0], alpha = 0.5, size = 2) ++ geom_smooth(se = False, color = colors[1], size = 2) + facet_wrap("~region", nrow = 1) -+ labs(title = "Days from blossoms opening to peak bloom for regions in Japan", -subtitle = "Hokkaidō exhibits the shortest and decreasing blooming periods, while Kyūshū's and Ryukyu's have lengthened", ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", +subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", x = None, y = None) + scale_x_continuous(breaks = breaks_year) ++ scale_y_continuous(breaks = range(30, 151, 30)) ) (ggplot(sakura_data, -aes(x = "year", y = "full_bloom_day")) +aes(x = "year", y = "days_to_full_bloom")) + geom_point(color = colors[0], alpha = 0.5, size = 2) -+ geom_smooth(method = "lm", se = False, ++ geom_smooth(se = False, span=0.3, color = colors[1], size = 2) + facet_wrap("~region", nrow = 1) -+ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1953", ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", x = None, y = None) + scale_x_continuous(breaks = breaks_year) + scale_y_continuous(breaks = range(30, 151, 30)) ) (ggplot(sakura_data, -aes(x = "year", y = "full_bloom_day")) +aes(x = "year", y = "days_to_full_bloom")) + geom_point(color = colors[0], alpha = 0.5, size = 2) -+ geom_smooth(method = "lm", se = False, ++ geom_smooth(se = False, span=1, color = colors[1], size = 2) + facet_wrap("~region", nrow = 1) -+ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1953", ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", x = None, y = None) + scale_x_continuous(breaks = breaks_year) ++ scale_y_continuous(breaks = range(30, 151, 30)) ) -sakura_data -sakura_first_bloom_dates = ibis.read_csv("https://raw.githubusercontent.com/christophscheuch/data-viz-japan/main/data/sakura_first_bloom_dates.csv") -sakura_full_bloom_dates = ibis.read_csv("https://raw.githubusercontent.com/christophscheuch/data-viz-japan/main/data/sakura_full_bloom_dates.csv") -first_bloom_long = (sakura_first_bloom_dates -.drop("30 Year Average 1981-2010", "Notes") -.rename(location = "Site Name", is_currently_observed = "Currently Being Observed") -.pivot_longer(s.r["1953":"2023"], names_to = "year", values_to = "first_bloom") +from plotnine import ggplot, aes, geom_point, geom_smooth, labs, theme_matplotlib, theme_set +from plotnine.data import mpg +theme_set(theme_matplotlib()) +( +ggplot(mpg, aes(x="displ", y="hwy")) ++ geom_point() ++ geom_smooth(span=0.3) ++ labs(x="displacement", y="horsepower") ) -full_bloom_long = (sakura_full_bloom_dates -.drop("30 Year Average 1981-2010", "Notes") -.rename(location = "Site Name", is_currently_observed = "Currently Being Observed") -.pivot_longer(s.r["1953":"2023"], names_to = "year", values_to = "full_bloom") +( +ggplot(mpg, aes(x="displ", y="hwy")) ++ geom_point() ++ geom_smooth() ++ labs(x="displacement", y="horsepower") ) -sakura_dates = (first_bloom_long.outer_join(full_bloom_long, ["location", "year", "is_currently_observed"]) -.select(~s.contains("_right")) -.mutate(year = _.year.cast("int32")) -.mutate(full_bloom_day = (_.full_bloom - ibis.date(_.year.cast('string') + '-01-01')).cast('interval("D")').cast("int32"), -time_to_full_bloom = _.full_bloom - _.first_bloom) +mpg +( +ggplot(mpg, aes(x="displ", y="hwy")) ++ geom_point() ++ geom_smooth() ++ facet_wrap("manufacturer") ++ labs(x="displacement", y="horsepower") ) -sakura_dates (ggplot(sakura_data, -aes(x = "year", y = "full_bloom_day")) +aes(x = "year", y = "days_to_full_bloom")) + geom_point(color = colors[0], alpha = 0.5, size = 2) -+ geom_smooth(method = "lm", se = False, ++ geom_smooth( color = colors[1], size = 2) + facet_wrap("~region", nrow = 1) -+ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1953", ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", x = None, y = None) -+ scale_x_continuous(breaks = breaks_year) + -scale_y_continuous(breaks = range(30, 151, by = 30)) ++ scale_x_continuous(breaks = breaks_year) ++ scale_y_continuous(breaks = range(30, 151, 30)) +) +(ggplot(sakura_data, +aes(x = "year", y = "days_to_full_bloom")) ++ geom_point(color = colors[0], alpha = 0.5, size = 2) ++ geom_smooth(span = 0.1, +color = colors[1], size = 2) ++ facet_wrap("~region", nrow = 1) ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", +subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", +x = None, y = None) ++ scale_x_continuous(breaks = breaks_year) ++ scale_y_continuous(breaks = range(30, 151, 30)) +) +(ggplot(sakura_data, +aes(x = "year", y = "days_to_full_bloom")) ++ geom_point(color = colors[0], alpha = 0.5, size = 2) ++ geom_smooth(se = False, +color = colors[1], size = 2) ++ facet_wrap("~region", nrow = 1) ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", +subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", +x = None, y = None) ++ scale_x_continuous(breaks = breaks_year) ++ scale_y_continuous(breaks = range(30, 151, 30)) +) +(ggplot(sakura_data, +aes(x = "year", y = "days_to_full_bloom")) ++ geom_point(color = colors[0], alpha = 0.5, size = 2) ++ geom_smooth(se = False, method = "BLA", +color = colors[1], size = 2) ++ facet_wrap("~region", nrow = 1) ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", +subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", +x = None, y = None) ++ scale_x_continuous(breaks = breaks_year) ++ scale_y_continuous(breaks = range(30, 151, 30)) +) +(ggplot(sakura_data, +aes(x = "year", y = "days_to_full_bloom")) ++ geom_point(color = colors[0], alpha = 0.5, size = 2) ++ geom_smooth(se = False, method = "loess", +color = colors[1], size = 2) ++ facet_wrap("~region", nrow = 1) ++ labs(title = "Day of the year with peak cherry tree blossom for regions in Japan since 1954", +subtitle = "Cities in northern regions Hokkaidō and Honshū exhibit earlier full blooms, while Ryukyu Islands even later", +x = None, y = None) ++ scale_x_continuous(breaks = breaks_year) ++ scale_y_continuous(breaks = range(30, 151, 30)) +) +theme_set(theme_matplotlib()) +( +ggplot(mpg, aes(x="displ", y="hwy")) ++ geom_point() ++ geom_smooth() ++ facet_wrap("manufacturer") ++ labs(x="displacement", y="horsepower") ) diff --git a/assets/css/styles-global.css b/assets/css/styles-global.css index 14f556e..835c017 100644 --- a/assets/css/styles-global.css +++ b/assets/css/styles-global.css @@ -4,7 +4,7 @@ } body { - font-family: 'Fira Sans', sans-serif; + font-family: "Gill Sans", sans-serif; } .navbar { diff --git a/docs/assets/css/styles-global.css b/docs/assets/css/styles-global.css index 14f556e..835c017 100644 --- a/docs/assets/css/styles-global.css +++ b/docs/assets/css/styles-global.css @@ -4,7 +4,7 @@ } body { - font-family: 'Fira Sans', sans-serif; + font-family: "Gill Sans", sans-serif; } .navbar { diff --git a/docs/posts/classification-customer-churn/index.html b/docs/posts/classification-customer-churn/index.html index fae5641..dc35ea4 100644 --- a/docs/posts/classification-customer-churn/index.html +++ b/docs/posts/classification-customer-churn/index.html @@ -1260,7 +1260,7 @@
Tidy Collaborative Filtering: Bui -