Questions:

  1. In which cities can millennials have their avocado toast AND buy a home?

  2. Was the Avocadopocalypse of 2017 real?

Some relevant columns in the dataset:

  1. Date - The date of the observation
  2. AveragePrice - the average price of a single avocado
  3. type - conventional or organic
  4. year - the year
  5. region - the city or region of the observation
  6. Total Volume - Total number of avocados sold
  7. 4046 - Total number of avocados with PLU 4046 sold
  8. 4225 - Total number of avocados with PLU 4225 sold
  9. 4770 - Total number of avocados with PLU 4770 sold

Import libraries

# Import libraries
library(tidyverse)
library(gridExtra)
library(lubridate)

Data import and column casting

# Load the avocado CSV into a tibble; the first row of the file supplies
# the column names
avocados <- "./data/avocado.csv" %>%
  read_csv(col_names = TRUE)
Missing column names filled in: 'X1' [1]Parsed with column specification:
cols(
  X1 = col_integer(),
  Date = col_date(format = ""),
  AveragePrice = col_double(),
  `Total Volume` = col_double(),
  `4046` = col_double(),
  `4225` = col_double(),
  `4770` = col_double(),
  `Total Bags` = col_double(),
  `Small Bags` = col_double(),
  `Large Bags` = col_double(),
  `XLarge Bags` = col_double(),
  type = col_character(),
  year = col_integer(),
  region = col_character()
)
# Run the Date column through lubridate's year-month-day parser
avocados[["Date"]] <- ymd(avocados[["Date"]])
# Normalise column names: drop every space, then lowercase the result
colnames(avocados) <- colnames(avocados) %>%
  gsub(pattern = " ", replacement = "") %>%
  tolower()

Average price of avocados per year

# Keep only price and year, then, within each year, retain the rows whose
# price is strictly below that year's 99th percentile
avg_avocados <- avocados %>%
  select(averageprice, year) %>%
  group_by(year) %>%
  filter(averageprice < quantile(averageprice, probs = 0.99))
# One box per year showing the distribution of average prices
avg_avocado_box <- ggplot(
  data = avg_avocados,
  mapping = aes(x = factor(year), y = averageprice)
) +
  geom_boxplot(mapping = aes(fill = factor(year)), width = 0.75) +
  labs(
    title = "Average Price of Avocados Per Year",
    x = "Year",
    y = "Price ($)"
  ) +
  scale_fill_brewer(palette = "Greens")
print(avg_avocado_box)

Compare organic to conventional avocados

# Keep only price and type, then, within each type (organic / conventional),
# retain the rows whose price is strictly below that type's 95th percentile
av_types <- avocados %>%
  select(averageprice, type) %>%
  group_by(type) %>%
  filter(averageprice < quantile(averageprice, probs = 0.95))
# One box per avocado type; outliers are drawn with a custom green marker
type_avocado_box <- ggplot(
  data = av_types,
  mapping = aes(x = factor(type), y = averageprice)
) +
  geom_boxplot(
    mapping = aes(fill = factor(type)),
    width = 0.5,
    outlier.colour = "#006600",
    outlier.size = 1,
    outlier.shape = 10,
    outlier.stroke = 2
  ) +
  labs(
    title = "Average Price of Avocados: Organic vs Conventional",
    x = "Type",
    y = "Price ($)"
  ) +
  scale_fill_brewer(palette = "Greens")
print(type_avocado_box)

Average price of avocados per region

# Attach each region's mean total volume to every one of its rows so the
# regions can be ordered on the x axis from highest to lowest mean volume.
# ungroup() afterwards so the grouping does not leak into later operations.
sorted <- avocados %>%
  group_by(region) %>%
  mutate(avgvolume = mean(totalvolume)) %>%
  ungroup()
# Scatter plot of average price per region: point size follows the
# observation's total volume and colour follows the month of its date.
plot <- ggplot(
  data = sorted,
  aes(x = reorder(as.factor(region), -avgvolume), y = averageprice)
) +
  # alpha is a fixed setting, so it goes outside aes(); inside aes() the
  # constant is treated as a mapped variable and run through the alpha scale
  # instead of being drawn at 0.1 (and it would add a spurious legend)
  geom_point(aes(color = month(date), size = totalvolume), alpha = 0.1) +
  labs(
    title = "Average Price of Avocados Per Region",
    x = "Region",
    y = "Price ($)",
    color = "Month"
  ) +
  # Region names are long; rotate them so they do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # The size legend adds little here, so hide it
  guides(size = "none")
print(plot)

LS0tDQp0aXRsZTogIkF2b2NhZG9zIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMgUXVlc3Rpb25zOg0KMS4gSW4gd2hpY2ggY2l0aWVzIGNhbiBtaWxsZW5pYWxzIGhhdmUgdGhlaXIgYXZvY2FkbyB0b2FzdCBBTkQgYnV5IGEgaG9tZT8NCg0KMi4gV2FzIHRoZSBBdm9jYWRvcG9jYWx5cHNlIG9mIDIwMTcgcmVhbD8NCg0KIyBTb21lIHJlbGV2YW50IGNvbHVtbnMgaW4gdGhlIGRhdGFzZXQ6DQoNCjEuIERhdGUgLSBUaGUgZGF0ZSBvZiB0aGUgb2JzZXJ2YXRpb24NCjIuIEF2ZXJhZ2VQcmljZSAtIHRoZSBhdmVyYWdlIHByaWNlIG9mIGEgc2luZ2xlIGF2b2NhZG8NCjMuIHR5cGUgLSBjb252ZW50aW9uYWwgb3Igb3JnYW5pYw0KNC4geWVhciAtIHRoZSB5ZWFyDQo1LiBSZWdpb24gLSB0aGUgY2l0eSBvciByZWdpb24gb2YgdGhlIG9ic2VydmF0aW9uDQo2LiBUb3RhbCBWb2x1bWUgLSBUb3RhbCBudW1iZXIgb2YgYXZvY2Fkb3Mgc29sZA0KNy4gNDA0NiAtIFRvdGFsIG51bWJlciBvZiBhdm9jYWRvcyB3aXRoIFBMVSA0MDQ2IHNvbGQNCjguIDQyMjUgLSBUb3RhbCBudW1iZXIgb2YgYXZvY2Fkb3Mgd2l0aCBQTFUgNDIyNSBzb2xkDQo5LiA0NzcwIC0gVG90YWwgbnVtYmVyIG9mIGF2b2NhZG9zIHdpdGggUExVIDQ3NzAgc29sZA0KDQojIyBJbXBvcnQgbGlicmFyaWVzDQoNCmBgYHtyIGltcG9ydGxpYnJhcmllc30NCg0KIyBJbXBvcnQgbGlicmFyaWVzDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkoZ3JpZEV4dHJhKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpgYGANCg0KDQojIyBEYXRhIGltcG9ydCBhbmQgY29sdW1uIGNhc3RpbmcNCg0KYGBge3IgZGF0YSBjbGVhbmluZ30NCg0KIyBJbXBvcnQgZGF0YXNldA0KYXZvY2Fkb3MgPC0gcmVhZF9jc3YoIi4vZGF0YS9hdm9jYWRvLmNzdiIsIGNvbF9uYW1lcyA9IFRSVUUpDQoNCiMgQ29udmVydCBkYXRlIGNvbHVtbiB0byB5bWQgZm9ybWF0DQphdm9jYWRvcyREYXRlIDwtIHltZChhdm9jYWRvcyREYXRlKQ0KDQojIFN0cmlwIHNwYWNlcyBmcm9tIGNvbHVtbiBuYW1lcywgbWFrZSBsb3dlcmNhc2UNCmNvbG5hbWVzKGF2b2NhZG9zKSA8LSB0b2xvd2VyKA0KICAgIGdzdWIoIiAiLCAiIiwNCiAgICAgICAgY29sbmFtZXMoYXZvY2Fkb3MpKSkNCmBgYA0KDQojIyBBdmVyYWdlIHByaWNlIG9mIGF2b2NhZG9zIHBlciB5ZWFyDQoNCmBgYHtyIGF2Z3BlcnllYXJ9DQoNCiMgU2VsZWN0IGF2ZXJhZ2UgcHJpY2UgYW5kIGdyb3VwIGJ5IHllYXINCiMgUmVtb3ZlIGFib3ZlIDk5dGggcGVyY2VudGlsZSBvZiBhdmVyYWdlIHByaWNlDQoNCmF2Z19hdm9jYWRvcyA8LSBhdm9jYWRvcyAlPiUNCiAgc2VsZWN0KGF2ZXJhZ2VwcmljZSx5ZWFyKSAlPiUNCiAgZ3JvdXBfYnkoeWVhcikgJT4lDQogIGZpbHRlcihhdmVyYWdlcHJpY2UgPCBxdWFudGlsZShhdmVyYWdlcHJpY2UsIDAuOTkpKQ0KDQojIEJveCBwbG90IG9mIGF2ZXJhZ2UgYXZvY2FkbyBwcmljZSBwZXIgeWVhcg0KDQphdmdfYXZv
Y2Fkb19ib3ggPC0gZ2dwbG90KGF2Z19hdm9jYWRvcywgYWVzKGZhY3Rvcih5ZWFyKSwgYXZlcmFnZXByaWNlKSkgKyBnZW9tX2JveHBsb3QoYWVzKGZpbGwgPSBmYWN0b3IoeWVhcikpLCB3aWR0aCA9IDAuNzUpICsgbGFicyh0aXRsZT0iQXZlcmFnZSBQcmljZSBvZiBBdm9jYWRvcyBQZXIgWWVhciIsIHg9IlllYXIiLCB5PSJQcmljZSAoJCkiKSArIHNjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9IkdyZWVucyIpDQoNCnByaW50KGF2Z19hdm9jYWRvX2JveCkNCg0KYGBgDQoNCiMjIENvbXBhcmUgb3JnYW5pYyB0byBjb252ZW50aW9uYWwgYXZvY2Fkb3MNCg0KDQpgYGB7ciBhdmdwZXJ0eXBlfQ0KDQojIEdyb3VwIGF2b2NhZG8gYXZlcmFnZSBwcmljZXMgYnkgb3JnYW5pYyBvciBjb252ZW50aW9uYWwNCiMgUmVtb3ZlIGFib3ZlIDk1dGggcGVyY2VudGlsZQ0KYXZfdHlwZXMgPC0gYXZvY2Fkb3MgJT4lDQogIHNlbGVjdChhdmVyYWdlcHJpY2UsdHlwZSkgJT4lDQogIGdyb3VwX2J5KHR5cGUpICU+JQ0KICBmaWx0ZXIoYXZlcmFnZXByaWNlIDwgcXVhbnRpbGUoYXZlcmFnZXByaWNlLCAwLjk1KSkNCg0KIyBCb3ggcGxvdCANCnR5cGVfYXZvY2Fkb19ib3ggPC0gZ2dwbG90KGF2X3R5cGVzLCBhZXMoZmFjdG9yKHR5cGUpLCBhdmVyYWdlcHJpY2UpKSArIGdlb21fYm94cGxvdChhZXMoZmlsbCA9IGZhY3Rvcih0eXBlKSksIHdpZHRoID0gMC41LCBvdXRsaWVyLmNvbG91ciA9ICIjMDA2NjAwIiwgb3V0bGllci5zaXplID0gMSwgb3V0bGllci5zaGFwZSA9IDEwLCBvdXRsaWVyLnN0cm9rZSA9IDIpICsgbGFicyh0aXRsZT0iQXZlcmFnZSBQcmljZSBvZiBBdm9jYWRvczogT3JnYW5pYyB2cyBDb252ZW50aW9uYWwiLCB4PSJUeXBlIiwgeT0iUHJpY2UgKCQpIikgKyBzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlPSJHcmVlbnMiKQ0KDQpwcmludCh0eXBlX2F2b2NhZG9fYm94KQ0KDQpgYGANCg0KIyMgQXZlcmFnZSBwcmljZSBvZiBhdm9jYWRvcyBwZXIgcmVnaW9uDQoNCmBgYHtyIGF2Z3ByaWNlcGVycmVnaW9ufQ0Kc29ydGVkIDwtIGF2b2NhZG9zICU+JSANCiAgZ3JvdXBfYnkocmVnaW9uKSAlPiUNCiAgbXV0YXRlKGF2Z3ZvbHVtZSA9IG1lYW4odG90YWx2b2x1bWUpKQ0KDQpwbG90IDwtIGdncGxvdChkYXRhPXNvcnRlZCwgYWVzKHg9cmVvcmRlcihhcy5mYWN0b3IocmVnaW9uKSwgLWF2Z3ZvbHVtZSksIHk9YXZlcmFnZXByaWNlKSkgKw0KICBnZW9tX3BvaW50KGFlcyhjb2xvcj1tb250aChkYXRlKSwgYWxwaGE9MC4xLCBzaXplPXRvdGFsdm9sdW1lKSkgKw0KICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCBoanVzdCA9IDEpKSArDQogIGd1aWRlcyhzaXplID0gIm5vbmUiKSArDQogIGd1aWRlcyhhbHBoYSA9ICJub25lIikNCg0KcHJpbnQocGxvdCkNCmBgYA0K