Some relevant columns in the dataset:
- Date - The date of the observation
- AveragePrice - the average price of a single avocado
- type - conventional or organic
- year - the year
- region - the city or region of the observation
- Total Volume - Total number of avocados sold
- 4046 - Total number of avocados with PLU 4046 sold
- 4225 - Total number of avocados with PLU 4225 sold
- 4770 - Total number of avocados with PLU 4770 sold
Import libraries
# Import libraries
library(tidyverse)
library(gridExtra)
library(lubridate)
Data import and column casting
# Load the avocado price data from disk; the first row of the CSV supplies
# the column names.
avocados <- read_csv(
  file = "./data/avocado.csv",
  col_names = TRUE
)
Missing column names filled in: 'X1' [1]
Parsed with column specification:
cols(
X1 = col_integer(),
Date = col_date(format = ""),
AveragePrice = col_double(),
`Total Volume` = col_double(),
`4046` = col_double(),
`4225` = col_double(),
`4770` = col_double(),
`Total Bags` = col_double(),
`Small Bags` = col_double(),
`Large Bags` = col_double(),
`XLarge Bags` = col_double(),
type = col_character(),
year = col_integer(),
region = col_character()
)
# Run the Date column through lubridate's year-month-day parser so it is
# guaranteed to hold dates.
avocados[["Date"]] <- ymd(avocados[["Date"]])
# Normalise the column names: remove every space and convert to lower case,
# e.g. "Total Volume" becomes "totalvolume" and "Date" becomes "date".
names(avocados) <- tolower(gsub(" ", "", names(avocados), fixed = TRUE))
Average price of avocados per year
# Keep only the average price and the year, then trim the most expensive
# observations separately within each year.
# The strict `<` drops every row at or above that year's 99th percentile.
# `na.rm = TRUE` keeps quantile() from erroring if a price is missing, and
# ungroup() stops the per-year grouping from leaking into later operations.
avg_avocados <- avocados %>%
  select(averageprice, year) %>%
  group_by(year) %>%
  filter(averageprice < quantile(averageprice, 0.99, na.rm = TRUE)) %>%
  ungroup()

# Box plot of average avocado price per year: one box per year, filled with a
# shade of green per year.
avg_avocado_box <- ggplot(avg_avocados, aes(factor(year), averageprice)) +
  geom_boxplot(aes(fill = factor(year)), width = 0.75) +
  labs(
    title = "Average Price of Avocados Per Year",
    x = "Year",
    y = "Price ($)"
  ) +
  scale_fill_brewer(palette = "Greens")
print(avg_avocado_box)

Compare organic to conventional avocados
# Keep only the average price and the type (organic or conventional), then
# trim the most expensive observations separately within each type.
# The strict `<` drops every row at or above that type's 95th percentile.
# `na.rm = TRUE` keeps quantile() from erroring if a price is missing, and
# ungroup() stops the per-type grouping from leaking into later operations.
av_types <- avocados %>%
  select(averageprice, type) %>%
  group_by(type) %>%
  filter(averageprice < quantile(averageprice, 0.95, na.rm = TRUE)) %>%
  ungroup()

# Box plot of average price per type; outliers are drawn as small dark-green
# markers so they stand out from the boxes.
type_avocado_box <- ggplot(av_types, aes(factor(type), averageprice)) +
  geom_boxplot(
    aes(fill = factor(type)),
    width = 0.5,
    outlier.colour = "#006600",
    outlier.size = 1,
    outlier.shape = 10,
    outlier.stroke = 2
  ) +
  labs(
    title = "Average Price of Avocados: Organic vs Conventional",
    x = "Type",
    y = "Price ($)"
  ) +
  scale_fill_brewer(palette = "Greens")
print(type_avocado_box)

Average price of avocados per region
# Attach each region's mean total volume to every one of its rows so the
# regions can be ordered on the x axis from highest to lowest average volume.
# ungroup() stops the per-region grouping from leaking into later operations.
sorted <- avocados %>%
  group_by(region) %>%
  mutate(avgvolume = mean(totalvolume)) %>%
  ungroup()

# One point per observation: colour encodes the month of the observation date
# and point size encodes the total volume sold.
plot <- ggplot(
  data = sorted,
  aes(x = reorder(as.factor(region), -avgvolume), y = averageprice)
) +
  # alpha is a fixed setting, so it goes outside aes(). Inside aes() it is
  # treated as a data mapping and run through an alpha scale, so the points
  # are not actually drawn at 10% opacity.
  geom_point(aes(color = month(date), size = totalvolume), alpha = 0.1) +
  labs(
    title = "Average Price of Avocados Per Region",
    x = "Region",
    y = "Price ($)",
    color = "Month"
  ) +
  # Rotate the region names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # Hide the size legend; alpha is no longer mapped, so it has no legend.
  guides(size = "none")
print(plot)

LS0tDQp0aXRsZTogIkF2b2NhZG9zIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KIyMgUXVlc3Rpb25zOg0KMS4gSW4gd2hpY2ggY2l0aWVzIGNhbiBtaWxsZW5pYWxzIGhhdmUgdGhlaXIgYXZvY2FkbyB0b2FzdCBBTkQgYnV5IGEgaG9tZT8NCg0KMi4gV2FzIHRoZSBBdm9jYWRvcG9jYWx5cHNlIG9mIDIwMTcgcmVhbD8NCg0KIyBTb21lIHJlbGV2YW50IGNvbHVtbnMgaW4gdGhlIGRhdGFzZXQ6DQoNCjEuIERhdGUgLSBUaGUgZGF0ZSBvZiB0aGUgb2JzZXJ2YXRpb24NCjIuIEF2ZXJhZ2VQcmljZSAtIHRoZSBhdmVyYWdlIHByaWNlIG9mIGEgc2luZ2xlIGF2b2NhZG8NCjMuIHR5cGUgLSBjb252ZW50aW9uYWwgb3Igb3JnYW5pYw0KNC4geWVhciAtIHRoZSB5ZWFyDQo1LiBSZWdpb24gLSB0aGUgY2l0eSBvciByZWdpb24gb2YgdGhlIG9ic2VydmF0aW9uDQo2LiBUb3RhbCBWb2x1bWUgLSBUb3RhbCBudW1iZXIgb2YgYXZvY2Fkb3Mgc29sZA0KNy4gNDA0NiAtIFRvdGFsIG51bWJlciBvZiBhdm9jYWRvcyB3aXRoIFBMVSA0MDQ2IHNvbGQNCjguIDQyMjUgLSBUb3RhbCBudW1iZXIgb2YgYXZvY2Fkb3Mgd2l0aCBQTFUgNDIyNSBzb2xkDQo5LiA0NzcwIC0gVG90YWwgbnVtYmVyIG9mIGF2b2NhZG9zIHdpdGggUExVIDQ3NzAgc29sZA0KDQojIyBJbXBvcnQgbGlicmFyaWVzDQoNCmBgYHtyIGltcG9ydGxpYnJhcmllc30NCg0KIyBJbXBvcnQgbGlicmFyaWVzDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCmxpYnJhcnkoZ3JpZEV4dHJhKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpgYGANCg0KDQojIyBEYXRhIGltcG9ydCBhbmQgY29sdW1uIGNhc3RpbmcNCg0KYGBge3IgZGF0YSBjbGVhbmluZ30NCg0KIyBJbXBvcnQgZGF0YXNldA0KYXZvY2Fkb3MgPC0gcmVhZF9jc3YoIi4vZGF0YS9hdm9jYWRvLmNzdiIsIGNvbF9uYW1lcyA9IFRSVUUpDQoNCiMgQ29udmVydCBkYXRlIGNvbHVtbiB0byB5bWQgZm9ybWF0DQphdm9jYWRvcyREYXRlIDwtIHltZChhdm9jYWRvcyREYXRlKQ0KDQojIFN0cmlwIHNwYWNlcyBmcm9tIGNvbHVtbiBuYW1lcywgbWFrZSBsb3dlcmNhc2UNCmNvbG5hbWVzKGF2b2NhZG9zKSA8LSB0b2xvd2VyKA0KICAgIGdzdWIoIiAiLCAiIiwNCiAgICAgICAgY29sbmFtZXMoYXZvY2Fkb3MpKSkNCmBgYA0KDQojIyBBdmVyYWdlIHByaWNlIG9mIGF2b2NhZG9zIHBlciB5ZWFyDQoNCmBgYHtyIGF2Z3BlcnllYXJ9DQoNCiMgU2VsZWN0IGF2ZXJhZ2UgcHJpY2UgYW5kIGdyb3VwIGJ5IHllYXINCiMgUmVtb3ZlIGFib3ZlIDk5dGggcGVyY2VudGlsZSBvZiBhdmVyYWdlIHByaWNlDQoNCmF2Z19hdm9jYWRvcyA8LSBhdm9jYWRvcyAlPiUNCiAgc2VsZWN0KGF2ZXJhZ2VwcmljZSx5ZWFyKSAlPiUNCiAgZ3JvdXBfYnkoeWVhcikgJT4lDQogIGZpbHRlcihhdmVyYWdlcHJpY2UgPCBxdWFudGlsZShhdmVyYWdlcHJpY2UsIDAuOTkpKQ0KDQojIEJveCBwbG90IG9mIGF2ZXJhZ2UgYXZvY2FkbyBwcmljZSBwZXIgeWVhcg0KDQphdmdfYXZv
Y2Fkb19ib3ggPC0gZ2dwbG90KGF2Z19hdm9jYWRvcywgYWVzKGZhY3Rvcih5ZWFyKSwgYXZlcmFnZXByaWNlKSkgKyBnZW9tX2JveHBsb3QoYWVzKGZpbGwgPSBmYWN0b3IoeWVhcikpLCB3aWR0aCA9IDAuNzUpICsgbGFicyh0aXRsZT0iQXZlcmFnZSBQcmljZSBvZiBBdm9jYWRvcyBQZXIgWWVhciIsIHg9IlllYXIiLCB5PSJQcmljZSAoJCkiKSArIHNjYWxlX2ZpbGxfYnJld2VyKHBhbGV0dGU9IkdyZWVucyIpDQoNCnByaW50KGF2Z19hdm9jYWRvX2JveCkNCg0KYGBgDQoNCiMjIENvbXBhcmUgb3JnYW5pYyB0byBjb252ZW50aW9uYWwgYXZvY2Fkb3MNCg0KDQpgYGB7ciBhdmdwZXJ0eXBlfQ0KDQojIEdyb3VwIGF2b2NhZG8gYXZlcmFnZSBwcmljZXMgYnkgb3JnYW5pYyBvciBjb252ZW50aW9uYWwNCiMgUmVtb3ZlIGFib3ZlIDk1dGggcGVyY2VudGlsZQ0KYXZfdHlwZXMgPC0gYXZvY2Fkb3MgJT4lDQogIHNlbGVjdChhdmVyYWdlcHJpY2UsdHlwZSkgJT4lDQogIGdyb3VwX2J5KHR5cGUpICU+JQ0KICBmaWx0ZXIoYXZlcmFnZXByaWNlIDwgcXVhbnRpbGUoYXZlcmFnZXByaWNlLCAwLjk1KSkNCg0KIyBCb3ggcGxvdCANCnR5cGVfYXZvY2Fkb19ib3ggPC0gZ2dwbG90KGF2X3R5cGVzLCBhZXMoZmFjdG9yKHR5cGUpLCBhdmVyYWdlcHJpY2UpKSArIGdlb21fYm94cGxvdChhZXMoZmlsbCA9IGZhY3Rvcih0eXBlKSksIHdpZHRoID0gMC41LCBvdXRsaWVyLmNvbG91ciA9ICIjMDA2NjAwIiwgb3V0bGllci5zaXplID0gMSwgb3V0bGllci5zaGFwZSA9IDEwLCBvdXRsaWVyLnN0cm9rZSA9IDIpICsgbGFicyh0aXRsZT0iQXZlcmFnZSBQcmljZSBvZiBBdm9jYWRvczogT3JnYW5pYyB2cyBDb252ZW50aW9uYWwiLCB4PSJUeXBlIiwgeT0iUHJpY2UgKCQpIikgKyBzY2FsZV9maWxsX2JyZXdlcihwYWxldHRlPSJHcmVlbnMiKQ0KDQpwcmludCh0eXBlX2F2b2NhZG9fYm94KQ0KDQpgYGANCg0KIyMgQXZlcmFnZSBwcmljZSBvZiBhdm9jYWRvcyBwZXIgcmVnaW9uDQoNCmBgYHtyIGF2Z3ByaWNlcGVycmVnaW9ufQ0Kc29ydGVkIDwtIGF2b2NhZG9zICU+JSANCiAgZ3JvdXBfYnkocmVnaW9uKSAlPiUNCiAgbXV0YXRlKGF2Z3ZvbHVtZSA9IG1lYW4odG90YWx2b2x1bWUpKQ0KDQpwbG90IDwtIGdncGxvdChkYXRhPXNvcnRlZCwgYWVzKHg9cmVvcmRlcihhcy5mYWN0b3IocmVnaW9uKSwgLWF2Z3ZvbHVtZSksIHk9YXZlcmFnZXByaWNlKSkgKw0KICBnZW9tX3BvaW50KGFlcyhjb2xvcj1tb250aChkYXRlKSwgYWxwaGE9MC4xLCBzaXplPXRvdGFsdm9sdW1lKSkgKw0KICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChhbmdsZSA9IDkwLCBoanVzdCA9IDEpKSArDQogIGd1aWRlcyhzaXplID0gIm5vbmUiKSArDQogIGd1aWRlcyhhbHBoYSA9ICJub25lIikNCg0KcHJpbnQocGxvdCkNCmBgYA0K