# A price analysis of coffee

- cpc = cost per cup (18 g of coffee per cup)
- cpc_250g = cost per cup when buying a 250 g bag (13.88 cups per bag)
- cpc_1kg = cost per cup when buying a 1 kg bag (55.55 cups per bag)

All prices are in €, as of March 1, 2024, as listed on the brand websites.

11 shops, 86 coffees in total

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px

# Show every row and column when a DataFrame is displayed.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

df = pd.read_csv("coffee.csv")

# Convert the price columns to float.
# Presumably the CSV stores prices as text with a currency symbol
# (e.g. "€9.90") -- verify against coffee.csv. Every character that is not a
# digit or a decimal point is removed before casting.
# The pattern is a raw string so that `\d` reaches the regex engine verbatim
# instead of being parsed as an (invalid) Python string escape, which triggers
# a SyntaxWarning on recent Python versions.
PRICE_COLUMNS = ['250gr', '1kg', 'cpc_250g', 'cpc_1kg']
NON_NUMERIC_PATTERN = r'[^\d.]'

for price_column in PRICE_COLUMNS:
    df[price_column] = (
        df[price_column]
        .replace(NON_NUMERIC_PATTERN, '', regex=True)
        .astype(float)
    )

df


Unnamed: 0,shop,name,url,country,type,process,notes,250gr,1kg,cpc_250g,cpc_1kg
0,coffee circle,house blend,https://www.coffeecircle.com/en/k/house-blend-...,"brazil, laos, colombia, congo",blend,natural,"Characterful | Marzipan, Nougat",9.9,33.9,0.71,0.61
1,coffee circle,sidamo,https://www.coffeecircle.com/en/k/sidamo-espresso,ethiopia,so,natural,"Lively | Caramel, Dark Berries",10.9,36.9,0.78,0.66
2,coffee circle,cerrado,https://www.coffeecircle.com/en/k/cerrado,brazil,so,natural,"Velvety | Hazelnut, Sweet Fruits",10.9,36.9,0.78,0.66
3,coffee circle,tiga terra omniroast,https://www.coffeecircle.com/en/k/tiga-terra,"brazil, guinea, indonesia",blend,natural,"Full-bodied | Chocolate, Walnut",11.9,40.9,0.86,0.74
4,coffee circle,yirga santos,https://www.coffeecircle.com/en/k/yirga-santos...,"india, brazil, ethiopia",blend,(mixed),"Strong | Chocolate, Brown Sugar",9.9,33.9,0.71,0.61
5,coffee circle,pacas,https://www.coffeecircle.com/en/k/pacas,el salvador,so,washed,"Balanced | Hazelnut, Nougat",14.9,50.9,1.07,0.92
6,coffee circle,feliciano castillo,https://www.coffeecircle.com/en/k/feliciano-ca...,honduras,so,washed,"Velvety | Dark Chocolate, Pecan Nut",14.9,50.9,1.07,0.92
7,coffee circle,espresso chire,https://www.coffeecircle.com/en/k/chire-espresso,ethiopia,so,natural,"Velvety | Lime, Strawberry",14.9,50.9,1.07,0.92
8,coffee circle,grano gayo,https://www.coffeecircle.com/en/k/grano-gayo-e...,"peru, indonesia, colombia",blend,washed,"Strong | Brown Sugar, Tiramisu",10.9,36.9,0.78,0.66
9,coffee circle,francisco lopez,https://www.coffeecircle.com/en/k/francisco-lo...,guatemala,so,washed,"Creamy | Almond, Chocolate",14.9,50.9,1.07,0.92


# Country count


In [2]:
# A coffee may list several comma-separated origin countries; break each
# entry into its individual country names.
split_countries = df['country'].str.split(',')

# Flatten to one country per element, trimming whitespace and title-casing
# so that e.g. " brazil" and "brazil" are tallied together.
all_countries = (
    split_countries
    .dropna()
    .explode()
    .str.strip()
    .str.title()
    .tolist()
)

# Occurrences per country, with the country moved from the index to a column.
country_counts = pd.Series(all_countries).value_counts().reset_index()
# The table is not displayed here; evaluate `country_counts` to inspect it.

In [3]:
# Keep only the rows whose type is 'so' (single origin).
is_single_origin = df['type'] == 'so'
df_singleorigin = df[is_single_origin]

# One box per country, showing the spread of cost per cup (1kg-bag basis).
fig = px.box(
    df_singleorigin,
    x='country',
    y='cpc_1kg',
    title='Distribution of Coffee Prices by Country (single origin only)',
)

# Axis titles, with the country labels tilted for readability.
country_layout = dict(
    xaxis_title='country',
    yaxis_title='cpc_1kg',
    xaxis_tickangle=-45,
)
fig.update_layout(**country_layout)
fig.show()

# Brand: average price per brand
Cost per cup when buying 1 kg bags.

This checks which brands are more expensive, on average.

In [4]:
# Mean cost per cup for each shop (both bag sizes), rounded to two decimals
# and ordered from cheapest to most expensive on the 1kg-bag basis.
shop_avg = (
    df.groupby('shop')[['cpc_250g', 'cpc_1kg']]
    .mean()
    .round(2)
    .sort_values(by='cpc_1kg')
    .reset_index()
)

# Bar chart of the 1kg-bag averages; text_auto prints each value on its bar.
fig = px.bar(
    shop_avg,
    x='shop',
    y='cpc_1kg',
    title='Average cost per cup (when buying 1kg bags)',
    text_auto=True,
)
fig.show()

# Type: Single origin vs blend

Single origin tends to be more expensive than blends, but how much more?

In [5]:
# Mean cost per cup for each coffee type (both bag sizes), ordered by the
# 250g-bag figure, with `type` as a regular column.
types = (
    df.groupby('type')[['cpc_250g', 'cpc_1kg']]
    .mean()
    .sort_values(by='cpc_250g')
    .reset_index()
)
types

Unnamed: 0,type,cpc_250g,cpc_1kg
0,blend,0.7735,0.676
1,so,1.049545,0.924545


In [6]:
# Alias of the full table (no copy, no filtering): every coffee is plotted.
df_type = df

# Spread of cost per cup (1kg-bag basis) for each coffee type.
fig = px.box(
    df_type,
    x='type',
    y='cpc_1kg',
    title='Single origin vs blend',
)
type_layout = dict(xaxis_title='type', yaxis_title='cpc_1kg')
fig.update_layout(**type_layout)
fig.show()

In [7]:
# Mean 250g-bag cost per cup, indexed by coffee type.
mean_price_per_type = types.groupby('type')['cpc_250g'].mean()

blend_price = mean_price_per_type.loc['blend']
so_price = mean_price_per_type.loc['so']

# Relative premium of single origin over blend, expressed in percent.
percentage_difference = (so_price - blend_price) / blend_price * 100

summary_template = "Single origin is {:.2f}% more expensive than Blend, on average."
print(summary_template.format(percentage_difference))

Single origin is 35.69% more expensive than Blend, on average.


# Process
- Natural Process: Coffee cherries are dried whole, allowing the fruit to ferment around the bean, imparting fruity flavors.
- Washed Process: Coffee cherries are pulped to remove the fruit, then fermented in water to remove mucilage before drying, resulting in a clean cup with bright acidity.
- Honey Process: Coffee cherries are pulped but some or all of the mucilage is left on the beans during drying, leading to varying degrees of sweetness and body.
- Anaerobic Process: Coffee cherries are placed in a sealed container to ferment in the absence of oxygen, resulting in unique and complex flavor profiles due to controlled fermentation conditions.

Natural and washed are by far the most common. Unless explicitly mentioned otherwise, blends are usually a combination of natural and washed coffees and are marked as (mixed).

In [8]:
# Per-process summary: number of coffees and mean cost per cup for both bag
# sizes, ordered from the most to the least common process.
process = (
    df.groupby('process')
    .agg({'process': 'count', 'cpc_250g': 'mean', 'cpc_1kg': 'mean'})
    # The counted column keeps the name 'process'; rename it before
    # reset_index so it does not clash with the index of the same name.
    .rename(columns={'process': 'count'})
    .sort_values(by='count', ascending=False)
    .reset_index()
)
process

Unnamed: 0,process,count,cpc_250g,cpc_1kg
0,washed,33,1.05303,0.931212
1,natural,25,1.0468,0.9256
2,(mixed),9,0.781111,0.692222
3,honey,5,1.116,0.932
4,anaerobic,2,1.2,1.11
5,sugarcane,1,0.86,0.77


In [9]:
# Keep only the rows whose type is 'so' (single origin).
single_origin_mask = df['type'] == 'so'
df_process = df[single_origin_mask]

# One box per processing method, showing the spread of cost per cup
# (1kg-bag basis).
fig = px.box(
    df_process,
    x='process',
    y='cpc_1kg',
    title='Distribution of Coffee Prices by process (single origin only)',
)

# Axis titles, with the process labels tilted for readability.
process_layout = dict(
    xaxis_title='process',
    yaxis_title='cpc_1kg',
    xaxis_tickangle=-45,
)
fig.update_layout(**process_layout)
fig.show()