<aside> 💡 0,5
</aside>
# Gini impurity of the class attribute *Insurance* over all records:
#   GINI = 1 - sum_k p_k^2, where p_k is the relative frequency of class k
# absolute class counts
absfreq <- table(data[, "Insurance"])
# relative class frequencies
freqSum <- prop.table(absfreq)
# Sum over every class present in the table instead of looking up "Yes" and
# "No" by name: the result no longer depends on both labels existing, works
# for any number of classes, and is a plain unnamed number (the by-name
# version printed with a misleading "Yes" label).
GINI_Insurance <- 1 - sum(freqSum^2)
# print *Insurance* GINI
print(GINI_Insurance)
<aside> 💡 0
</aside>
# Weighted Gini index of a split on *CustomerID*:
#   GINI = sum_v P(v) * (1 - sum_k P(k | v)^2)
# contingency table CustomerID x Insurance
absfreq <- table(data[, c("CustomerID", "Insurance")])
# row-wise proportions: P(Insurance | CustomerID)
freq <- prop.table(absfreq, 1)
# share of all records that fall into each CustomerID value: P(CustomerID)
freqSum <- rowSums(prop.table(absfreq))
# Gini impurity of every CustomerID value at once, one entry per row of freq.
# The previous loops iterated over the number of records in data while
# indexing freq/freqSum, which only have one row per distinct CustomerID, so
# a repeated ID ran past the end of the table. Working directly on the rows
# of freq removes that mismatch and the assign()/eval() GINI_<i> variables.
GINI_per_CustomerID <- 1 - rowSums(freq^2)
# weight each value's impurity by its share of the records and add them up
GINI_CustomerID <- sum(freqSum * GINI_per_CustomerID)
# print *CustomerID* GINI
print(GINI_CustomerID)
<aside> 💡 0,48
</aside>
# Gini impurity of the *Male* subset with respect to *Insurance*
# contingency table Sex x Insurance
absfreq <- table(data[, c("Sex", "Insurance")])
# row-wise proportions: P(Insurance | Sex)
freq <- prop.table(absfreq, margin = 1)
# share of all records per Sex level
freqSum <- rowSums(prop.table(absfreq))
# 1 - P(No | M)^2 - P(Yes | M)^2, with the two proportions named locally
GINI_M <- local({
  p_no <- freq["M", "No"]
  p_yes <- freq["M", "Yes"]
  1 - p_no^2 - p_yes^2
})
# show the *Male* GINI
print(GINI_M)
<aside> 💡 0,1625
</aside>
# Weighted Gini index of a split on *CarType*
# contingency table CarType x Insurance
absfreq <- table(data[, c("CarType", "Insurance")])
# row-wise proportions: P(Insurance | CarType)
freq <- prop.table(absfreq, margin = 1)
# share of all records per CarType level
freqSum <- rowSums(prop.table(absfreq))
# inspect which CarType levels exist
levels(data$CarType)
# impurity of each CarType level: 1 - P(No | level)^2 - P(Yes | level)^2
GINI_Family <- 1 - freq["Family", "No"]^2 - freq["Family", "Yes"]^2
GINI_Sedan  <- 1 - freq["Sedan", "No"]^2 - freq["Sedan", "Yes"]^2
GINI_Sport  <- 1 - freq["Sport", "No"]^2 - freq["Sport", "Yes"]^2
# combine the level impurities, each weighted by the level's share of records
GINI_CarType <- freqSum["Family"] * GINI_Family +
  freqSum["Sedan"] * GINI_Sedan +
  freqSum["Sport"] * GINI_Sport
# show the *CarType* GINI
print(GINI_CarType)
<aside> 💡 0,4914
</aside>
# Weighted Gini index of a split on *Budget*
# contingency table Budget x Insurance
absfreq <- table(data[, c("Budget", "Insurance")])
# row-wise proportions: P(Insurance | Budget)
freq <- prop.table(absfreq, margin = 1)
# share of all records per Budget level
freqSum <- rowSums(prop.table(absfreq))
# inspect which Budget levels exist
levels(data$Budget)
# impurity of each Budget level: 1 - P(No | level)^2 - P(Yes | level)^2
GINI_High     <- 1 - freq["High", "No"]^2 - freq["High", "Yes"]^2
GINI_Low      <- 1 - freq["Low", "No"]^2 - freq["Low", "Yes"]^2
GINI_Medium   <- 1 - freq["Medium", "No"]^2 - freq["Medium", "Yes"]^2
GINI_VeryHigh <- 1 - freq["VeryHigh", "No"]^2 - freq["VeryHigh", "Yes"]^2
# combine the level impurities, each weighted by the level's share of records
GINI_Budget <- freqSum["High"] * GINI_High +
  freqSum["Low"] * GINI_Low +
  freqSum["Medium"] * GINI_Medium +
  freqSum["VeryHigh"] * GINI_VeryHigh
# show the *Budget* GINI
print(GINI_Budget)
<aside> 💡 (c) CarType
</aside>
# Weighted Gini index of a split on *Sex*
# contingency table Sex x Insurance
absfreq <- table(data[, c("Sex", "Insurance")])
# row-wise proportions: P(Insurance | Sex)
freq <- prop.table(absfreq, margin = 1)
# share of all records per Sex level
freqSum <- rowSums(prop.table(absfreq))
# impurity of each Sex level: 1 - P(No | level)^2 - P(Yes | level)^2
GINI_M <- 1 - freq["M", "No"]^2 - freq["M", "Yes"]^2
GINI_F <- 1 - freq["F", "No"]^2 - freq["F", "Yes"]^2
# combine both impurities, each weighted by the level's share of records
GINI_Sex <- freqSum["M"] * GINI_M +
  freqSum["F"] * GINI_F
# show the *Sex* GINI
print(GINI_Sex)
# Collect the weighted GINI index of every candidate split attribute.
# setNames() labels each entry with its attribute and overrides whatever
# level names the individual values inherited from freqSum (e.g. "Family").
GINI <- setNames(
  c(GINI_CustomerID, GINI_CarType, GINI_Budget, GINI_Sex),
  c("CustomerID", "CarType", "Budget", "Sex")
)
# find preferable characteristic
# Entries that are exactly 0 are excluded before taking the minimum.
# NOTE(review): presumably this drops CustomerID, whose index is expected to
# be 0 because each ID isolates its records; confirm, since it would also
# drop any other attribute with a perfectly pure split.
candidates <- GINI[GINI > 0]
# smallest remaining GINI index
min(candidates)
# attribute that attains it
names(candidates)[which.min(candidates)]
## => CarType