Q2.1

<aside> 💡 0,5

</aside>

# Gini impurity of the class attribute *Insurance* over all records.
# `data` is defined outside this section.

# absolute counts per Insurance value, then the relative frequencies
absfreq <- table(data[, "Insurance"])
freqSum <- prop.table(absfreq)

# Gini = 1 - p(Yes)^2 - p(No)^2
GINI_Insurance <- 1 - freqSum["Yes"]^2 - freqSum["No"]^2

# show the *Insurance* Gini
print(GINI_Insurance)

Q2.2

<aside> 💡 0

</aside>

# Weighted Gini index of a split on *CustomerID*.
# `data` is defined outside this section.

# contingency table: one row per distinct CustomerID, one column per
# Insurance value
absfreq <- table(data[, c("CustomerID", "Insurance")])

# class distribution inside each CustomerID value (margin 1 = by row)
freq <- prop.table(absfreq, 1)

# share of all records that falls on each CustomerID value
freqSum <- rowSums(prop.table(absfreq))

# Gini impurity of every CustomerID value at once.
# Working on the rows of `freq` (instead of looping over the rows of `data`)
# keeps the index range equal to the number of distinct IDs, so repeated IDs
# no longer run past the end of the table, and no per-row GINI_<i> globals
# have to be created with assign()/eval().
gini_by_id <- 1 - freq[, "Yes"]^2 - freq[, "No"]^2

# Gini of the attribute = impurities weighted by the share of each value
GINI_CustomerID <- sum(freqSum * gini_by_id)

# print *CustomerID* GINI
print(GINI_CustomerID)

Q2.3

<aside> 💡 0,48

</aside>

# Gini impurity of the *Male* subset with respect to *Insurance*.
# `data` is defined outside this section.

# Sex x Insurance contingency table
absfreq <- table(data[, c("Sex", "Insurance")])
# class distribution inside each Sex value (margin 1 = by row)
freq <- prop.table(absfreq, 1)
# share of all records per Sex value (not needed for the single-level result)
freqSum <- rowSums(prop.table(absfreq))

# Gini(M) = 1 - p(No | M)^2 - p(Yes | M)^2
GINI_M <- 1 - freq["M", "No"]^2 - freq["M", "Yes"]^2

# show the *Male* Gini
print(GINI_M)

Q2.4

<aside> 💡 0,1625

</aside>

# Weighted Gini index of a split on *CarType*.
# `data` is defined outside this section.

# CarType x Insurance contingency table
absfreq <- table(data[, c("CarType", "Insurance")])
# class distribution inside each CarType value (margin 1 = by row)
freq <- prop.table(absfreq, 1)
# share of all records per CarType value
freqSum <- rowSums(prop.table(absfreq))

# inspect which CarType levels exist
levels(data$CarType)

# per-level impurity: 1 - p(No | level)^2 - p(Yes | level)^2
GINI_Family <- 1 - freq["Family", "No"]^2 - freq["Family", "Yes"]^2
GINI_Sedan <- 1 - freq["Sedan", "No"]^2 - freq["Sedan", "Yes"]^2
GINI_Sport <- 1 - freq["Sport", "No"]^2 - freq["Sport", "Yes"]^2

# attribute Gini = per-level impurities weighted by each level's share
GINI_CarType <- freqSum["Family"] * GINI_Family +
  freqSum["Sedan"] * GINI_Sedan +
  freqSum["Sport"] * GINI_Sport

# show the *CarType* Gini
print(GINI_CarType)

Q2.5

<aside> 💡 0,4914

</aside>

# Weighted Gini index of a split on *Budget*.
# `data` is defined outside this section.

# Budget x Insurance contingency table
absfreq <- table(data[, c("Budget", "Insurance")])
# class distribution inside each Budget value (margin 1 = by row)
freq <- prop.table(absfreq, 1)
# share of all records per Budget value
freqSum <- rowSums(prop.table(absfreq))

# inspect which Budget levels exist
levels(data$Budget)

# per-level impurity: 1 - p(No | level)^2 - p(Yes | level)^2
GINI_High <- 1 - freq["High", "No"]^2 - freq["High", "Yes"]^2
GINI_Low <- 1 - freq["Low", "No"]^2 - freq["Low", "Yes"]^2
GINI_Medium <- 1 - freq["Medium", "No"]^2 - freq["Medium", "Yes"]^2
GINI_VeryHigh <- 1 - freq["VeryHigh", "No"]^2 - freq["VeryHigh", "Yes"]^2

# attribute Gini = per-level impurities weighted by each level's share
GINI_Budget <- freqSum["High"] * GINI_High +
  freqSum["Low"] * GINI_Low +
  freqSum["Medium"] * GINI_Medium +
  freqSum["VeryHigh"] * GINI_VeryHigh

# show the *Budget* Gini
print(GINI_Budget)

Q2.6

<aside> 💡 (c) CarType

</aside>

# Weighted Gini index of a split on *Sex*, followed by a comparison of all
# attribute Gini indexes to pick the preferable split attribute.
# `data`, GINI_CustomerID, GINI_CarType and GINI_Budget are defined outside
# this section.

# Sex x Insurance contingency table
absfreq <- table(data[, c("Sex", "Insurance")])
# class distribution inside each Sex value (margin 1 = by row)
freq <- prop.table(absfreq, 1)
# share of all records per Sex value
freqSum <- rowSums(prop.table(absfreq))

# per-level impurity: 1 - p(No | level)^2 - p(Yes | level)^2
GINI_M <- 1 - freq["M", "No"]^2 - freq["M", "Yes"]^2
GINI_F <- 1 - freq["F", "No"]^2 - freq["F", "Yes"]^2

# attribute Gini = per-level impurities weighted by each level's share
GINI_Sex <- freqSum["M"] * GINI_M + freqSum["F"] * GINI_F

# print *Sex* GINI
print(GINI_Sex)

# Collect the attribute Gini indexes, labelled by attribute.
# unname() strips the level label each value inherited from its weighted sum
# (e.g. "Family", "High", "M"); without it c() would build combined names
# such as "CarType.Family".
GINI <- c(
  CustomerID = unname(GINI_CustomerID),
  CarType = unname(GINI_CarType),
  Budget = unname(GINI_Budget),
  Sex = unname(GINI_Sex)
)

# Only strictly positive indexes compete, as in the original selection.
# NOTE(review): presumably this excludes CustomerID, whose index is 0 because
# every ID identifies a single record - confirm.
# which() also drops NA/NaN entries instead of carrying them into the result.
candidates <- GINI[which(GINI > 0)]

# smallest positive Gini index and the attribute it belongs to
# (expected answer for this exercise: CarType)
print(min(candidates))
print(names(candidates)[which.min(candidates)])

Q2.7

https://s3-us-west-2.amazonaws.com/secure.notion-static.com/6c4b894b-2517-42f1-a349-b779e0ad09c3/Untitled.png