### Non-Life Insurance: Mathematics and Statistics
### Exercise Sheet 10
### Exercise 10.2

### b)-c)
### We apply the log-linear Gaussian regression model to the observed claim
### amounts given on the exercise sheet

### Load the observed claim amounts into a matrix
### (rows: passenger car, van, truck; columns: age classes 21-30y, 31-40y,
### 41-50y, 51-60y -- the matrix is filled column by column)
S <- matrix(c(2000, 2200, 2500, 1800, 1600, 2000,
              1500, 1400, 1700, 1600, 1400, 1600), nrow = 3)

### Dependent variable log(S_{i,j}) as a 12 x 1 matrix; transposing S first
### orders the observations car type by car type, matching the rows of Z
log.obs <- matrix(log(t(S)), nrow = 12)

### Define the design matrix Z
### (columns: intercept, van, truck, 31-40y, 41-50y, 51-60y)
Z <- matrix(c(rep(1, 12),
              rep(0, 4), rep(1, 4), rep(0, 12), rep(1, 4),
              rep(c(0, 1, 0, 0), 3),
              rep(c(0, 0, 1, 0), 3),
              rep(c(0, 0, 0, 1), 3)), nrow = 12)

### Store the design matrix Z (without the intercept term) and the dependent
### variable log(S_{i,j}) in one dataset
data <- as.data.frame(cbind(Z[, -1], log.obs))
colnames(data) <- c("van", "truck", "X31_40y", "X41_50y", "X51_60y",
                    "observation")

### Apply the regression model
linear.model1 <- lm(formula = observation ~ van + truck + X31_40y + X41_50y +
                      X51_60y, data = data)

### Print the output of the regression model
summary(linear.model1)

### Fitted values (back on the original scale, arranged like S)
matrix(exp(fitted(linear.model1)), byrow = TRUE, nrow = 3)

### We can also get the parameters by applying the formula (7.9) of the
### lecture notes, i.e. by solving the normal equations (Z'Z) beta = Z'y
### (solved directly instead of forming the inverse of Z'Z explicitly)
beta.hat <- solve(crossprod(Z), crossprod(Z, log.obs))
beta.hat

### Note that we can also use R directly on the data, i.e. it finds the
### design matrix internally
car <- c("passenger car", "van", "truck")
age <- c("X21_30y", "X31_40y", "X41_50y", "X51_60y")
### expand.grid varies the first factor (car) fastest, which matches the
### column-wise order of as.vector(log(S))
dat <- expand.grid(car, age)
colnames(dat) <- c("car", "age")
dat$observation <- as.vector(log(S))
linear.model1.direct <- lm(formula = observation ~ car + age, data = dat)
summary(linear.model1.direct)

### d)
### Apply the regression model under H_0 (no car type effect)
linear.model2 <- lm(formula = observation ~ X31_40y + X41_50y + X51_60y,
                    data = data)

### Calculation of the test statistic F
### The statistic is stored as F.stat: assigning to `F` would mask R's
### built-in shorthand F (= FALSE) for the rest of the session.
### The degrees of freedom are taken from the fitted models instead of being
### hard-coded (here: 2 dropped parameters and 6 residual degrees of freedom,
### i.e. the factor 6 / 2 = 3).
rss.full <- sum(residuals(linear.model1)^2)
rss.null <- sum(residuals(linear.model2)^2)
df.full <- df.residual(linear.model1)
df.diff <- df.residual(linear.model2) - df.full
F.stat <- ((rss.null - rss.full) / df.diff) / (rss.full / df.full)

### Calculation of the corresponding p-value
p.value <- pf(F.stat, df.diff, df.full, lower.tail = FALSE)
p.value

### We can also directly use anova
### (the full model is listed first, so Df and Sum of Sq are reported with a
### negative sign; F and the p-value are unaffected)
anova(linear.model1, linear.model2)

### b) (with the same parametrization as in Exercise 10.1)
### We apply the log-linear Gaussian regression model to the observed claim
### amounts given on the exercise sheet

### Load the observed claim amounts into a matrix
S <- matrix(c(2000, 2200, 2500, 1800, 1600, 2000,
              1500, 1400, 1700, 1600, 1400, 1600), nrow = 3)
log.obs <- matrix(log(t(S)), nrow = 12)

### Define the design matrix Z
### (columns: van, truck, 21-30y, 31-40y, 41-50y, 51-60y; there is no
### intercept column in this parametrization)
Z <- matrix(c(rep(0, 4), rep(1, 4), rep(0, 12), rep(1, 4),
              rep(c(1, 0, 0, 0), 3),
              rep(c(0, 1, 0, 0), 3),
              rep(c(0, 0, 1, 0), 3),
              rep(c(0, 0, 0, 1), 3)), nrow = 12)

### Store the design matrix Z (which contains no intercept column here) and
### the dependent variable log(S_{i,j}) in one dataset
data <- as.data.frame(cbind(Z, log.obs))
colnames(data) <- c("van", "truck", "X21_30y", "X31_40y", "X41_50y",
                    "X51_60y", "observation")

### Apply the regression model
### The "-1" stands for the model without intercept
linear.model1 <- lm(formula = observation ~ van + truck + X21_30y + X31_40y +
                      X41_50y + X51_60y - 1, data = data)

### Get the exponential of the coefficients
exp(coef(linear.model1))

### Fitted values (back on the original scale, arranged like S)
matrix(exp(fitted(linear.model1)), byrow = TRUE, nrow = 3)