In this article, the "faithful" data set from the R library "datasets" is analyzed. The article contains the R code used, as well as the (German) commentary.
The data set consists of the waiting times between eruptions and the durations of the eruptions
of the "Old Faithful" geyser in Yellowstone National Park, Wyoming, USA. Download (R code).
# Knowledgedump.org - Analysis of the "Old Faithful Geyser" data set
#
# Explores the `faithful` data set (eruption duration and waiting time until
# the next eruption), fits a linear model waiting ~ eruptions, inspects its
# diagnostics and compares it with quantile regression and a piecewise fit.

# 1. Loading the data ----
# Deliberately no rm(list = ls()): a script should not wipe the caller's
# workspace.
library(datasets) # package that ships the "faithful" data set

# 2. Old Faithful geyser data ----
head(faithful, 10)
str(faithful)
summary(faithful)
help(faithful)

# 2x2 panel: first 50 observations as time series, then both distributions.
# par() returns the previous setting, which is restored at the end of the file.
old_par <- par(mfrow = c(2, 2))
idx <- 1:50
plot(faithful$eruptions[idx], type = "l",
     main = "Eruptsionsdauer nach Index",
     xlab = "Index", ylab = "Eruptionsdauer in Minuten")
panel.smooth(idx, faithful$eruptions[idx])
plot(faithful$waiting[idx], type = "l",
     main = "Wartezeiten nach Index",
     xlab = "Index", ylab = "Wartezeit in Minuten")
panel.smooth(idx, faithful$waiting[idx])
hist(faithful$eruptions, main = "Verteilung von Eruptionsdauer",
     xlab = "Eruptionsdauer", ylab = "Dichte", freq = FALSE)
lines(density(faithful$eruptions), col = "red")
hist(faithful$waiting, main = "Verteilung von Wartezeiten",
     xlab = "Wartezeit", ylab = "Dichte", freq = FALSE)
lines(density(faithful$waiting), col = "red")
par(mfrow = c(1, 1))

# Frequency of each eruption duration, rounded to whole seconds.
f_sek <- table(round(faithful$eruptions * 60))
# The table names are the durations in seconds; convert them explicitly
# instead of relying on plot() coercing character values.
plot(as.numeric(names(f_sek)), as.vector(f_sek), type = "h",
     main = "Anzahl der vorkommenden Eruptsionszeiten",
     xlab = "Eruptionsdauer", ylab = "Anzahl der Vorkommnis")
f_sek[f_sek >= 4]

plot(faithful, main = "Eruptionsdauer gg. Wartezeit",
     xlab = "Eruptionsdauer in Minuten", ylab = "Wartezeit in Minuten")
panel.smooth(faithful$eruptions, faithful$waiting) # lowess() with defaults

# 3. Predicting the next eruption ----
# Use bare column names so that `data =` is actually used and the
# coefficient is labelled "eruptions".
lmfaithful <- lm(waiting ~ eruptions, data = faithful)
lmfaithful
plot(faithful, main = "Eruptionsdauer gg. Wartezeit",
     xlab = "Eruptionsdauer in Minuten", ylab = "Wartezeit in Minuten")
abline(lmfaithful, col = "blue")
abline(a = 30, b = 10, col = "red") # simple rule of thumb: 30 + 10 * duration

# pred1 uses the rounded coefficients of lmfaithful, so it only approximates
# fitted(lmfaithful); pred2 is the rule of thumb drawn in red above.
pred1 <- faithful$eruptions * 10.73 + 33.47
pred2 <- faithful$eruptions * 10 + 30
summary(pred1)
summary(pred2)
summary(faithful$waiting)

# Cases where the prediction is more than one minute later than the actual
# eruption (i.e. an observer relying on it would have missed the eruption).
overshoot1 <- pred1 - faithful$waiting
overshoot2 <- pred2 - faithful$waiting
spaet1 <- overshoot1[overshoot1 > 1]
spaet2 <- overshoot2[overshoot2 > 1]
spaet1
spaet2

summary(lmfaithful)

# Standardized residuals vs. fitted values. stats::rstandard() is used so
# that no additional package (MASS::stdres) has to be loaded.
fit_vals <- fitted(lmfaithful)
std_res <- rstandard(lmfaithful)
plot(fit_vals, std_res, sub = "lm(waiting ~ eruptions)",
     main = "Std. Residuen vs Fit", xlab = "Fit", ylab = "Std. Residuen")
abline(h = 0, lty = 3, col = "gray")
panel.smooth(fit_vals, std_res)

# Sizes of the two groups of eruptions (long vs. short).
str(faithful$eruptions[faithful$eruptions >= 3.5])
str(faithful$eruptions[faithful$eruptions < 3.5])

# Leverage of every observation.
hat_vals <- hatvalues(lmfaithful)
plot(hat_vals, main = "Hebelgewicht vs Index",
     ylab = "Hebelgewicht", xlab = "Index")
panel.smooth(seq_along(hat_vals), hat_vals)

plot(lmfaithful, which = 2)
plot(lmfaithful, which = 3)
par(mfrow = c(1, 2))
plot(lmfaithful, which = 4:5)

# 4. Alternative models ----
# Install quantreg only if it is missing, then attach it; library() fails
# loudly if the installation did not succeed.
if (!requireNamespace("quantreg", quietly = TRUE)) {
  install.packages("quantreg", repos = "http://cran.us.r-project.org",
                   dependencies = TRUE)
}
library(quantreg)

# Quantile regressions for the 1 %, 50 % and 99 % quantiles.
rq1 <- rq(waiting ~ eruptions, data = faithful, tau = 0.01)
rq50 <- rq(waiting ~ eruptions, data = faithful, tau = 0.5)
rq99 <- rq(waiting ~ eruptions, data = faithful, tau = 0.99)
plot(faithful, main = "Eruptionsdauer gg. Wartezeit",
     xlab = "Eruptionsdauer in Minuten", ylab = "Wartezeit in Minuten")
abline(lmfaithful, col = "blue")
abline(rq1, col = "red")
abline(rq99, col = "red")
abline(rq50, col = "green")

# Piecewise linear fit: separate models for eruptions shorter than and at
# least 4 minutes long.
plot(faithful, main = "Eruptionsdauer gg. Wartezeit",
     xlab = "Eruptionsdauer in Minuten", ylab = "Wartezeit in Minuten")
panel.smooth(faithful$eruptions, faithful$waiting) # lowess() with defaults
f1 <- faithful[faithful$eruptions < 4, ]
f2 <- faithful[faithful$eruptions >= 4, ]
lmf1 <- lm(waiting ~ eruptions, data = f1)
lmf2 <- lm(waiting ~ eruptions, data = f2)
abline(lmf1, col = "green")
abline(lmf2, col = "green")
abline(lmfaithful, col = "blue")
summary(lmf2)
summary(lmf1)

# Restore the plot layout that was active before this script ran.
par(old_par)