####################################################################### # # R para Principiantes Quito 5-8 enero 2010 # # Soluciones # ####################################################################### setwd("C:/R_Quito/data/") # MARTES 1 # 1 Length <- c(75, 85, 91.6, 95, NA, 105.5, 106) Tb <- c(0, 0, 1, NA, 0, 0, 0) mean(Length) # 2 Farm <- c("MO", "MO", "MO", "MO", "LN", "SE", "QM") Month <- c(11, 07, 07, NA, 09, 09, 11) Boar <- cbind(Length, Tb, Farm, Month) Boar dim(Boar) nrow(Boar) ncol(Boar) # 3 Tb2 <- vector(length = 8) Tb2[1] <- 75 Tb2[2] <- 85 Tb2[3] <- 91.6 Tb2[4] <- 95 Tb2[5] <- NA Tb2[6] <- 105.5 Tb2[7] <- 106 Tb2 # 4 Dmat <- matrix(nrow=3, ncol=3) Dmat[,1] <- c(1,4,2) Dmat[,2] <- c(2,2,3) Dmat[,3] <- c(3,1,0) Dmat t(Dmat) solve(Dmat) Dmat %*% solve(Dmat) # 5 Sex <- c(1,2,2,2,1,2,2) Ecervi <- c(0,0,0,NA, 0, 0, 0) Year <- c(00, 00, 01, NA, 03, 03, 02) Boar2 <- data.frame(cbind(Boar, Sex, Ecervi, Year)) Boar2 Boar3 <- cbind(Boar2, sqrt(Length)) Boar3 Boar2 <- list(Boar, Sex, Ecervi, Year) Boar2 Boar3 <- c(Boar2, sqrt(Length)) # ? Boar3 <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # MARTES 2 #### 2.1 bird flu flu <- read.table("C:/R_Quito/data/BirdFlu.txt", header=T) names(flu) head(flu) str(flu) # numero de casos en 2003: sum(flu$c03) # total number of bird flu cases in 2003 and 2005 sum(flu$c03) + sum(flu$c05) # which country has had the most cases: ## new function: apply # number of cases by country cases <- apply(flu[ , c(2, 4, 6, 8, 10, 12)], 1, sum) muertos <- apply(flu[ , c(3,5,7,9,11,13)], 1, sum) # which country had most cases flu$Country[which(cases==max(cases))] # which country had least deaths flu$Country[which(muertos==min(muertos))] # total number cases per country cases # total number of cases per year cases.yr <- apply(flu[ , c(2, 4, 6, 8, 10, 12)], 2, sum) ### 2.2 # ISIT.txt isit <- read.table(file="ISIT.txt", header=T) str(isit) isit.1 <- isit[isit$Station==1,] # number of observations at station 1 length(isit.1[,1]) min(isit.1$SampleDepth) max(isit.1$SampleDepth) mean(isit.1$SampleDepth) median(isit.1$SampleDepth) # station 2 isit.2 <- isit[isit$Station==2,] # number of observations at station 2 length(isit.2[,1]) min(isit.2$SampleDepth) max(isit.2$SampleDepth) mean(isit.2$SampleDepth) median(isit.2$SampleDepth) # number of obervations per station: table(isit$station) # which stations have <30 observations tab <- which(as.vector(table(isit$Station))<30) # new data frame deleting those: isit.new <- subset(isit, Station!=tab[1] & Station!=tab[2] & Station!=tab[3] & Station!=tab[4]) # data from 2002 isit.2002 <- isit[isit$Year==2002,] # data from april (all years) isit.april <- isit[isit$Month==4,] # data from depths > 2000 m isit.2000m <- isit[isit$SampleDepth>2000] coplot(isit.2000m$Sources ~ isit.2000m$SampleDepth|isit.2000m$Station) # data >2000m in April isit.april2000m <- subset(isit, SampleDepth>2000 & Month==4) coplot(isit.april2000m$Sources ~ isit.april2000m$SampleDepth|isit.april2000m$Station) ### 2.3 write.table(isit.april2000m, "isit.april2000m.txt", quote=F, sep="\t") <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> ### CLASE 5 temp <- read.table("temp.txt", header=T) # 5.1 # mean per month per year round(tapply(temp$Temperature, list(temp$Year, temp$Month), mean, na.rm=TRUE), 2) round(tapply(temp$Temperature, list(temp$Year, temp$Month), sd, na.rm=TRUE), 2) round(tapply(temp$Temperature, list(temp$Year, temp$Month), length) ) # 5.2 table(temp$Year) table(temp$Station, temp$Year) <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 6 GRAFICOS BASICOS amp <- read.table(file = "amphib.txt", header = TRUE) par(mfrow=c(1,2)) plot(TOT.N ~ D.PARK, data=amp, ylab="amfibios muertos", xlab="Distance to park") plot(TOT.N ~ D.PARK, data=amp, ylab="Amfibios muertos", xlab="Distance to park", cex=amp$OLIVE/mean(amp$OLIVE)+0.1, pch=20, col="lightblue") # no ponemos un smoothing line <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 7 GRAFICOS AVANCADOS # 7.1 flu <- read.table("BirdFluCases.txt", header=T) cases <- apply(flu[2:16], 2, sum) pie(cases) flu.death <- read.table("BirdFluDeaths.txt", header=T) deaths <- apply(flu.death[2:16], 2, sum) pie(deaths+1) # +1 to show countries with 0 deaths # 7.2 veg <- read.table("Vegetation2.txt", header=TRUE) veg.mean <- tapply(veg$R, veg$Transect, mean) veg.sd <- tapply(veg$R, veg$Transect, sd) veg.n <- tapply(veg$R, veg$Transect, length) # standard error veg.se <- veg.sd / sqrt(veg.n) # barchart bp <- barplot(veg.mean, ylim=c(0,16), ylab="species richness", xlab="transect") arrows(bp, veg.mean, bp, veg.mean+veg.sd, lwd=0.5, angle=90, length=0.1) # dot plot plot(veg$R, veg$Transect, ylab="Transect", xlab="Species Richness", col="darkgrey") points(veg.mean, 1:8, pch=16) segments(x0=veg.mean-veg.se, x1=veg.mean+veg.se, y0=1:8, y1=1:8) # 7.3 boxplot(veg$R ~ veg$Transect) # 7.4 cod <- read.table("CodParasite.txt", header=T) par(mfrow=c(2,2)) boxplot(cod$Intensity ~ cod$Area, main="area") boxplot(cod$Intensity ~ cod$Sex, main="sex") boxplot(cod$Intensity ~ cod$Stage, main="stage") boxplot(cod$Intensity ~ cod$Year, main="year") # close graphics window boxplot(cod$Intensity ~ cod$Year * cod$Sex, main="year x sex") # 7.5 owl <- read.table("Owls.txt", header=T) dotchart(owl$SiblingNegotiation) dotchart(owl$ArrivalTime) dotchart(owl$ArrivalTime, groups=owl$FoodTreatment) # 7.6 dotchart(cod$Intensity, groups=cod$Area) dotchart(cod$Intensity, groups=cod$Sex) dotchart(cod$Intensity, groups=cod$Stage) dotchart(cod$Intensity, groups=cod$Year) dotchart(cod$Depth, groups=cod$Prevalence) # 7.7 log.neg <- log10(owl$SiblingNegotiation+1) plot(log.neg ~ owl$ArrivalTime, xlab="arrival time", ylab="log10 nestling negotiation", xaxt="n", las=2) owl.time <- c("22.00", "24.00", "02.00", "04.00") axis(side=1, at= c(22,24,26,28) , labels=owl.time) plot(log.neg ~ owl$ArrivalTime, xlab="arrival time", ylab="log10 nestling negotiation", xaxt="n", yaxt="n", las=2) owl.time <- c("22.00", "24.00", "02.00", "04.00") axis(side=1, at= c(22,24,26,28) , labels=owl.time) axis(side=2, at=c(0, 1), labels=c(1, 10), las=2) <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 8 FUNCIONES # 8.1 mean.func <- function(x) sum(x) / length(x) # 8.2 se.func <- function(x) sd(x) / sqrt(length(x)) # 8.3 stats.func <- function(x) return(list("mean"=mean.func(x), "sd"=sd(x))) # 8.4 stats.func <- function(x) return(list("mean"=mean.func(x), "sd"=sd(x), "se"=se.func(x))) # 8.5 FtoC <- function(x) (x-32) * 5/9 # 8.6 # simpson index D <- function(x) sum((x/sum(x))^2) indices <- read.table("indices.txt", header=T) head(indices) y <- as.data.frame(tapply(indices[,1], list(indices[,2], indices[,3]), sum)) sapply(y, D) # 8.7 H <- function(x) - sum( (x/sum(x)) * (log(x/sum(x))) ) sapply(y, H) # 8.8 stats.func <- function(x) return(list("mean"=mean.func(x), "sd"=sd(x), "se"=se.func(x), "Simpson" = D(x), "Shannon" = H(x))) # 8.9 par(mar=c(5,4,4,4)) plot(apply(y, 2, mean), ylim=c(0, 4), ylab="mean", xlab="transect", las=1, tcl=0.3, pch=16) segments(x0=1:5, x1=1:5, y0=apply(y, 2, mean)- apply(y, 2, se.func), y1=apply(y, 2, mean) + apply(y, 2, se.func)) points(2*(sapply(y, D)), pch=16, col="red2") points(2*(sapply(y, H)), pch=16, col="orange2") axis(4, c(0, 1,2,3, 4), labels=c(0, 0.5, 1, 1.5,2), tcl=0.3,las=1) mtext("diversity index", 4, 2) legend(x=1, y=4, legend=c("Simpson", "Shannon"), col=c("red2", "orange2"), pch=16) <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 9 LOOPS # 9.1 for (i in 1:10) print(i) # 9.2 x <- 25:32 for(i in 1:length(x)) print(FtoC(x[i])) # or.. for (celsius in 25:30) print(c(celsius, 9/5*celsius + 32)) # 9.3 simpson.ind <- vector() for(i in 1:5) simpson.ind[i] <- D(y[,i]) simpson.ind <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 10 RANDOM DATA # 10.1 x <- runif(10,1,10) min(x) max(x) # 10.2 x <- rnorm(10, 5, 5) length(which(x<0)) # 10.3 x <- rnorm(100, 100, 10) length(which(x<80 | x>120)) # 10.4 x <- rbinom(50, 1, 0.5) length(x[x==1]) x <- sample(c("H", "T"), 50, replace=TRUE) length(x[x=="H"]) # 10.5 x <- sample(1:6, 100, replace=TRUE) length(x[x==6]) # 10.6 x <- sample(seq(1:49), 6, replace=FALSE) max(x) min(x) # 10.7 qnorm(0.05) # 10.8 # 10.9 pnorm(1.5, 0, 2) # 10.10 x <- rexp(100, 1/10) hist(x) median(x) mean(x) # 10.11 cards = paste(rep(c("A",2:10,"J","Q","K"),4),c("H","D","S","C")) sample(cards, 5, replace=F) <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 12 CLASSICAL TESTS # 12.1 # use the 'notch=T' argument in the function boxplot() # 12.2 Non.Block <- c( 18, 15, 5, 8, 4) Block <- c(10, 5, 7, 18, 10) block <- as.matrix(rbind(Non.Block, Block)) chisq.test(block) # 12.3 p <- c(5/12, 3/12, 4/12) # proportion of fish in yr0 fish <- c( 53, 22, 49) # number of fish per spp in yr1 expected <- p*sum(fish) # expected number of fish in yr1 x <- as.matrix(rbind(fish, expected)) # matrix chisq.test(x) # 12.4 data(UCBAdmissions) # read in the dataset x = ftable(UCBAdmissions) # flatten x # what is there x[1:2,] chisq.test(x[1:2,]) chisq.test(x[3:4,]) # 12.5 binom.test(440, 900) # 12.6 t.test(blood[,1], blood[,2], paired=T) <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 13: LINEAR REGRESSION # 13.1 Age <- c(18,23,25,35,65,54,34,56,72,19,23,42,18,39,37) Max.Rate <- c(202,186,187,180,156,169,174,172,153,199,193,174,198,183,178) model <- lm(Max.Rate ~ Age) plot(Age,Max.Rate) abline(model) ci.lwr = predict(model,data.frame(Age=sort(Age)), level=.9,interval="confidence")[,2] lines(sort(Age), ci.lwr, type="l", lty=3) # or use lines() ci.upr = predict(model,data.frame(Age=sort(Age)), level=.9,interval="confidence")[,3] lines(sort(Age), ci.upr, type="l", lty=3) # or use lines() # 13.2 price <- c(80, 151.4, 310, 295, 339, 337.5, 228.7, 245, 339, 43, 279, 599, 119, 289, 249, 178, 159, 289, 488, 376, 249, 275, 275, 459, 219, 359, 379, 189, 173) bedrooms <- c(3, 4, 4, 4, 3, 4, 3, 3, 3, 1, 3, 4, 3, 3, 2, 3, 2, 3, 3, 3, 3, 4, 2, 5, 3, 3, 4, 4, 3) plot(price ~ bedrooms) model <- lm(price ~ bedrooms) abline(model) ci.lwr = predict(model,data.frame(bedrooms=sort(bedrooms)), level=.9,interval="confidence")[,2] lines(sort(bedrooms), ci.lwr, type="l", lty=3) # or use lines() ci.upr = predict(model,data.frame(bedrooms=sort(bedrooms)), level=.9,interval="confidence")[,3] lines(sort(bedrooms), ci.upr, type="l", lty=3) # or use lines() # 13.3 Student <- c( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) Beers <- c(5, 2, 9, 8, 3, 7, 3, 5, 3, 5) BAL <- c(0.10, 0.03, 0.19, 0.12, 0.04, 0.095, 0.07, 0.06, 0.02, 0.05) plot(BAL ~ Beers) model <- lm(BAL ~ Beers) abline(model) ci.lwr = predict(model,data.frame(Beers=sort(Beers)), level=.9,interval="confidence")[,2] lines(sort(Beers), ci.lwr, type="l", lty=3) # or use lines() ci.upr = predict(model,data.frame(Beers=sort(Beers)), level=.9,interval="confidence")[,3] lines(sort(Beers), ci.upr, type="l", lty=3) # or use lines() # 13.4 5.34 degrees F per 1000 feet elev <- c( 600, 1000, 1250, 1600, 1800, 2100, 2500, 2900) # ft temp <- c(56, 54, 56, 50, 47, 49, 47, 45) # F plot(temp ~ elev) model <- lm(temp ~ elev) abline(model) ci.lwr = predict(model,data.frame(elev=sort(elev)), level=.9,interval="confidence")[,2] lines(sort(elev), ci.lwr, type="l", lty=3) # or use lines() ci.upr = predict(model,data.frame(elev=sort(elev)), level=.9,interval="confidence")[,3] lines(sort(elev), ci.upr, type="l", lty=3) # or use lines() model$coef model$coef[2] * 1000 pre <- predict(model,data.frame(elev=c(1000,2000))) pre[1]-pre[2] <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 14: ANOVA data(InsectSprays) boxplot(InsectSprays$count ~ InsectSprays$spray) m <- lm(InsectSprays$count ~ InsectSprays$spray) summary.aov(m) summary(m) <><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><> # CLASE 15: GENERALIZED LINEAR MODELS # 15.1 spp <- read.table("species.txt", header=T) plot(spp$Biomass,spp$Species,type="n") sp<-split(spp$Species,spp$pH) bio<-split(spp$Biomass,spp$pH) points(bio[[1]],sp[[1]],pch=16) points(bio[[2]],sp[[2]],pch=17) points(bio[[3]],sp[[3]]) model1<-glm(Species~ Biomass*pH,poisson, data=spp) summary(model1) model2<-glm(Species~Biomass+pH,poisson, data=spp) anova(model1,model2,test="Chi") xv<-seq(0,10,0.1) levels(pH) length(xv) phv<-rep("high",101) yv<-predict(model1,list(pH=factor(phv),Biomass=xv),type="response") lines(xv,yv) phv<-rep("mid",101) yv<-predict(model1,list(pH=factor(phv),Biomass=xv),type="response") lines(xv,yv) phv<-rep("low",101) yv<-predict(model1,list(pH=factor(phv),Biomass=xv),type="response") lines(xv,yv) # 15.2 germination germ <- read.table("germination.txt", header=T) y<-cbind(count, sample-count) levels(Orobanche) levels(extract) model<-glm(y ~ Orobanche * extract, binomial) summary(model) model<-glm(y ~ Orobanche * extract, quasibinomial) model2<-update(model,~.- Orobanche:extract) anova(model,model2,test="F") anova(model2,test="F") model3<-update(model2,~.- Orobanche) anova(model2,model3,test="F") coef(model3) tapply(predict(model3,type="response"),extract,mean) as.vector(tapply(count,extract,sum))/as.vector(tapply(sample,extract,sum)) # summary: # • Make a two-column response vector containing the successes and failures. # • Use glm with family=binomial (you can omit family=). # • Fit the maximal model (in this case it had four parameters). # • Test for overdispersion. # • If, as here, you find overdispersion then use quasibinomial rather than binomial errors. # • Begin model simplification by removing the interaction term. # • This was non-significant once we had adjusted for overdispersion. # • Try removing main effects (we didn’t need Orobanche genotype in the model). # • Use plot to obtain your model-checking diagnostics. # • Back-transform using predict with the option type=“response” to obtain means.