# Colon operator: the integers from 12 through 17, both ends included.
12:17
[1] 12 13 14 15 16 17
# Ten evenly spaced values starting at pi and ending at 2*pi (endpoints included).
seq(pi, 2*pi, length.out = 10)
[1] 3.141593 3.490659 3.839724 4.188790 4.537856 4.886922 5.235988 5.585054
[9] 5.934119 6.283185
# Add up every multiple of 3 between 1 and 1000.
ns <- seq_len(1000)
# Logical mask: TRUE where the remainder after dividing by 3 is zero.
idx <- (ns %% 3) == 0
sum(ns[idx])
[1] 166833
# Sum of all perfect squares up to 10000, i.e. 1^2 + 2^2 + ... + top^2.
top <- sqrt(10000)
ns <- seq_len(top)
sum(ns^2)
[1] 338350
Loop through the numbers from 1 to 20. If the number is divisible by 3, print “Fizz”. If the number is divisible by 5, print “Buzz”. If it is divisible by both 3 and 5 print “FizzBuzz”. Otherwise just print the number. Your output should look something like
1
2
Fizz
4
Buzz
...
# FizzBuzz for 1..20: multiples of both 3 and 5 are handled first so the
# single-divisor branches below never see them.
for (i in seq_len(20)) {
  if (i %% 3 == 0 && i %% 5 == 0) {
    print("FizzBuzz")
  } else if (i %% 5 == 0) {
    print("Buzz")
  } else if (i %% 3 == 0) {
    print("Fizz")
  } else {
    print(i)
  }
}
[1] 1
[1] 2
[1] "Fizz"
[1] 4
[1] "Buzz"
[1] "Fizz"
[1] 7
[1] 8
[1] "Fizz"
[1] "Buzz"
[1] 11
[1] "Fizz"
[1] 13
[1] 14
[1] "FizzBuzz"
[1] 16
[1] 17
[1] "Fizz"
[1] 19
[1] "Buzz"
# Draw 1000 standard-normal values and count how many are strictly positive.
# No seed is set here, so the count differs from run to run.
xs <- rnorm(n = 1000, mean = 0, sd = 1)
length(xs[xs > 0])
[1] 503
Start by copying and pasting the code below in a Code cell
# Fix the RNG seed so the simulated samples are reproducible.
set.seed(123)
# Number of observations drawn per group.
n <- 10
# Cases: n draws from a normal distribution with mean 0 and sd 1.
case <- rnorm(n, 0, 1)
# Controls: n draws from a normal distribution with mean 0.1 and sd 1.
ctrl <- rnorm(n, 0.1, 1)
Run the above code, then state the null hypothesis for comparing the mean between cases and controls. Perform a t-test with \(n = 10\) and \(n = 1000\).
# Small-sample comparison: 10 cases vs 10 controls whose true means differ by 0.1.
n <- 10
set.seed(123)
# Draw order matters for reproducibility: cases first, then controls.
case <- rnorm(n, mean = 0, sd = 1)
ctrl <- rnorm(n, mean = 0.1, sd = 1)
# Two-sided Welch two-sample t-test (the t.test default).
t.test(case, ctrl)
Welch Two Sample t-test
data: case and ctrl
t = -0.5249, df = 17.872, p-value = 0.6061
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.1710488 0.7030562
sample estimates:
mean of x mean of y
0.07462564 0.30862196
# Large-sample comparison: same simulation as before but with 1000 per group.
n <- 1000
set.seed(123)
# Draw order matters for reproducibility: cases first, then controls.
case <- rnorm(n, mean = 0, sd = 1)
ctrl <- rnorm(n, mean = 0.1, sd = 1)
# Two-sided Welch two-sample t-test (the t.test default).
t.test(case, ctrl)
Welch Two Sample t-test
data: case and ctrl
t = -2.8229, df = 1997.355, p-value = 0.004806
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2141064 -0.0385684
sample estimates:
mean of x mean of y
0.01612787 0.14246525
# The pwr package supplies pwr.t.test() for t-test power calculations.
library(pwr)
# n is left unspecified, so pwr.t.test() solves for the per-group sample size
# of a two-sample t-test with effect size d = 10/20 = 0.5, a 5% significance
# level and 80% power.
pwr.t.test(d = 10/20, sig.level = 0.05, power = 0.8, type = "two.sample")
Two-sample t test power calculation
n = 63.76561
d = 0.5
sig.level = 0.05
power = 0.8
alternative = two.sided
NOTE: n is number in each group
# Keep the power-analysis result so individual fields can be read from it.
ans <- pwr.t.test(
  d = 10 / 20,
  sig.level = 0.05,
  power = 0.8,
  type = "two.sample"
)
# Required number of observations in each group.
ans[["n"]]
[1] 63.76561
# Required per-group sample size as a function of desired power, for three
# effect sizes, at a 5% significance level (two-sample t-test).
pwrs <- seq(0.5, 0.99, by = 0.01)
ds <- c(0.1, 0.5, 1.0)

# One row per power level, one column per effect size. Pre-filled with NA so
# any cell that fails to be computed is obvious. The dimensions are taken
# directly from the inputs rather than from variables named `nrow`/`ncol`,
# which would shadow the base functions of the same name.
ns <- matrix(NA_real_, nrow = length(pwrs), ncol = length(ds))
for (i in seq_along(pwrs)) {
  for (j in seq_along(ds)) {
    ns[i, j] <- pwr.t.test(
      d = ds[j], sig.level = 0.05, power = pwrs[i], type = "two.sample"
    )$n
  }
}

# The first curve sets up the axes; ylim is wide enough for the smallest
# effect size, which needs by far the largest samples.
plot(pwrs, ns[, 1], type = "o", col = "red", ylim = c(0, 3800),
     xlab = "power", ylab = "sample size per group")
lines(pwrs, ns[, 2], type = "o", col = "blue")
lines(pwrs, ns[, 3], type = "o", col = "green")
# Legend symbols match the plotted style (open circle joined by a line).
legend(0.5, 3500, paste("effect size =", c("0.1", "0.5", "1.0")),
       col = c("red", "blue", "green"), pch = 1, lty = 1)
# 0.5 raised to the 4th power.
# NOTE(review): presumably the exact probability that the rbinom() simulation
# that follows estimates (4 successes in 4 trials at p = 0.5) - confirm against
# the exercise text.
0.5^4
[1] 0.0625
# Simulate 10000 experiments, each counting successes in 4 trials with
# success probability 0.5, then count the experiments where all 4 succeeded.
# No seed is set here, so the count differs from run to run.
expts <- rbinom(n = 10000, size = 4, prob = 0.5)
length(which(expts == 4))
[1] 613
The anscombe dataframe has 8 columns and 11 rows. Perform a
linear regression of \(y1\) against \(x1\), \(y2\)
against \(x2\) and so on and report what the intercept and
slope are in each case (Note: the anscombe dataframe is already
loaded).
model <- lm(y1 ~ x1, data=anscombe)
model$coefficients
(Intercept) x1
3.0000909 0.5000909
# Simple linear regressions for the remaining Anscombe pairs. `model` is
# overwritten each time; coef() reports the intercept and slope of each fit.
model <- lm(y2 ~ x2, data = anscombe)
coef(model)

model <- lm(y3 ~ x3, data = anscombe)
coef(model)

model <- lm(y4 ~ x4, data = anscombe)
coef(model)
(Intercept) x4
3.0017273 0.4999091
# Show the four Anscombe pairs side by side in a 2 x 2 grid of scatter plots.
# The formula interface labels the axes with the column names, as before.
par(mfrow = c(2, 2))
plot(y1 ~ x1, data = anscombe, type = "p")
plot(y2 ~ x2, data = anscombe, type = "p")
plot(y3 ~ x3, data = anscombe, type = "p")
plot(y4 ~ x4, data = anscombe, type = "p")
#' Euclidean distance between two numeric vectors.
#'
#' @param v1 Numeric vector of coordinates.
#' @param v2 Numeric vector of coordinates; combined element-wise with `v1`
#'   under R's usual recycling rules.
#' @return A single number: the square root of the summed squared differences.
my.dist <- function(v1, v2) {
  squared_diffs <- (v1 - v2)^2
  sqrt(sum(squared_diffs))
}
# Example: the 3-4-5 right triangle.
my.dist(c(0, 0), c(3, 4))
[1] 5
#' Median of a vector.
#'
#' The values are sorted first; `sort()` drops `NA`s, so missing values are
#' ignored. For an odd number of values the middle one is returned; for an
#' even number, the mean of the two middle values. An empty input yields `NA`.
#'
#' @param v A sortable (typically numeric) vector.
#' @return The median of `v`.
my.median <- function(v) {
  v <- sort(v)
  n <- length(v)
  if (n %% 2 == 1) {
    # Odd length: the middle element is at position (n + 1) / 2.
    # (floor(n / 2) is one position too low, and is 0 when n == 1.)
    ans <- v[(n + 1) / 2]
  } else {
    # Even length: average the two elements either side of the midpoint.
    low <- n / 2
    ans <- mean(v[low:(low + 1)])
  }
  ans
}
my.median(1:11)
# [1] 6
my.median(1:12)
# [1] 6.5
#' Return `n` rows of a data frame chosen at random.
#'
#' Exercise: write a function `peek` that will take a dataframe and a number as
#' arguments - so you would invoke the function with a call like `peek(df, n)`.
#' What this function does is return n rows at random (no duplicate rows). If n
#' is greater than the number of rows, the entire data frame is returned.
#'
#' @param df A data frame.
#' @param n Number of rows to return; a single non-negative number.
#' @return A data frame: `n` distinct rows of `df` in random order, or `df`
#'   itself, unchanged, when `n` exceeds the number of rows.
peek <- function(df, n) {
  stopifnot(is.numeric(n), length(n) == 1, !is.na(n), n >= 0)

  # nrow(), not length(): length() of a data frame is its number of columns.
  total <- nrow(df)
  if (n > total) {
    return(df)
  }

  # Sampling row indices without replacement guarantees no duplicate rows.
  idx <- sample.int(total, n, replace = FALSE)
  # drop = FALSE keeps the result a data frame even when df has one column.
  df[idx, , drop = FALSE]
}
# Example: ask peek() for three rows of the built-in anscombe data frame.
peek(anscombe, 3)
x1 x2 x3 x4 y1 y2 y3 y4
4 9 9 9 8 8.81 8.77 7.11 8.84
3 13 13 13 8 7.58 8.74 12.74 7.71
5 11 11 11 8 8.33 9.26 7.81 8.47