library(mozzie)data(mozzie)head(mozzie, 2)
ID Year Week Colombo Gampaha Kalutara Kandy Matale Nuwara Eliya Galle1 1 2008 52 15 7 1 11 4 0 02 2 2009 1 44 23 5 16 21 2 0 Hambantota Matara Jaffna Kilinochchi Mannar Vavuniya Mulative Batticalo1 6 22 0 0 8 0 0 12 5 18 1 0 0 0 0 0 Ampara Trincomalee Kurunagala Puttalam Anuradhapura Polonnaruwa Badulla1 0 0 2 1 2 0 12 1 1 10 5 0 0 1 Monaragala Ratnapura Kegalle1 1 2 162 0 1 25library(mozzie)data(mozzie)head(mozzie, 2)
ID Year Week Colombo Gampaha Kalutara Kandy Matale Nuwara Eliya Galle1 1 2008 52 15 7 1 11 4 0 02 2 2009 1 44 23 5 16 21 2 0 Hambantota Matara Jaffna Kilinochchi Mannar Vavuniya Mulative Batticalo1 6 22 0 0 8 0 0 12 5 18 1 0 0 0 0 0 Ampara Trincomalee Kurunagala Puttalam Anuradhapura Polonnaruwa Badulla1 0 0 2 1 2 0 12 1 1 10 5 0 0 1 Monaragala Ratnapura Kegalle1 1 2 162 0 1 25Use Min-Max transformation to rescale all the districts variables onto 0-1 range.
Min-Max transformation is $\frac{x_i - \min(x)}{\max(x) - \min(x)}$ where $x = (x_1, x_2, \ldots, x_n)$.
mozzie# Colombo districtminmax.colombo <- (mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) / (max(mozzie$Colombo, na.rm=TRUE) - min(mozzie$Colombo, na.rm=TRUE))head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263mozzie# Colombo districtminmax.colombo <- (mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) / (max(mozzie$Colombo, na.rm=TRUE) - min(mozzie$Colombo, na.rm=TRUE))head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263# Gampaha districtminmax.gampaha <- (mozzie$Gampaha - min(mozzie$Gampaha, na.rm = TRUE)) / (max(mozzie$Gampaha, na.rm = TRUE) - min(mozzie$Gampaha, na.rm = TRUE))head(minmax.gampaha)
[1] 0.02734375 0.08984375 0.07421875 0.08984375 0.09375000 0.06640625mozzie# Colombo districtminmax.colombo <- (mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) / (max(mozzie$Colombo, na.rm=TRUE) - min(mozzie$Colombo, na.rm=TRUE))head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263# Gampaha districtminmax.gampaha <- (mozzie$Gampaha - min(mozzie$Gampaha, na.rm = TRUE)) / (max(mozzie$Gampaha, na.rm = TRUE) - min(mozzie$Gampaha, na.rm = TRUE))head(minmax.gampaha)
[1] 0.02734375 0.08984375 0.07421875 0.08984375 0.09375000 0.06640625# Kalutara districtminmax.kalutara <- (mozzie$Gampaha - min(mozzie$Kalutara, na.rm = TRUE)) / (max(mozzie$Kalutara, na.rm = TRUE) - min(mozzie$Kalutara, na.rm = TRUE))head(minmax.kalutara)
[1] 0.09333333 0.30666667 0.25333333 0.30666667 0.32000000 0.22666667mozzie# Colombo districtminmax.colombo <- (mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) / (max(mozzie$Colombo, na.rm=TRUE) - min(mozzie$Colombo, na.rm=TRUE))head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263# Gampaha districtminmax.gampaha <- (mozzie$Gampaha - min(mozzie$Gampaha, na.rm = TRUE)) / (max(mozzie$Gampaha, na.rm = TRUE) - min(mozzie$Gampaha, na.rm = TRUE))head(minmax.gampaha)
[1] 0.02734375 0.08984375 0.07421875 0.08984375 0.09375000 0.06640625# Kalutara districtminmax.kalutara <- (mozzie$Gampaha - min(mozzie$Kalutara, na.rm = TRUE)) / (max(mozzie$Kalutara, na.rm = TRUE) - min(mozzie$Kalutara, na.rm = TRUE))head(minmax.kalutara)
[1] 0.09333333 0.30666667 0.25333333 0.30666667 0.32000000 0.22666667Very easily made errors when copying-and-pasting the codes.
A mistake copied becomes a mistake repeated.
Whenever you need to copy and paste a block of code many times.
If you don't find a suitable built-in function to serve your purpose, you can write your own function.
To share your work with others.
rescale_minmaxrescale_minmax
rescale_minmax <-rescale_minmax
rescale_minmax <-
rescale_minmax <- function() # Arguments/inputs should be defined inside ()rescale_minmax
rescale_minmax <-
rescale_minmax <- function() # Arguments/inputs should be defined inside ()
rescale_minmax <- function(){# Task# output}Find all the inputs that correspond to a given function output?
# Colombo district(mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) / (max(mozzie$Colombo, na.rm=TRUE) - min(mozzie$Colombo, na.rm=TRUE))Find all the inputs that correspond to a given function output?
# Colombo district(mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) / (max(mozzie$Colombo, na.rm=TRUE) - min(mozzie$Colombo, na.rm=TRUE))
Re-write the code with general names
x <- mozzie$Colombo(x - min(x, na.rm = TRUE)) / (max(x, na.rm=TRUE) - min(x, na.rm=TRUE))Find all the inputs that correspond to a given function output?
# Colombo district(mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) / (max(mozzie$Colombo, na.rm=TRUE) - min(mozzie$Colombo, na.rm=TRUE))
Re-write the code with general names
x <- mozzie$Colombo(x - min(x, na.rm = TRUE)) / (max(x, na.rm=TRUE) - min(x, na.rm=TRUE))
Remove duplication/ Make your code efficient and readable
rng <- range(x, na.rm = TRUE)rng
[1] 0 475rng <- range(x, na.rm = TRUE)(x - rng[1]) / (rng[2] - rng[1])rescale_minmax <- function(x){ rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1])}rescale_minmax <- function(x){ rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1])}
rescale_minmax <- function(x){ rng <- range(x, na.rm = TRUE) out.rescaled <- (x - rng[1]) / (rng[2] - rng[1]) out.rescaled}rescale_minmax <- function(x){ rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1])}
#' Rescale a vector onto the 0-1 range (Min-Max transformation)
#'
#' Computes (x - min(x)) / (max(x) - min(x)). Missing values are ignored
#' when the range is computed and remain `NA` in the result.
#'
#' @param x A numeric vector.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # rng[1] is the minimum and rng[2] the maximum of the non-missing values.
  rng <- range(x, na.rm = TRUE)
  # Store the result under a name, then give that name as the last
  # expression so it becomes the function's value.
  out.rescaled <- (x - rng[1]) / (rng[2] - rng[1])
  out.rescaled
}
#' Rescale a vector onto the 0-1 range (Min-Max transformation)
#'
#' Computes (x - min(x)) / (max(x) - min(x)). Missing values are ignored
#' when the range is computed and remain `NA` in the result.
#'
#' @param x A numeric vector.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # rng[1] is the minimum and rng[2] the maximum of the non-missing values.
  rng <- range(x, na.rm = TRUE)
  out.rescaled <- (x - rng[1]) / (rng[2] - rng[1])
  # This variant hands the result back with an explicit return() call.
  return(out.rescaled)
}
In this situation Type A is the best.
rescale_minmax <- function(x){ rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1])}rescale_minmax <- function(x){ rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1])}
rescale_minmax(c(1, 200, 250, 80, NA))
[1] 0.0000000 0.7991968 1.0000000 0.3172691 NAminmax.colombo <- rescale_minmax(mozzie$Colombo)head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263rescale_minmax <- function(x){ rng <- range(x, na.rm = TRUE) (x - rng[1]) / (rng[2] - rng[1])}
rescale_minmax(c(1, 200, 250, 80, NA))
[1] 0.0000000 0.7991968 1.0000000 0.3172691 NAminmax.colombo <- rescale_minmax(mozzie$Colombo)head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263minmax.gampaha <- rescale_minmax(mozzie$Gampaha)minmax.kalutara <- rescale_minmax(mozzie$Kalutara)new.data.col <- c(400, 500, 350, 250, 60, 70, Inf)rescale_minmax(new.data.col)
[1] 0 0 0 0 0 0 NaNnew.data.col <- c(400, 500, 350, 250, 60, 70, Inf)rescale_minmax(new.data.col)
[1] 0 0 0 0 0 0 NaNrescale_minmax <- function(x){ rng <- range(x, na.rm = TRUE, finite=TRUE) (x - rng[1]) / (rng[2] - rng[1])}
new.data.col <- c(400, 500, 350, 250, 60, 70, Inf)rescale_minmax(new.data.col)
[1] 0.77272727 1.00000000 0.65909091 0.43181818 0.00000000 0.02272727 InfRewrite rescale_minmax so that -Inf is set to 0, and Inf is mapped to 1.
04:00
R for Data Science - Exercise 19.2.1, Question 3
04:00
R for Data Science - Exercise 19.2.1, Question 4
10:00
Descriptive names for variables.
Comment your code.
Write your own function to calculate parameter estimates of simple linear regression model.
Help: $\hat{\beta} = (X^T X)^{-1} X^T Y$

05:00
Write a function to calculate confidence intervals for mean. $\bar{x} \pm t_{\alpha/2,\,(n-1)} \frac{s}{\sqrt{n}}$
10:00
cal_mean_ci <- function(x, conf){ len.x <- length(x) se <- sd(x) / sqrt(len.x) alpha <- 1-conf mean(x) + se * qt(c(alpha / 2, 1 - alpha / 2), df = len.x-1)}data <- c(165, 170, 175, 180, 185)cal_mean_ci(data, 0.95)
[1] 165.1838 184.8162cal_mean_ci <- function(x, conf){ len.x <- length(x) se <- sd(x) / sqrt(len.x) alpha <- 1-conf mean(x) + se * qt(c(alpha / 2, 1 - alpha / 2), df = len.x-1)}data <- c(165, 170, 175, 180, 185)cal_mean_ci(data, 0.95)
[1] 165.1838 184.8162cal_mean_ci <- function(x, conf = 0.95){ len.x <- length(x) se <- sd(x) / sqrt(len.x) alpha <- 1-conf mean(x) + se * qt(c(alpha / 2, 1 - alpha / 2), df = len.x-1)}cal_mean_ci(data)
[1] 165.1838 184.8162cal_mean_ci(data, 0.99)
[1] 158.7221 191.2779Control the flow of the execution.
Common ones include:
if, else
for
while
repeat
break
next
switch
if (condition) { # do something} else { # do something else}
Example
#' Print whether a number is even or odd
#'
#' Prints "even number" when `x` is divisible by 2 and "odd number"
#' otherwise.
#'
#' @param x A single number. The test is done with `if`, which needs a
#'   length-one condition, so pass one value at a time.
test_even_odd <- function(x) {
  if (x %% 2 == 0) {
    print("even number")
  } else {
    print("odd number")
  }
}
test_even_odd(5)
[1] "odd number"test_even_odd(6)
[1] "even number"ifelse(condition, if TRUE the output, if FALSE the output)
Example
test_even_odd_v2 <- function(x){ ifelse(x %% 2 == 0, "even number", "odd number")}test_even_odd_v2(5)
FALSE [1] "odd number"test_even_odd_v2(c(1,6))
FALSE [1] "odd number" "even number"if, else and ifelsetest_even_odd <- function(x){ if (x %% 2 == 0) { print("even number") } else { print("odd number") }}test_even_odd(5)
FALSE [1] "odd number"test_even_odd(c(1,6))
FALSE Warning in if (x%%2 == 0) {: the condition has length > 1 and only the firstFALSE element will be usedFALSE [1] "odd number"test_even_odd_v2 <- function(x){ ifelse (x %% 2 == 0, "even number", "odd number")}test_even_odd_v2(5)
FALSE [1] "odd number"test_even_odd_v2(c(1,6))
FALSE [1] "odd number" "even number"grade_marks <- function(marks){ if (marks < 20) { "D" } else if (marks <= 50) { "C" } else if (marks <= 60) { "B" } else { "A" }}grade_marks(75)
[1] "A"R for Data Science - Exercise 19.4.4, Question 2
Help:
lubridate::now() and lubridate::hour()
10:00
for loopfor (i in 1:5) { print(i*100)}
[1] 100[1] 200[1] 300[1] 400[1] 500continents <- c("Asia", "EU", "AUS", "NA", "SA", "Africa")for (i in continents) { print(continents[i])}for (i in 1:4) { print(continents[i])}for (i in seq(continents)) { print(continents[i])}for (i in 1:4) print(continents[i])
## [1] "Asia"## [1] "EU"## [1] "AUS"## [1] "NA"## [1] "SA"## [1] "Africa"mat <- matrix(1:6, ncol=2)mat
[,1] [,2][1,] 1 4[2,] 2 5[3,] 3 6for (i in 1:3) { for (j in 1:2) { print(mat[i, j]) }}
[1] 1[1] 4[1] 2[1] 5[1] 3[1] 6Write a function to count the number of even numbers in a vector.
08:00
# while loop: the condition is checked before every pass, and the body
# must change `i` so that the condition eventually becomes FALSE.
i <- 1 # initial value
while (i < 10) {
  print(i)
  i <- i + 1 # increment
}
[1] 1[1] 2[1] 3[1] 4[1] 5[1] 6[1] 7[1] 8[1] 9Print the first n numbers of the Fibonacci Sequence.
0, 1, 1, 2, 3, 5, 8....

Iterate over a block of code multiple times.
A repeat loop has no condition check of its own to exit the loop.
The only way to exit a repeat loop is to call break.
Example 1
# repeat loop: print x and add 1 on every pass, starting from 5.
x <- 5
repeat {
  print(x)
  x <- x + 1
  # repeat has no exit condition of its own, so leave with break once
  # x reaches 10 (10 itself is never printed).
  if (x == 10) {
    break
  }
}
[1] 5[1] 6[1] 7[1] 8[1] 9Iterate over a block of code multiple times.
A repeat loop has no condition check of its own to exit the loop.
The only way to exit a repeat loop is to call break.
Example 1
# repeat loop: print x and add 1 on every pass, starting from 5.
x <- 5
repeat {
  print(x)
  x <- x + 1
  # repeat has no exit condition of its own, so leave with break once
  # x reaches 10 (10 itself is never printed).
  if (x == 10) {
    break
  }
}
[1] 5[1] 6[1] 7[1] 8[1] 9Example 2
# repeat loop with a random stopping point: keep drawing one uniform
# number between 5 and 10 until a draw falls below 6.1.
set.seed(1) # fix the seed so the sequence of draws is reproducible
repeat {
  x <- runif(1, 5, 10)
  print(x)
  if (x < 6.1) {
    break
  }
}
[1] 6.327543[1] 6.860619[1] 7.864267[1] 9.541039[1] 6.00841for(i in 1:10) { if(i <= 5) { next # Skip the first 5 iterations }print(i)}
[1] 6[1] 7[1] 8[1] 9[1] 10When you want a function to do different things in different circumstances, then the switch function can be useful.
# switch() picks the value whose name matches the string it is given;
# here it is called once for each element of `feelings`.
feelings <- c("sad", "afraid")
for (i in feelings) {
  print(
    switch(i,
      happy = "I am glad you are happy",
      afraid = "There is nothing to fear",
      sad = "Cheer up",
      angry = "Calm down now"
    )
  )
}
[1] "Cheer up"[1] "There is nothing to fear"Keyboard shortcuts
| ↑, ←, Pg Up, k | Go to previous slide |
| ↓, →, Pg Dn, Space, j | Go to next slide |
| Home | Go to first slide |
| End | Go to last slide |
| Number + Return | Go to specific slide |
| b / m / f | Toggle blackout / mirrored / fullscreen mode |
| c | Clone slideshow |
| p | Toggle presenter mode |
| s | Start & Stop the presentation timer |
| t | Reset the presentation timer |
| ?, h | Toggle this help |
| Esc | Back to slideshow |