library(mozzie)data(mozzie)head(mozzie, 2)
ID Year Week Colombo Gampaha Kalutara Kandy Matale Nuwara Eliya Galle1 1 2008 52 15 7 1 11 4 0 02 2 2009 1 44 23 5 16 21 2 0 Hambantota Matara Jaffna Kilinochchi Mannar Vavuniya Mulative Batticalo1 6 22 0 0 8 0 0 12 5 18 1 0 0 0 0 0 Ampara Trincomalee Kurunagala Puttalam Anuradhapura Polonnaruwa Badulla1 0 0 2 1 2 0 12 1 1 10 5 0 0 1 Monaragala Ratnapura Kegalle1 1 2 162 0 1 25
library(mozzie)data(mozzie)head(mozzie, 2)
ID Year Week Colombo Gampaha Kalutara Kandy Matale Nuwara Eliya Galle1 1 2008 52 15 7 1 11 4 0 02 2 2009 1 44 23 5 16 21 2 0 Hambantota Matara Jaffna Kilinochchi Mannar Vavuniya Mulative Batticalo1 6 22 0 0 8 0 0 12 5 18 1 0 0 0 0 0 Ampara Trincomalee Kurunagala Puttalam Anuradhapura Polonnaruwa Badulla1 0 0 2 1 2 0 12 1 1 10 5 0 0 1 Monaragala Ratnapura Kegalle1 1 2 162 0 1 25
Use the Min-Max transformation to rescale all the district variables onto the 0-1 range.
The Min-Max transformation is $\dfrac{x_i - \min(x)}{\max(x) - \min(x)}$, where $x = (x_1, x_2, \dots, x_n)$.
mozzie
# Colombo district: min-max rescale the Colombo column onto the 0-1 range.
# NAs are ignored when finding the minimum and maximum.
minmax.colombo <- (mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) /
  (max(mozzie$Colombo, na.rm = TRUE) - min(mozzie$Colombo, na.rm = TRUE))
head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263
mozzie
# Colombo district: min-max rescale the Colombo column onto the 0-1 range.
# NAs are ignored when finding the minimum and maximum.
minmax.colombo <- (mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) /
  (max(mozzie$Colombo, na.rm = TRUE) - min(mozzie$Colombo, na.rm = TRUE))
head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263
# Gampaha district: min-max rescale the Gampaha column onto the 0-1 range.
# NAs are ignored when finding the minimum and maximum.
minmax.gampaha <- (mozzie$Gampaha - min(mozzie$Gampaha, na.rm = TRUE)) /
  (max(mozzie$Gampaha, na.rm = TRUE) - min(mozzie$Gampaha, na.rm = TRUE))
head(minmax.gampaha)
[1] 0.02734375 0.08984375 0.07421875 0.08984375 0.09375000 0.06640625
mozzie
# Colombo district: min-max rescale the Colombo column onto the 0-1 range.
# NAs are ignored when finding the minimum and maximum.
minmax.colombo <- (mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) /
  (max(mozzie$Colombo, na.rm = TRUE) - min(mozzie$Colombo, na.rm = TRUE))
head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263
# Gampaha district: min-max rescale the Gampaha column onto the 0-1 range.
# NAs are ignored when finding the minimum and maximum.
minmax.gampaha <- (mozzie$Gampaha - min(mozzie$Gampaha, na.rm = TRUE)) /
  (max(mozzie$Gampaha, na.rm = TRUE) - min(mozzie$Gampaha, na.rm = TRUE))
head(minmax.gampaha)
[1] 0.02734375 0.08984375 0.07421875 0.08984375 0.09375000 0.06640625
# Kalutara district
# NOTE(review): the numerator uses mozzie$Gampaha rather than mozzie$Kalutara.
# This looks like the copy-and-paste slip that the text following these
# examples warns about, so it is left as written -- confirm before reusing.
minmax.kalutara <- (mozzie$Gampaha - min(mozzie$Kalutara, na.rm = TRUE)) /
  (max(mozzie$Kalutara, na.rm = TRUE) - min(mozzie$Kalutara, na.rm = TRUE))
head(minmax.kalutara)
[1] 0.09333333 0.30666667 0.25333333 0.30666667 0.32000000 0.22666667
mozzie
# Colombo district: min-max rescale the Colombo column onto the 0-1 range.
# NAs are ignored when finding the minimum and maximum.
minmax.colombo <- (mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) /
  (max(mozzie$Colombo, na.rm = TRUE) - min(mozzie$Colombo, na.rm = TRUE))
head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263
# Gampaha district: min-max rescale the Gampaha column onto the 0-1 range.
# NAs are ignored when finding the minimum and maximum.
minmax.gampaha <- (mozzie$Gampaha - min(mozzie$Gampaha, na.rm = TRUE)) /
  (max(mozzie$Gampaha, na.rm = TRUE) - min(mozzie$Gampaha, na.rm = TRUE))
head(minmax.gampaha)
[1] 0.02734375 0.08984375 0.07421875 0.08984375 0.09375000 0.06640625
# Kalutara district
# NOTE(review): the numerator uses mozzie$Gampaha rather than mozzie$Kalutara.
# This looks like the copy-and-paste slip that the text following these
# examples warns about, so it is left as written -- confirm before reusing.
minmax.kalutara <- (mozzie$Gampaha - min(mozzie$Kalutara, na.rm = TRUE)) /
  (max(mozzie$Kalutara, na.rm = TRUE) - min(mozzie$Kalutara, na.rm = TRUE))
head(minmax.kalutara)
[1] 0.09333333 0.30666667 0.25333333 0.30666667 0.32000000 0.22666667
It is very easy to make errors when copying and pasting code.
A mistake copied becomes a mistake repeated.
Whenever you need to copy and paste a block of code many times.
If you don't find a suitable built-in function to serve your purpose, you can write your own function.
To share your work with others.
rescale_minmax
rescale_minmax
rescale_minmax <-
rescale_minmax
rescale_minmax <-
rescale_minmax <- function() # Arguments/inputs should be defined inside ()
rescale_minmax
rescale_minmax <-
rescale_minmax <- function() # Arguments/inputs should be defined inside ()
# Skeleton of a function definition: the body between the braces is where the
# task is carried out and the output is produced.
rescale_minmax <- function() {
  # Task
  # output
}
Find all the inputs that correspond to a given function output?
# Colombo district
# The only input to this expression is the vector mozzie$Colombo.
(mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) /
  (max(mozzie$Colombo, na.rm = TRUE) - min(mozzie$Colombo, na.rm = TRUE))
Find all the inputs that correspond to a given function output?
# Colombo district
# The only input to this expression is the vector mozzie$Colombo.
(mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) /
  (max(mozzie$Colombo, na.rm = TRUE) - min(mozzie$Colombo, na.rm = TRUE))
Re-write the code with general names
# Give the input a general name so the expression no longer mentions a
# specific district.
x <- mozzie$Colombo
(x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
Find all the inputs that correspond to a given function output?
# Colombo district
# The only input to this expression is the vector mozzie$Colombo.
(mozzie$Colombo - min(mozzie$Colombo, na.rm = TRUE)) /
  (max(mozzie$Colombo, na.rm = TRUE) - min(mozzie$Colombo, na.rm = TRUE))
Re-write the code with general names
# Give the input a general name so the expression no longer mentions a
# specific district.
x <- mozzie$Colombo
(x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
Remove duplication/ Make your code efficient and readable
# range() returns c(minimum, maximum) in one call, replacing the repeated
# min()/max() calls.
rng <- range(x, na.rm = TRUE)
rng
[1] 0 475
# rng[1] is the minimum and rng[2] the maximum of x (NAs ignored).
rng <- range(x, na.rm = TRUE)
(x - rng[1]) / (rng[2] - rng[1])
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # range() gives c(min, max) in a single pass over x.
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # range() gives c(min, max) in a single pass over x.
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' This variant stores the result in a named object and then returns it by
#' naming that object as the last expression.
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  rng <- range(x, na.rm = TRUE)
  out.rescaled <- (x - rng[1]) / (rng[2] - rng[1])
  out.rescaled
}
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # range() gives c(min, max) in a single pass over x.
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' This variant stores the result in a named object and then returns it by
#' naming that object as the last expression.
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  rng <- range(x, na.rm = TRUE)
  out.rescaled <- (x - rng[1]) / (rng[2] - rng[1])
  out.rescaled
}
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' This variant stores the result in a named object and hands it back with an
#' explicit return() call.
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  rng <- range(x, na.rm = TRUE)
  out.rescaled <- (x - rng[1]) / (rng[2] - rng[1])
  return(out.rescaled)
}
In this situation Type A is the best.
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # range() gives c(min, max) in a single pass over x.
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # range() gives c(min, max) in a single pass over x.
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
rescale_minmax(c(1, 200, 250, 80, NA))
[1] 0.0000000 0.7991968 1.0000000 0.3172691 NA
# Rescale the Colombo column with the reusable function and preview it.
minmax.colombo <- rescale_minmax(mozzie$Colombo)
head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263
#' Rescale a numeric vector onto the 0-1 range (min-max transformation).
#'
#' @param x A numeric vector. `NA`s are ignored when finding the range and
#'   stay `NA` in the result.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # range() gives c(min, max) in a single pass over x.
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
rescale_minmax(c(1, 200, 250, 80, NA))
[1] 0.0000000 0.7991968 1.0000000 0.3172691 NA
# Rescale the Colombo column with the reusable function and preview it.
minmax.colombo <- rescale_minmax(mozzie$Colombo)
head(minmax.colombo)
[1] 0.03157895 0.09263158 0.08210526 0.12000000 0.11157895 0.06105263
# The same function call now handles every district; only the column changes.
minmax.gampaha <- rescale_minmax(mozzie$Gampaha)
minmax.kalutara <- rescale_minmax(mozzie$Kalutara)
# A vector containing Inf, used to check how rescale_minmax() copes with
# infinite values.
new.data.col <- c(400, 500, 350, 250, 60, 70, Inf)
rescale_minmax(new.data.col)
[1] 0 0 0 0 0 0 NaN
# A vector containing Inf, used to check how rescale_minmax() copes with
# infinite values.
new.data.col <- c(400, 500, 350, 250, 60, 70, Inf)
rescale_minmax(new.data.col)
[1] 0 0 0 0 0 0 NaN
#' Rescale a numeric vector onto the 0-1 range, ignoring infinite values
#' when finding the range.
#'
#' @param x A numeric vector. `NA`, `Inf` and `-Inf` are left out of the range
#'   calculation; the arithmetic is still applied to every element.
#' @return A numeric vector the same length as `x`.
rescale_minmax <- function(x) {
  # finite = TRUE restricts the range to finite values, so a single Inf no
  # longer stretches the denominator to infinity.
  rng <- range(x, na.rm = TRUE, finite = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Re-run the Inf check against the current definition of rescale_minmax().
new.data.col <- c(400, 500, 350, 250, 60, 70, Inf)
rescale_minmax(new.data.col)
[1] 0.77272727 1.00000000 0.65909091 0.43181818 0.00000000 0.02272727 Inf
Rewrite rescale_minmax so that -Inf is mapped to 0, and Inf is mapped to 1.
04:00
R for Data Science - Exercise 19.2.1, Question 3
04:00
R for Data Science - Exercise 19.2.1, Question 4
10:00
Descriptive names for variables.
Comment your code.
Write your own function to calculate parameter estimates of simple linear regression model.
Help: $\hat{\beta} = (X^{T}X)^{-1}X^{T}Y$
05:00
Write a function to calculate confidence intervals for the mean: $\bar{x} \pm t_{\alpha/2,\,(n-1)} \dfrac{s}{\sqrt{n}}$
10:00
#' Confidence interval for the mean, based on the t distribution.
#'
#' @param x A numeric vector of observations.
#' @param conf Confidence level, e.g. 0.95.
#' @return A numeric vector of length 2: the lower and upper limits.
cal_mean_ci <- function(x, conf) {
  len.x <- length(x)
  se <- sd(x) / sqrt(len.x) # standard error of the mean
  alpha <- 1 - conf
  # qt() returns the lower and upper t quantiles on n - 1 degrees of freedom;
  # the lower one is negative, so adding gives mean -/+ margin.
  mean(x) + se * qt(c(alpha / 2, 1 - alpha / 2), df = len.x - 1)
}

data <- c(165, 170, 175, 180, 185)
cal_mean_ci(data, 0.95)
[1] 165.1838 184.8162
#' Confidence interval for the mean, based on the t distribution.
#'
#' @param x A numeric vector of observations.
#' @param conf Confidence level, e.g. 0.95.
#' @return A numeric vector of length 2: the lower and upper limits.
cal_mean_ci <- function(x, conf) {
  len.x <- length(x)
  se <- sd(x) / sqrt(len.x) # standard error of the mean
  alpha <- 1 - conf
  # qt() returns the lower and upper t quantiles on n - 1 degrees of freedom;
  # the lower one is negative, so adding gives mean -/+ margin.
  mean(x) + se * qt(c(alpha / 2, 1 - alpha / 2), df = len.x - 1)
}

data <- c(165, 170, 175, 180, 185)
cal_mean_ci(data, 0.95)
[1] 165.1838 184.8162
#' Confidence interval for the mean, based on the t distribution.
#'
#' @param x A numeric vector of observations.
#' @param conf Confidence level; defaults to 0.95 when not supplied.
#' @return A numeric vector of length 2: the lower and upper limits.
cal_mean_ci <- function(x, conf = 0.95) {
  len.x <- length(x)
  se <- sd(x) / sqrt(len.x) # standard error of the mean
  alpha <- 1 - conf
  # qt() returns the lower and upper t quantiles on n - 1 degrees of freedom;
  # the lower one is negative, so adding gives mean -/+ margin.
  mean(x) + se * qt(c(alpha / 2, 1 - alpha / 2), df = len.x - 1)
}

# `conf` is omitted here, so the default of 0.95 applies.
cal_mean_ci(data)
[1] 165.1838 184.8162
cal_mean_ci(data, 0.99)
[1] 158.7221 191.2779
Control the flow of the execution.
Common ones include:
if, else
for
while
repeat
break
next
switch
if (condition) { # do something} else { # do something else}
Example
#' Print whether a single number is even or odd.
#'
#' @param x A single number; `if` needs a length-one condition.
#' @return The printed message, invisibly (the value of `print()`).
test_even_odd <- function(x) {
  is_even <- x %% 2 == 0
  if (is_even) {
    print("even number")
  } else {
    print("odd number")
  }
}
test_even_odd(5)
[1] "odd number"
test_even_odd(6)
[1] "even number"
ifelse(condition, if TRUE the output, if FALSE the output)
Example
#' Label each number as even or odd (vectorised).
#'
#' @param x A numeric vector.
#' @return A character vector the same length as `x`.
test_even_odd_v2 <- function(x) {
  # ifelse() evaluates the test element by element, so vectors are fine.
  ifelse(x %% 2 == 0, "even number", "odd number")
}
test_even_odd_v2(5)
FALSE [1] "odd number"
test_even_odd_v2(c(1,6))
FALSE [1] "odd number" "even number"
if, else
and ifelse
#' Print whether a single number is even or odd.
#'
#' @param x A single number.
#' @return The printed message, invisibly (the value of `print()`).
test_even_odd <- function(x) {
  # `if` works on one TRUE/FALSE value only, so this function is not
  # vectorised: a longer `x` triggers the "condition has length > 1"
  # condition (a warning in older R, an error from R 4.2.0 onwards).
  if (x %% 2 == 0) {
    print("even number")
  } else {
    print("odd number")
  }
}
test_even_odd(5)
FALSE [1] "odd number"
test_even_odd(c(1,6))
FALSE Warning in if (x%%2 == 0) {: the condition has length > 1 and only the first
FALSE element will be used
FALSE [1] "odd number"
#' Label each number as even or odd (vectorised).
#'
#' @param x A numeric vector.
#' @return A character vector the same length as `x`.
test_even_odd_v2 <- function(x) {
  # ifelse() evaluates the test element by element, so vectors are fine.
  ifelse(x %% 2 == 0, "even number", "odd number")
}
test_even_odd_v2(5)
FALSE [1] "odd number"
test_even_odd_v2(c(1,6))
FALSE [1] "odd number" "even number"
#' Convert a single mark into a letter grade.
#'
#' Below 20 is "D", 20 to 50 is "C", above 50 up to 60 is "B", and anything
#' above 60 is "A".
#'
#' @param marks A single number.
#' @return A length-one character vector: "A", "B", "C" or "D".
grade_marks <- function(marks) {
  # Conditions are checked top to bottom; the first one that holds wins.
  if (marks < 20) {
    "D"
  } else if (marks <= 50) {
    "C"
  } else if (marks <= 60) {
    "B"
  } else {
    "A"
  }
}
grade_marks(75)
[1] "A"
R for Data Science - Exercise 19.4.4, Question 2
Help:
lubridate::now()
and lubridate::hour()
10:00
for loop
for (i in 1:5) {
  print(i * 100)
}
[1] 100[1] 200[1] 300[1] 400[1] 500
continents <- c("Asia", "EU", "AUS", "NA", "SA", "Africa")

# Variant 1: `i` takes each element (a string). Indexing the unnamed vector
# by that string finds no matching name, so every iteration prints NA.
for (i in continents) {
  print(continents[i])
}

# Variant 2: positions 1 to 4 only, so just the first four elements print.
for (i in 1:4) {
  print(continents[i])
}

# Variant 3: seq(continents) yields the positions 1 to 6, so all six print.
for (i in seq(continents)) {
  print(continents[i])
}

# Variant 4: same as variant 2; braces are optional for a one-statement body.
for (i in 1:4) print(continents[i])
## [1] "Asia"## [1] "EU"## [1] "AUS"## [1] "NA"## [1] "SA"## [1] "Africa"
# matrix() fills column by column, so 1:6 with two columns gives a 3 x 2
# matrix whose first column is 1, 2, 3.
mat <- matrix(1:6, ncol = 2)
mat
[,1] [,2][1,] 1 4[2,] 2 5[3,] 3 6
# Nested loops: the outer loop walks rows 1 to 3 and, for each row, the inner
# loop walks columns 1 to 2, printing mat[i, j] once per iteration.
for (i in 1:3) { for (j in 1:2) { print(mat[i, j]) }}
[1] 1[1] 4[1] 2[1] 5[1] 3[1] 6
Write a function to count the number of even numbers in a vector.
08:00
# The condition is checked before every pass; the loop stops once i reaches 10.
i <- 1 # initial value
while (i < 10) {
  print(i)
  i <- i + 1 # increment
}
[1] 1[1] 2[1] 3[1] 4[1] 5[1] 6[1] 7[1] 8[1] 9
Print the first n numbers of the Fibonacci sequence.
0, 1, 1, 2, 3, 5, 8....
Iterate over a block of code multiple number of times.
No condition check in repeat loop to exit the loop.
The only way to exit a repeat loop is to call break.
Example 1
x <- 5
repeat {
  print(x)
  x <- x + 1
  # repeat has no condition of its own; break is the only way out.
  if (x == 10) {
    break
  }
}
[1] 5[1] 6[1] 7[1] 8[1] 9
Iterate over a block of code multiple number of times.
No condition check in repeat loop to exit the loop.
The only way to exit a repeat loop is to call break.
Example 1
x <- 5
repeat {
  print(x)
  x <- x + 1
  # repeat has no condition of its own; break is the only way out.
  if (x == 10) {
    break
  }
}
[1] 5[1] 6[1] 7[1] 8[1] 9
Example 2
# Fix the random seed so the sequence of draws is reproducible.
set.seed(1)
repeat {
  # Draw one uniform random number between 5 and 10.
  x <- runif(1, 5, 10)
  print(x)
  # Stop as soon as a draw falls below 6.1.
  if (x < 6.1) {
    break
  }
}
[1] 6.327543[1] 6.860619[1] 7.864267[1] 9.541039[1] 6.00841
for (i in 1:10) {
  if (i <= 5) {
    next # Skip the first 5 iterations
  }
  # Only reached when i is 6 or more.
  print(i)
}
[1] 6[1] 7[1] 8[1] 9[1] 10
When you want a function to do different things in different circumstances, then the switch function can be useful.
feelings <- c("sad", "afraid")
for (i in feelings) {
  # switch() returns the value whose name matches the string in `i`.
  print(
    switch(i,
      happy = "I am glad you are happy",
      afraid = "There is nothing to fear",
      sad = "Cheer up",
      angry = "Calm down now"
    )
  )
}
[1] "Cheer up"[1] "There is nothing to fear"
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
s | Start & Stop the presentation timer |
t | Reset the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |