-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
68 lines (59 loc) · 2.74 KB
/
run_analysis.R
File metadata and controls
68 lines (59 loc) · 2.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
library(dplyr)
# Fetch and unpack the raw data, skipping any step that is already done:
# the archive is downloaded only when Dataset.zip is missing from the working
# directory, and extracted only when the "UCI HAR Dataset" folder is missing.
url <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
file <- "Dataset.zip"
dir <- "UCI HAR Dataset"
if (!file.exists(file)) {
  # Rely on R's default download method instead of requiring an external
  # `curl` binary on the PATH, and request binary mode explicitly so the zip
  # archive is written byte-for-byte on every platform.
  download.file(url, destfile = file, mode = "wb")
}
if (!file.exists(dir)) {
  unzip(file)
}
# colClasses vector for the 561-column X_*.txt files: "numeric" marks a
# column to read, "NULL" marks a column to skip. The file is laid out in
# repeating groups, and only the leading columns of each group are kept
# (presumably the mean/std measurements -- confirm against features.txt).
cols_3d <- rep(c("numeric", "NULL"), times = c(6, 34))  # group of 40
cols_3df <- rep(c("numeric", "NULL"), times = c(6, 73)) # group of 79
cols_2d <- rep(c("numeric", "NULL"), times = c(2, 11))  # group of 13
cols <- c(
  rep(cols_3d, times = 5),   # columns   1-200
  rep(cols_2d, times = 5),   # columns 201-265
  rep(cols_3df, times = 3),  # columns 266-502
  rep(cols_2d, times = 4),   # columns 503-554
  rep("NULL", times = 7)     # columns 555-561
)
# Logical mask over the same 561 positions, TRUE where a column is kept;
# used to pick the matching rows out of the feature-name table.
cols_labels <- cols != "NULL"
# Subject ids: test rows first, then training rows, in a single column
# named "subject".
subjects <- rbind(
  read.csv(file.path(dir, "test/subject_test.txt"), sep = "", header = FALSE),
  read.csv(file.path(dir, "train/subject_train.txt"), sep = "", header = FALSE)
)
names(subjects) <- "subject"
# Measurements (X) and their activity numbers (y), stacked in the same
# test-then-train order so rows stay aligned with `subjects`. `cols` limits
# which X columns are actually read.
data_x <- rbind(
  read.csv(file.path(dir, "test/X_test.txt"),
           sep = "", header = FALSE, colClasses = cols),
  read.csv(file.path(dir, "train/X_train.txt"),
           sep = "", header = FALSE, colClasses = cols)
)
data_y <- rbind(
  read.csv(file.path(dir, "test/y_test.txt"), sep = "", header = FALSE),
  read.csv(file.path(dir, "train/y_train.txt"), sep = "", header = FALSE)
)
# Read the feature names (second column of features.txt; the first column is
# skipped) and keep only the rows matching the columns read into data_x.
feats <- read.csv(
  file.path(dir, "features.txt"),
  sep = "", header = FALSE, colClasses = c("NULL", "character")
)
features_fixed <- feats[cols_labels, 1]
# Ordered regex -> replacement rules that turn the raw names into tidy
# column labels; they are applied top to bottom, one gsub() per rule.
label_patterns <- c(
  "\\(\\)", "BodyBody", "^t", "^f", "-mean", "-std", "-X", "-Y", "-Z"
)
label_replacements <- c(
  "", "Body", "Time", "Frequency", "Mean", "Std", "X", "Y", "Z"
)
for (i in seq_along(label_patterns)) {
  features_fixed <- gsub(
    label_patterns[i], label_replacements[i], features_fixed
  )
}
# Apply the cleaned labels to the measurements and name the activity column.
names(data_x) <- features_fixed
names(data_y) <- "activitynr"
# Lookup table mapping each activity number ("nr") to its name ("activity").
activities <- read.csv(
  file.path(dir, "activity_labels.txt"),
  sep = "", header = FALSE, col.names = c("nr", "activity")
)
# Bind subject ids, activity numbers and measurements side by side, then
# attach the activity name by matching activitynr against the lookup's nr.
data <- merge(
  x = cbind(subjects, data_y, data_x),
  y = activities,
  by.x = "activitynr",
  by.y = "nr"
)
# The numeric code is redundant once the activity name is attached.
data[["activitynr"]] <- NULL
# Average every remaining column for each subject/activity pair.
# across(everything(), mean) replaces the deprecated summarise_each(funs(mean));
# everything() does not include the grouping columns, and the output columns
# keep their input names. .groups = "drop" returns an ungrouped table so no
# grouping leaks into later use of `result`.
result <- data %>%
  group_by(subject, activity) %>%
  summarise(across(everything(), mean), .groups = "drop")
# Write the result to result.txt without row names (argument spelled out in
# full rather than relying on partial matching of `row.name`).
write.table(result, "result.txt", row.names = FALSE)