티스토리 뷰
1. 데이터프레임 생성
df <- data.frame(class = c(1, 2, 3, 4),
+ korean = c(81, 79, 85, 83),
+ english = c(82, 86, 83, 87),
+ math = c(78, 79, 81, 80))
> df
class korean english math
1 1 81 82 78
2 2 79 86 79
3 3 85 83 81
4 4 83 87 80
1.1 데이터 저장
write.csv()
write.csv(df, "class_mean.csv")
write.csv(df, "class_mean.csv") #write.csv(데이터이름, "파일명.csv")
2. 내장 데이터셋 읽어오기
> data("iris")
> write.csv(iris, "iris2.csv" )
> setwd("C:/Users/USER/Dropbox/R/Mybook/data")
> write.csv(iris, "iris2.csv" )

2.2 파일불러오기
### 1. csv파일 불러오기, 별도의 패키지 없이 내장 함수 사용(p202)
> read.csv("class_mean.csv") #단순히 읽음
X class korean english math
1 1 1 81 82 78
2 2 2 79 86 79
3 3 3 85 83 81
4 4 4 83 87 80
>
> data <- read.csv("class_mean.csv")
> str(data)
'data.frame': 4 obs. of 5 variables:
$ X : int 1 2 3 4
$ class : int 1 2 3 4
$ korean : int 81 79 85 83
$ english: int 82 86 83 87
$ math : int 78 79 81 80
> View(data)

2.엑셀파일 불러오기
> install.packages("gapminder")
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/USER/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/gapminder_0.3.0.zip'
Content type 'application/zip' length 2031907 bytes (1.9 MB)
downloaded 1.9 MB
package ‘gapminder’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in
C:\Users\USER\AppData\Local\Temp\RtmpmohEY4\downloaded_packages
install.packages("gapminder")
library(gapminder)
?gapminder

> gapminder <- as.data.frame(gapminder)
> head(gapminder)
country continent year lifeExp pop gdpPercap
1 Afghanistan Asia 1952 28.801 8425333 779.4453
2 Afghanistan Asia 1957 30.332 9240934 820.8530
3 Afghanistan Asia 1962 31.997 10267083 853.1007
4 Afghanistan Asia 1967 34.020 11537966 836.1971
5 Afghanistan Asia 1972 36.088 13079460 739.9811
6 Afghanistan Asia 1977 38.438 14880372 786.1134
> tail(gapminder)
country continent year lifeExp pop gdpPercap
1699 Zimbabwe Africa 1982 60.363 7636524 788.8550
1700 Zimbabwe Africa 1987 62.351 9216418 706.1573
1701 Zimbabwe Africa 1992 60.377 10704340 693.4208
1702 Zimbabwe Africa 1997 46.809 11404948 792.4500
1703 Zimbabwe Africa 2002 39.989 11926563 672.0386
1704 Zimbabwe Africa 2007 43.487 12311143 469.7093
1. 데이터 추출
> data(iris)
> colnames(iris)
[1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
iris["Species"]=="setosa"
Species
[1,] TRUE
[2,] TRUE
[3,] TRUE
[4,] TRUE
[5,] TRUE
[6,] TRUE
[7,] TRUE
[8,] TRUE
[9,] TRUE
[10,] TRUE
#library dplyr
library(dplyr)
> filter(iris, Species=="setosa")
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
7 4.6 3.4 1.4 0.3 setosa
8 5.0 3.4 1.5 0.2 setosa
9 4.4 2.9 1.4 0.2 setosa
10 4.9 3.1 1.5 0.1 setosa
> data <- filter(iris, Species=="setosa")
> dim(data)
[1] 50 5
> summary(data)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
Min. :4.300 Min. :2.300 Min. :1.000 Min. :0.100 setosa :50
1st Qu.:4.800 1st Qu.:3.200 1st Qu.:1.400 1st Qu.:0.200 versicolor: 0
Median :5.000 Median :3.400 Median :1.500 Median :0.200 virginica : 0
Mean :5.006 Mean :3.428 Mean :1.462 Mean :0.246
3rd Qu.:5.200 3rd Qu.:3.675 3rd Qu.:1.575 3rd Qu.:0.300
Max. :5.800 Max. :4.400 Max. :1.900 Max. :0.600
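예: Petal.Length가 평균값(summary(iris)의 Mean 3.758) 이하인 붓꽃 추출. Petal.Length는 소수 첫째 자리까지만 기록되어 있으므로 기준값을 3.76이나 3.765로 잡아도 결과는 같다.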
> summary(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100 setosa :50
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300 versicolor:50
Median :5.800 Median :3.000 Median :4.350 Median :1.300 virginica :50
Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
> data_1 <- filter(iris, Petal.Length<=3.765)
> dim(data_1)
[1] 57 5
> summary(data_1)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.1000 setosa :50
1st Qu.:4.800 1st Qu.:3.000 1st Qu.:1.400 1st Qu.:0.2000 versicolor: 7
Median :5.000 Median :3.400 Median :1.500 Median :0.2000 virginica : 0
Mean :5.037 Mean :3.307 Mean :1.702 Mean :0.3456
3rd Qu.:5.200 3rd Qu.:3.600 3rd Qu.:1.600 3rd Qu.:0.4000
Max. :5.800 Max. :4.400 Max. :3.700 Max. :1.3000
subset()
subset(x, 조건, 열)
# 열을 지정하지 않으면 전체 열을 반환한다
> subset(iris, Species=="setosa")
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
> subset(iris, Petal.Length<=3.76)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 5.1 3.5 1.4 0.2 setosa
2 4.9 3.0 1.4 0.2 setosa
3 4.7 3.2 1.3 0.2 setosa
4 4.6 3.1 1.5 0.2 setosa
5 5.0 3.6 1.4 0.2 setosa
6 5.4 3.9 1.7 0.4 setosa
7 4.6 3.4 1.4 0.3 setosa
8 5.0 3.4 1.5 0.2 setosa
9 4.4 2.9 1.4 0.2 setosa
10 4.9 3.1 1.5 0.1 setosa
3.2 일괄처리하기
ifelse()
> x <- c(1:10)
> x
[1] 1 2 3 4 5 6 7 8 9 10
> ifelse(x%%2==1, T, F)
[1] TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE
> x[ifelse(x%%2==1, T, F)]
[1] 1 3 5 7 9
> iris$"Petal.Mean" <- ifelse(iris$Petal.Length>=3.76, "above", "below")
> head(iris)
Sepal.Length Sepal.Width Petal.Length Petal.Width Species Petal.Mean
1 5.1 3.5 1.4 0.2 setosa below
2 4.9 3.0 1.4 0.2 setosa below
3 4.7 3.2 1.3 0.2 setosa below
4 4.6 3.1 1.5 0.2 setosa below
5 5.0 3.6 1.4 0.2 setosa below
6 5.4 3.9 1.7 0.4 setosa below
apply(x, MARGIN, 함수)  # MARGIN: 1이면 행 단위, 2이면 열 단위로 함수를 적용
> set.seed(1)
> exams <- data.frame(ID=c(1:10),
+ ex1=sample(x=60:100, size=10),
+ ex2=sample(x=50:95, size=10),
+ ex3=sample(x=70:99, size=10),
+ ex4=sample(x=63:90, size=10),
+ ex5=sample(x=70:90, size=10))
> exams
ID ex1 ex2 ex3 ex4 ex5
1 1 63 91 71 72 76
2 2 98 59 79 68 88
3 3 60 56 94 77 79
4 4 93 58 81 82 75
5 5 82 64 84 87 83
6 6 73 70 70 74 71
7 7 77 86 89 89 82
8 8 92 74 72 70 85
9 9 80 89 75 85 87
10 10 97 87 98 84 84
> exams$ID <- apply(exams[1], 1, as.factor)
> str(exams$ID)
Factor w/ 10 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10
> apply(exams[2:6],2, max)
ex1 ex2 ex3 ex4 ex5
98 91 98 89 88
> apply(exams[2:6],2, min)
ex1 ex2 ex3 ex4 ex5
60 56 70 68 71
사용자정의 함수
함수이름 <- function(인자1, 인자2...){
함수 동작 시 수행할 코드
return(반환값)
}
# Label scores as "Pass" or "Fail" against a cut-off of 70.
#
# x: a score or a vector of scores; the comparison is applied elementwise.
# Returns "Pass" where the score is at least 70 and "Fail" otherwise,
# one label per element of x.
myfunc <- function(x) {
  passed <- x >= 70
  ifelse(passed, "Pass", "Fail")
}
> myfunc(80)
[1] "Pass"
> myfunc(60)
[1] "Fail"
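# 주의: exams 전체를 넘기면 ID 열도 판정 대상에 포함된다. ID가 factor이므로 apply()가 데이터프레임을 문자형 행렬로 변환하여 문자열 비교가 이루어지고, 그래서 ID 8, 9가 "Pass"로 나온다. 점수 열만 판정하려면 apply(exams[2:6], 2, myfunc)를 사용한다.
> apply(exams,2,myfunc)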
ID ex1 ex2 ex3 ex4 ex5
[1,] "Fail" "Fail" "Pass" "Pass" "Pass" "Pass"
[2,] "Fail" "Pass" "Fail" "Pass" "Fail" "Pass"
[3,] "Fail" "Fail" "Fail" "Pass" "Pass" "Pass"
[4,] "Fail" "Pass" "Fail" "Pass" "Pass" "Pass"
[5,] "Fail" "Pass" "Fail" "Pass" "Pass" "Pass"
[6,] "Fail" "Pass" "Pass" "Pass" "Pass" "Pass"
[7,] "Fail" "Pass" "Pass" "Pass" "Pass" "Pass"
[8,] "Pass" "Pass" "Pass" "Pass" "Pass" "Pass"
[9,] "Pass" "Pass" "Pass" "Pass" "Pass" "Pass"
[10,] "Fail" "Pass" "Pass" "Pass" "Pass" "Pass"
728x90
반응형
공지사항
최근에 올라온 글
최근에 달린 댓글
- Total
- Today
- Yesterday
반응형