빅데이터 시각화 5주차 강의

# Show the current working directory.
getwd()

‘C:/Users/MyCom/R/수업자료’

# Switch the working directory to the course data folder, then load r2.txt
# using a single space as the field separator and the first line as header.
# NOTE(review): the str() output recorded below shows a single column whose
# values contain "\t", so the file looks tab-delimited and sep = "\t" may be
# what was intended — confirm against the data file.
setwd("C:/Users/MyCom/R/수업자료/data")
students <- read.table(
  file = "C:/Users/MyCom/R/수업자료/data/r2.txt",
  header = TRUE,
  sep = " ",
  encoding = "UTF-8"
)

# Inspect the structure of the loaded data frame.
str(students)
'data.frame':	5 obs. of  1 variable:
 $ name.korean.english.math: Factor w/ 5 levels "강서준\t100\t90\t100",..: 1 2 3 4 5
# Display the loaded data frame.
students
name.korean.english.math
강서준 100 90 100
김도형 90 100 80
박정원 90 95 90
이상훈 100 85 95
최건우 85 100 100
# Read students.csv, declaring the file's encoding as UCS-2LE so the text is
# re-encoded while reading, then display the result.
studentss <- read.csv(
  file = "C:/Users/MyCom/R/수업자료/data/students.csv",
  fileEncoding = "UCS-2LE"
)

studentss
namekoreanenglishmath
강서준100 90 100
김도형 90 120 80
박정원 90 95 90
이상훈100 85 95
최건우 85 120 -100

한글 깨짐 해결방안

  • read.csv() 함수에 fileEncoding = "UCS-2LE" 인자를 넣어주기
# Double quotes: read the file with as.is = TRUE, then write it back out
# with write.table()'s default settings.
# NOTE(review): the traceback recorded below shows this read.table() call
# failing with "line 3 did not have 4 elements"; the input probably needs an
# explicit sep — confirm against the data file.
students <- read.table(
  file = "C:/Users/MyCom/R/수업자료/data/r2.txt",
  header = TRUE,
  as.is = TRUE,
  encoding = "UTF-8"
)
write.table(
  students,
  file = "C:/Users/MyCom/R/수업자료/data/output.txt"
)

Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, : line 3 did not have 4 elements
Traceback:


1. read.table("C:/Users/MyCom/R/수업자료/data/r2.txt", header = T, 
 .     as.is = T, encoding = "UTF-8")

2. scan(file = file, what = what, sep = sep, quote = quote, dec = dec, 
 .     nmax = nrows, skip = 0, na.strings = na.strings, quiet = TRUE, 
 .     fill = fill, strip.white = strip.white, blank.lines.skip = blank.lines.skip, 
 .     multi.line = FALSE, comment.char = comment.char, allowEscapes = allowEscapes, 
 .     flush = flush, encoding = encoding, skipNul = skipNul)
# Write the text file again, this time without quotes.
write.table(
  students,
  file = "C:/Users/MyCom/R/수업자료/data/output.txt",
  quote = FALSE
)
# Save as a CSV file, also without quotes.
write.csv(
  students,
  file = "C:/Users/MyCom/R/수업자료/data/output.csv",
  quote = FALSE
)
# Conditional subsetting: put the row/column condition inside [].
# Vector case.
test <- c(15, 20, 30, NA, 45)
# A comparison with NA yields NA, so the NA element stays in the result.
test[test < 40]
test[test %% 3 != 0]

<ol class=list-inline> <li>15</li> <li>20</li> <li>30</li> <li><NA></li> </ol>

<ol class=list-inline> <li>20</li> <li><NA></li> </ol>

# Extract only the NA elements.
test[is.na(test)]
# Extract only the non-NA elements.
test[!is.na(test)]
# Even elements, with NA excluded explicitly.
test[!is.na(test) & test %% 2 == 0]

<NA>

<ol class=list-inline> <li>15</li> <li>20</li> <li>30</li> <li>45</li> </ol>

<ol class=list-inline> <li>20</li> <li>30</li> </ol>

# Conditional subsetting: put the row/column condition inside [].
# Data frame case: build a small example table and display it.
characters <- data.frame(
  name = c("길동", "춘향", "철수"),
  age = c(30, 16, 21),
  gender = factor(c("M", "F", "M"))
)
characters

nameagegender
길동30 M
춘향16 F
철수21 M
# Keep all columns of the rows whose gender is "F".
characters[characters$gender == "F", ]
nameagegender
2춘향16 F
# Keep the rows for males younger than 30 (both conditions must hold).
characters[characters$gender == "M" & characters$age < 30, ]
nameagegender
3철수21 M
# Conditional: using an if statement.
x <- 5
# A non-zero remainder after dividing by 2 means the number is odd.
if (x %% 2 != 0) {
  print("x는 홀수")
} else {
  print("x는 짝수")
}

[1] "x는 홀수"
# Three-way branch on the sign of x.
x <- -1
if (x < 0) {
  print("x is a negative value")
} else if (x > 0) {
  print("x is a positive value")
} else {
  print("x is zero")
}
[1] "x is a negative value"
# Conditional: using ifelse().
x <- -5:5
# Use 3 significant digits when printing numbers.
options(digits = 3)
# Square roots of the negative entries produce NaN with a warning.
sqrt(x)

Warning message in sqrt(x):
"NaN이 생성되었습니다"

<ol class=list-inline> <li>NaN</li> <li>NaN</li> <li>NaN</li> <li>NaN</li> <li>NaN</li> <li>0</li> <li>1</li> <li>1.4142135623731</li> <li>1.73205080756888</li> <li>2</li> <li>2.23606797749979</li> </ol>

# Replace negative entries with NA first so that sqrt() yields NA, not NaN.
sqrt(ifelse(x >= 0, x, NA))

<ol class=list-inline> <li><NA></li> <li><NA></li> <li><NA></li> <li><NA></li> <li><NA></li> <li>0</li> <li>1</li> <li>1.4142135623731</li> <li>1.73205080756888</li> <li>2</li> <li>2.23606797749979</li> </ol>

# Conditional: using ifelse() on a data frame.
# Load the student scores (file declared as UCS-2LE) and display them.
students <- read.csv(
  file = "C:/Users/MyCom/R/수업자료/data/students.csv",
  fileEncoding = "UCS-2LE"
)
students
namekoreanenglishmath
강서준100 90 100
김도형 90 120 80
박정원 90 95 90
이상훈100 85 95
최건우 85 120 -100
# For columns 2 to 4, replace every value outside the 0-100 range with NA,
# one column at a time, then display the cleaned table.
students[, 2] <- ifelse(
  students[, 2] < 0 | students[, 2] > 100, NA, students[, 2]
)
students[, 3] <- ifelse(
  students[, 3] < 0 | students[, 3] > 100, NA, students[, 3]
)
students[, 4] <- ifelse(
  students[, 4] < 0 | students[, 4] > 100, NA, students[, 4]
)
students

namekoreanenglishmath
강서준100 90 100
김도형 90 NA 80
박정원 90 95 90
이상훈100 85 95
최건우 85 NA NA
# Loop: repeat {} runs until break is reached.
i <- 1
repeat {
  # Leave the loop once the counter has passed 10.
  if (i > 10) {
    break
  }
  print(i)
  i <- i + 1
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
# Loop: the same count from 1 to 10 written with while.
i <- 1
while (i <= 10) {
  print(i)
  i <- i + 1
}


[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
# Multiplication table for 2 (2 x 1 through 2 x 9).
i <- 1
while (i <= 9) {
  print(paste(2, "x", i, "=", 2 * i))
  i <- i + 1
}
[1] "2 x 1 = 2"
[1] "2 x 2 = 4"
[1] "2 x 3 = 6"
[1] "2 x 4 = 8"
[1] "2 x 5 = 10"
[1] "2 x 6 = 12"
[1] "2 x 7 = 14"
[1] "2 x 8 = 16"
[1] "2 x 9 = 18"
# Loop: for iterates over each element of a sequence.

for (i in seq_len(10)) {
  print(i)
}
[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
# Full multiplication table (2 through 9) with nested for loops.
for (i in 2:9) {
  for (j in 1:9) {
    # i and j are integers, so %d formats them exactly as paste() would.
    print(sprintf("%d X %d = %d", i, j, i * j))
  }
}
[1] "2 X 1 = 2"
[1] "2 X 2 = 4"
[1] "2 X 3 = 6"
[1] "2 X 4 = 8"
[1] "2 X 5 = 10"
[1] "2 X 6 = 12"
[1] "2 X 7 = 14"
[1] "2 X 8 = 16"
[1] "2 X 9 = 18"
[1] "3 X 1 = 3"
[1] "3 X 2 = 6"
[1] "3 X 3 = 9"
[1] "3 X 4 = 12"
[1] "3 X 5 = 15"
[1] "3 X 6 = 18"
[1] "3 X 7 = 21"
[1] "3 X 8 = 24"
[1] "3 X 9 = 27"
[1] "4 X 1 = 4"
[1] "4 X 2 = 8"
[1] "4 X 3 = 12"
[1] "4 X 4 = 16"
[1] "4 X 5 = 20"
[1] "4 X 6 = 24"
[1] "4 X 7 = 28"
[1] "4 X 8 = 32"
[1] "4 X 9 = 36"
[1] "5 X 1 = 5"
[1] "5 X 2 = 10"
[1] "5 X 3 = 15"
[1] "5 X 4 = 20"
[1] "5 X 5 = 25"
[1] "5 X 6 = 30"
[1] "5 X 7 = 35"
[1] "5 X 8 = 40"
[1] "5 X 9 = 45"
[1] "6 X 1 = 6"
[1] "6 X 2 = 12"
[1] "6 X 3 = 18"
[1] "6 X 4 = 24"
[1] "6 X 5 = 30"
[1] "6 X 6 = 36"
[1] "6 X 7 = 42"
[1] "6 X 8 = 48"
[1] "6 X 9 = 54"
[1] "7 X 1 = 7"
[1] "7 X 2 = 14"
[1] "7 X 3 = 21"
[1] "7 X 4 = 28"
[1] "7 X 5 = 35"
[1] "7 X 6 = 42"
[1] "7 X 7 = 49"
[1] "7 X 8 = 56"
[1] "7 X 9 = 63"
[1] "8 X 1 = 8"
[1] "8 X 2 = 16"
[1] "8 X 3 = 24"
[1] "8 X 4 = 32"
[1] "8 X 5 = 40"
[1] "8 X 6 = 48"
[1] "8 X 7 = 56"
[1] "8 X 8 = 64"
[1] "8 X 9 = 72"
[1] "9 X 1 = 9"
[1] "9 X 2 = 18"
[1] "9 X 3 = 27"
[1] "9 X 4 = 36"
[1] "9 X 5 = 45"
[1] "9 X 6 = 54"
[1] "9 X 7 = 63"
[1] "9 X 8 = 72"
[1] "9 X 9 = 81"
# Print only the even numbers from 1 to 10.
for (i in 1:10) {
  # Skip odd numbers.
  if (i %% 2 != 0) {
    next
  }
  print(i)
}

[1] 2
[1] 4
[1] 6
[1] 8
[1] 10
# Print only the odd numbers from 1 to 10.
for (i in 1:10) {
  # Skip even numbers.
  if (i %% 2 == 0) {
    next
  }
  print(i)
}
[1] 1
[1] 3
[1] 5
[1] 7
[1] 9
# Print the prime numbers from 1 to 10.
for (i in 1:10) {
  # Count how many of 1..i divide i evenly.
  check <- 0
  for (j in 1:i) {
    # The comparison is TRUE/FALSE, which adds 1 or 0 to the counter.
    check <- check + (i %% j == 0)
  }
  # A prime has exactly two divisors: 1 and itself.
  if (check == 2) {
    print(i)
  }
}
[1] 2
[1] 3
[1] 5
[1] 7
# NA handling with a for loop.
# Reload the raw student scores (file declared as UCS-2LE) and display them.
students <- read.csv(
  file = "C:/Users/MyCom/R/수업자료/data/students.csv",
  fileEncoding = "UCS-2LE"
)
students
namekoreanenglishmath
강서준100 90 100
김도형 90 120 80
박정원 90 95 90
이상훈100 85 95
최건우 85 120 -100
# Replace every value outside the 0-100 range with NA in columns 2 to 4,
# then display the cleaned table.
for (i in 2:4) {
  out_of_range <- students[, i] < 0 | students[, i] > 100
  students[, i] <- ifelse(out_of_range, NA, students[, i])
}
students
namekoreanenglishmath
강서준100 90 100
김도형 90 NA 80
박정원 90 95 90
이상훈100 85 95
최건우 85 NA NA
# User-defined function.
#' Compute the factorial of a non-negative whole number.
#'
#' @param x A single finite, non-negative whole number (e.g. 5 or 5L).
#' @return The factorial of `x` as a numeric value; `fact(0)` is 1.
fact <- function(x) {
  # Reject anything that is not one finite, non-negative whole number.
  # The earlier loop silently returned 1 for negative input, returned a
  # meaningless product for fractional input, never finished for Inf, and
  # failed with an unrelated message on NA or vector input.
  is_valid <- is.numeric(x) && length(x) == 1L && is.finite(x) &&
    x >= 0 && x == floor(x)
  if (!is_valid) {
    stop("x must be a single non-negative whole number", call. = FALSE)
  }
  # The product of an empty sequence is 1, which covers 0! and 1!.
  prod(seq_len(x))
}

fact(5)

120