빅데이터 시각화 4주차 강의

x <-1 
x

1

y <-2
y

2

# Swap the values of x and y, using temp as the intermediate holder.
temp <- x
x <- y
y <- temp
y

1

# Two complex conjugates; adding them cancels the imaginary parts.
xi <- complex(real = 1, imaginary = 2)
yi <- complex(real = 1, imaginary = -2)
xi + yi

2+0i

str = "heel"
str

‘heel’

blood.type=factor(c('a','b','o','ab'))
blood.type

<ol class=list-inline> <li>a</li> <li>b</li> <li>o</li> <li>ab</li> </ol>

<details> <summary style=display:list-item;cursor:pointer> Levels: </summary> <ol class=list-inline> <li>'a'</li> <li>'ab'</li> <li>'b'</li> <li>'o'</li> </ol> </details>
    # NaN: dividing Inf by -Inf has no defined value.

    xinf <- Inf
    yinf <- -Inf
    xinf / yinf
    

    NaN

    is.integer(1)
    

    FALSE

    is.numeric(1)
    

    TRUE

    is.integer(1L) #정수 형 L붙이기
    

    TRUE

    x=1
    is.integer(x)
    

    FALSE

    x=1L
    is.integer(x) #true
    

    TRUE

    1:7
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> <li>6</li> <li>7</li> </ol>

    7:1
    

    <ol class=list-inline> <li>7</li> <li>6</li> <li>5</li> <li>4</li> <li>3</li> <li>2</li> <li>1</li> </ol>

    c(1:5)
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> </ol>

    c(1,2,3,c(4:6))
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> <li>6</li> </ol>

    x=c(1,2,3)
    x
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> </ol>

    # Start from an empty vector (NULL) and append the integers 1 to 3.
    y <- NULL
    y <- c(y, 1:3)
    y
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> </ol>

    seq(1,10,2)
    

    <ol class=list-inline> <li>1</li> <li>3</li> <li>5</li> <li>7</li> <li>9</li> </ol>

    seq(1,10,by=2)
    

    <ol class=list-inline> <li>1</li> <li>3</li> <li>5</li> <li>7</li> <li>9</li> </ol>

    seq(0,1,by=0.1)
    

    <ol class=list-inline> <li>0</li> <li>0.1</li> <li>0.2</li> <li>0.3</li> <li>0.4</li> <li>0.5</li> <li>0.6</li> <li>0.7</li> <li>0.8</li> <li>0.9</li> <li>1</li> </ol>

    seq(0,1,length.out=11)##11개 값생성
    

    <ol class=list-inline> <li>0</li> <li>0.1</li> <li>0.2</li> <li>0.3</li> <li>0.4</li> <li>0.5</li> <li>0.6</li> <li>0.7</li> <li>0.8</li> <li>0.9</li> <li>1</li> </ol>

    rep(c(1:3),times=2)
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>1</li> <li>2</li> <li>3</li> </ol>

    rep(c(1:3),times=3)
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>1</li> <li>2</li> <li>3</li> <li>1</li> <li>2</li> <li>3</li> </ol>

    rep(c(1:3),each=2)
    

    <ol class=list-inline> <li>1</li> <li>1</li> <li>2</li> <li>2</li> <li>3</li> <li>3</li> </ol>

    x=c(2,4,6,8,10)
    length(x)
    

    5

    x[1]
    

    2

    x[c(1,2,3)]
    

    <ol class=list-inline> <li>2</li> <li>4</li> <li>6</li> </ol>

    x[-(1:3)]  # drop the elements at positions 1, 2 and 3 and show the rest
    

    <ol class=list-inline> <li>8</li> <li>10</li> </ol>

    x[c(1:3)]
    

    <ol class=list-inline> <li>2</li> <li>4</li> <li>6</li> </ol>

    # Vectors of different lengths for the arithmetic / recycling examples.
    x <- c(1, 2, 3, 4)
    y <- c(5, 6, 7, 8)
    z <- c(3, 4)
    w <- c(5, 6, 7)

    # The scalar 2 is added to every element of x.
    x + 2
    

    <ol class=list-inline> <li>3</li> <li>4</li> <li>5</li> <li>6</li> </ol>

    x+y
    

    <ol class=list-inline> <li>6</li> <li>8</li> <li>10</li> <li>12</li> </ol>

    x+z
    

    <ol class=list-inline> <li>4</li> <li>6</li> <li>6</li> <li>8</li> </ol>

    x+w
    
    Warning message in x + w:
    "longer object length is not a multiple of shorter object length"
    

    <ol class=list-inline> <li>6</li> <li>8</li> <li>10</li> <li>9</li> </ol>

    x <- c(1, 2, 3, 4)
    y <- c(11, 22, 33)
    z <- c(3, 44)
    w <- c(44, 11, 22)

    # A scalar is added to every element of x.
    x + 2
    x + 5
    # z has length 2, so it is recycled twice to match the length of x.
    x + z
    

    <ol class=list-inline> <li>3</li> <li>4</li> <li>5</li> <li>6</li> </ol>

    <ol class=list-inline> <li>6</li> <li>7</li> <li>8</li> <li>9</li> </ol>

    <ol class=list-inline> <li>4</li> <li>46</li> <li>6</li> <li>48</li> </ol>

    x=1:20
    
    x
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> <li>6</li> <li>7</li> <li>8</li> <li>9</li> <li>10</li> <li>11</li> <li>12</li> <li>13</li> <li>14</li> <li>15</li> <li>16</li> <li>17</li> <li>18</li> <li>19</li> <li>20</li> </ol>

    x>4
    

    <ol class=list-inline> <li>FALSE</li> <li>FALSE</li> <li>FALSE</li> <li>FALSE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> <li>TRUE</li> </ol>

    all(x>4) #모두 4보다 크나
    

    FALSE

    any(x>4)
    

    TRUE

    a=1:10
    
    a
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> <li>6</li> <li>7</li> <li>8</li> <li>9</li> <li>10</li> </ol>

    head(a) #데이터 앞 6개 출력
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> <li>6</li> </ol>

    tail(a) #데이터를 뒤에서 6개 출력
    

    <ol class=list-inline> <li>5</li> <li>6</li> <li>7</li> <li>8</li> <li>9</li> <li>10</li> </ol>

    head(a,3)
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> </ol>

    tail(a,3)
    

    <ol class=list-inline> <li>8</li> <li>9</li> <li>10</li> </ol>

    # Three small vectors used as sets in the examples that follow.
    x <- c(1, 2, 3)
    y <- c(3, 4, 5)
    z <- c(3, 1, 2)

    union(x, y)  # set union
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> </ol>

    intersect(x,y) #교집합
    

    3

    setdiff(x,y) # x에서 y와 동일한 값 제거 
    

    <ol class=list-inline> <li>1</li> <li>2</li> </ol>

    setdiff(y,x)
    

    <ol class=list-inline> <li>4</li> <li>5</li> </ol>

    setequal(x,y)
    

    FALSE

    setequal(x,z)
    

    TRUE

    # 1:5 is shorter than the 2 x 4 = 8 cells, so its values are reused
    # from the start once they run out.
    x <- array(1:5, dim = c(2, 4))

    x
    
    1 3 5 2
    2 4 1 3
    x[1,]
    

    <ol class=list-inline> <li>1</li> <li>3</li> <li>5</li> <li>2</li> </ol>

    x[,2]
    

    <ol class=list-inline> <li>3</li> <li>4</li> </ol>

    
    x[,3]
    
    x[,4]
    
    

    <ol class=list-inline> <li>5</li> <li>1</li> </ol>

    <ol class=list-inline> <li>2</li> <li>3</li> </ol>

    x=1:12
    x
    

    <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> <li>6</li> <li>7</li> <li>8</li> <li>9</li> <li>10</li> <li>11</li> <li>12</li> </ol>

    matrix(x,nrow = 3)
    
    1 4 7 10
    2 5 8 11
    3 6 9 12
    matrix(x,nrow = 5)
    
    Warning message in matrix(x, nrow = 5):
    "data length [12] is not a sub-multiple or multiple of the number of rows [5]"
    
    1 6 11
    2 7 12
    3 8 1
    4 9 2
    5 10 3
    # Fill the matrix row by row. TRUE is spelled out because T is an ordinary
    # variable that can be reassigned, whereas TRUE is a reserved constant.
    matrix(x, nrow = 3, byrow = TRUE)
    
    1 2 3 4
    5 6 7 8
    9 10 11 12
    V1 <- c(1, 2, 3, 4)
    v2 <- c(5, 6, 7, 8)
    V3 <- c(9, 10, 11, 12)

    # Bind the three vectors side by side as columns (4 x 3) ...
    cbind(V1, v2, V3)

    # ... and stacked as rows (3 x 4).
    rbind(V1, v2, V3)
    
    
    V1 v2 V3
    1 5 9
    2 6 10
    3 7 11
    4 8 12
    V1 1 2 3 4
    v2 5 6 7 8
    V3 9 10 11 12
    # Two 2 x 2 arrays used as operands in the matrix arithmetic below.
    a <- array(1:4, dim = c(2, 2))
    a

    y <- array(5:8, dim = c(2, 2))
    y
    
    1 3
    2 4
    5 7
    6 8
    a+y
    a-y 
    
    
    6 10
    8 12
    -4 -4
    -4 -4
    a%*%y #행렬 곱셈
    
    
    23 31
    34 46
    t(a) #전치행렬
    
    1 2
    3 4
    x=array(1:12,c(3,4))
    x
    
    1 4 7 10
    2 5 8 11
    3 6 9 12
    apply(x,1,mean) #행별 평균
    

    <ol class=list-inline> <li>5.5</li> <li>6.5</li> <li>7.5</li> </ol>

    apply(x,2,mean) #열 별 평균
    

    <ol class=list-inline> <li>2</li> <li>5</li> <li>8</li> <li>11</li> </ol>

    x=array(1:12,c(3,4))
    x
    
    1 4 7 10
    2 5 8 11
    3 6 9 12
    sample(x)
    

    <ol class=list-inline> <li>7</li> <li>10</li> <li>11</li> <li>9</li> <li>4</li> <li>2</li> <li>8</li> <li>3</li> <li>6</li> <li>1</li> <li>5</li> <li>12</li> </ol>

    sample(x,10,prob = c(1:12)/24) #확률 달리 해서 추출하기/#가중치를 고려한 표본추출
    

    <ol class=list-inline> <li>11</li> <li>2</li> <li>9</li> <li>4</li> <li>6</li> <li>10</li> <li>8</li> <li>5</li> <li>12</li> <li>7</li> </ol>

    sample(42)
    

    <ol class=list-inline> <li>16</li> <li>2</li> <li>32</li> <li>5</li> <li>30</li> <li>15</li> <li>18</li> <li>13</li> <li>23</li> <li>17</li> <li>10</li> <li>3</li> <li>28</li> <li>26</li> <li>42</li> <li>39</li> <li>31</li> <li>35</li> <li>12</li> <li>14</li> <li>19</li> <li>22</li> <li>1</li> <li>7</li> <li>33</li> <li>41</li> <li>8</li> <li>11</li> <li>21</li> <li>25</li> <li>27</li> <li>24</li> <li>6</li> <li>40</li> <li>36</li> <li>9</li> <li>37</li> <li>38</li> <li>29</li> <li>20</li> <li>4</li> <li>34</li> </ol>

    # Patient records: one element per patient in each vector.
    name <- c("철수", "춘향", "길동")
    age <- c(22, 20, 25)
    gender <- factor(c("m", "f", "m"))
    blood.type <- factor(c("A", "B", "O"))
    # The output recorded in these notes shows `name` carrying factor levels,
    # which was the data.frame() default before R 4.0. Request it explicitly so
    # the result is the same on current R versions as well.
    patients <- data.frame(
      name, age, gender, blood.type,
      stringsAsFactors = TRUE
    )


    patients
    
    name age gender blood.type
    철수 22 m A
    춘향 20 f B
    길동 25 m O
    patients$name
    

    <ol class=list-inline> <li>철수</li> <li>춘향</li> <li>길동</li> </ol>

    <details> <summary style=display:list-item;cursor:pointer> Levels: </summary> <ol class=list-inline> <li>'길동'</li> <li>'철수'</li> <li>'춘향'</li> </ol> </details>
    patients[1,]
    
    nameagegenderblood.type
    철수22 m A
    patients[,2]
    

    <ol class=list-inline> <li>22</li> <li>20</li> <li>25</li> </ol>

    patients[3,1]
    

    길동

    <details> <summary style=display:list-item;cursor:pointer> Levels: </summary> <ol class=list-inline> <li>'길동'</li> <li>'철수'</li> <li>'춘향'</li> </ol> </details>
    patients[patients$name=="철수",]
    
    nameagegenderblood.type
    철수22 m A
    patients[patients$name=="철수",c("name","age")]
    
    nameage
    철수22
    head(cars)
    
    speed dist
    4 2
    4 10
    7 4
    7 22
    8 16
    9 10
    # attach() places the columns of `cars` on the search path, so the bare name
    # `speed` below resolves to cars$speed until the matching detach(cars).
    # NOTE(review): attach() is generally discouraged outside interactive demos;
    # cars$speed or with(cars, ...) avoids name-masking surprises.
    attach(cars)
    
    speed
    

    <ol class=list-inline> <li>4</li> <li>4</li> <li>7</li> <li>7</li> <li>8</li> <li>9</li> <li>10</li> <li>10</li> <li>10</li> <li>11</li> <li>11</li> <li>12</li> <li>12</li> <li>12</li> <li>12</li> <li>13</li> <li>13</li> <li>13</li> <li>13</li> <li>14</li> <li>14</li> <li>14</li> <li>14</li> <li>15</li> <li>15</li> <li>15</li> <li>16</li> <li>16</li> <li>17</li> <li>17</li> <li>17</li> <li>18</li> <li>18</li> <li>18</li> <li>18</li> <li>19</li> <li>19</li> <li>19</li> <li>20</li> <li>20</li> <li>20</li> <li>20</li> <li>20</li> <li>22</li> <li>23</li> <li>24</li> <li>24</li> <li>24</li> <li>24</li> <li>25</li> </ol>

    dist
    

    <ol class=list-inline> <li>2</li> <li>10</li> <li>4</li> <li>22</li> <li>16</li> <li>10</li> <li>18</li> <li>26</li> <li>34</li> <li>17</li> <li>28</li> <li>14</li> <li>20</li> <li>24</li> <li>28</li> <li>26</li> <li>34</li> <li>34</li> <li>46</li> <li>26</li> <li>36</li> <li>60</li> <li>80</li> <li>20</li> <li>26</li> <li>54</li> <li>32</li> <li>40</li> <li>32</li> <li>40</li> <li>50</li> <li>42</li> <li>56</li> <li>76</li> <li>84</li> <li>36</li> <li>46</li> <li>68</li> <li>32</li> <li>48</li> <li>52</li> <li>56</li> <li>64</li> <li>66</li> <li>54</li> <li>70</li> <li>92</li> <li>93</li> <li>120</li> <li>85</li> </ol>

    str(cars)
    
    'data.frame':	50 obs. of  2 variables:
     $ speed: num  4 4 7 7 8 9 10 10 10 11 ...
     $ dist : num  2 10 4 22 16 10 18 26 34 17 ...
    
    detach(cars)
    
    mean(cars$speed)
    

    15.4

    max(cars$speed)
    

    25

    with(cars,max(speed))
    

    25

    # subset() extracts only the rows that satisfy a condition.

    subset(cars, subset = speed > 20)
    
    speed dist
    44 22 66
    45 23 54
    46 24 70
    47 24 92
    48 24 93
    49 24 120
    50 25 85
    subset(cars,speed>20,select=c(dist))
    
    dist
    44 66
    45 54
    46 70
    47 92
    48 93
    49 120
    50 85
    subset(cars,speed>10,select=-c(speed))#speed 제외
    
    dist
    10 17
    11 28
    12 14
    13 20
    14 24
    15 28
    16 26
    17 34
    18 34
    19 46
    20 26
    21 36
    22 60
    23 80
    24 20
    25 26
    26 54
    27 32
    28 40
    29 32
    30 40
    31 50
    32 42
    33 56
    34 76
    35 84
    36 36
    37 46
    38 68
    39 32
    40 48
    41 52
    42 56
    43 64
    44 66
    45 54
    46 70
    47 92
    48 93
    49 120
    50 85
    airquality
    
    Ozone Solar.R Wind Temp Month Day
    41 190 7.4 67 5 1
    36 118 8.0 72 5 2
    12 149 12.6 74 5 3
    18 313 11.5 62 5 4
    NA NA 14.3 56 5 5
    28 NA 14.9 66 5 6
    23 299 8.6 65 5 7
    19 99 13.8 59 5 8
    8 19 20.1 61 5 9
    NA 194 8.6 69 5 10
    7 NA 6.9 74 5 11
    16 256 9.7 69 5 12
    11 290 9.2 66 5 13
    14 274 10.9 68 5 14
    18 65 13.2 58 5 15
    14 334 11.5 64 5 16
    34 307 12.0 66 5 17
    6 78 18.4 57 5 18
    30 322 11.5 68 5 19
    11 44 9.7 62 5 20
    1 8 9.7 59 5 21
    11 320 16.6 73 5 22
    4 25 9.7 61 5 23
    32 92 12.0 61 5 24
    NA 66 16.6 57 5 25
    NA 266 14.9 58 5 26
    NA NA 8.0 57 5 27
    23 13 12.0 67 5 28
    45 252 14.9 81 5 29
    115 223 5.7 79 5 30
    ... ... ... ... ... ...
    96 167 6.9 91 9 1
    78 197 5.1 92 9 2
    73 183 2.8 93 9 3
    91 189 4.6 93 9 4
    47 95 7.4 87 9 5
    32 92 15.5 84 9 6
    20 252 10.9 80 9 7
    23 220 10.3 78 9 8
    21 230 10.9 75 9 9
    24 259 9.7 73 9 10
    44 236 14.9 81 9 11
    21 259 15.5 76 9 12
    28 238 6.3 77 9 13
    9 24 10.9 71 9 14
    13 112 11.5 71 9 15
    46 237 6.9 78 9 16
    18 224 13.8 67 9 17
    13 27 10.3 76 9 18
    24 238 10.3 68 9 19
    16 201 8.0 82 9 20
    13 238 12.6 64 9 21
    23 14 9.2 71 9 22
    36 139 10.3 81 9 23
    7 49 10.3 69 9 24
    14 20 16.6 63 9 25
    30 193 6.9 70 9 26
    NA 145 13.2 77 9 27
    14 191 14.3 75 9 28
    18 131 8.0 76 9 29
    20 223 11.5 68 9 30
    head(airquality)
    
    Ozone Solar.R Wind Temp Month Day
    41 190 7.4 67 5 1
    36 118 8.0 72 5 2
    12 149 12.6 74 5 3
    18 313 11.5 62 5 4
    NA NA 14.3 56 5 5
    28 NA 14.9 66 5 6
    head(na.omit(airquality)) #na값 제거
    
    Ozone Solar.R Wind Temp Month Day
    1 41 190 7.4 67 5 1
    2 36 118 8.0 72 5 2
    3 12 149 12.6 74 5 3
    4 18 313 11.5 62 5 4
    7 23 299 8.6 65 5 7
    8 19 99 13.8 59 5 8
    na.omit(airquality)
    
    Ozone Solar.R Wind Temp Month Day
    1 41 190 7.4 67 5 1
    2 36 118 8.0 72 5 2
    3 12 149 12.6 74 5 3
    4 18 313 11.5 62 5 4
    7 23 299 8.6 65 5 7
    8 19 99 13.8 59 5 8
    9 8 19 20.1 61 5 9
    12 16 256 9.7 69 5 12
    13 11 290 9.2 66 5 13
    14 14 274 10.9 68 5 14
    15 18 65 13.2 58 5 15
    16 14 334 11.5 64 5 16
    17 34 307 12.0 66 5 17
    18 6 78 18.4 57 5 18
    19 30 322 11.5 68 5 19
    20 11 44 9.7 62 5 20
    21 1 8 9.7 59 5 21
    22 11 320 16.6 73 5 22
    23 4 25 9.7 61 5 23
    24 32 92 12.0 61 5 24
    28 23 13 12.0 67 5 28
    29 45 252 14.9 81 5 29
    30 115 223 5.7 79 5 30
    31 37 279 7.4 76 5 31
    38 29 127 9.7 82 6 7
    40 71 291 13.8 90 6 9
    41 39 323 11.5 87 6 10
    44 23 148 8.0 82 6 13
    47 21 191 14.9 77 6 16
    48 37 284 20.7 72 6 17
    ... ... ... ... ... ... ...
    123 85 188 6.3 94 8 31
    124 96 167 6.9 91 9 1
    125 78 197 5.1 92 9 2
    126 73 183 2.8 93 9 3
    127 91 189 4.6 93 9 4
    128 47 95 7.4 87 9 5
    129 32 92 15.5 84 9 6
    130 20 252 10.9 80 9 7
    131 23 220 10.3 78 9 8
    132 21 230 10.9 75 9 9
    133 24 259 9.7 73 9 10
    134 44 236 14.9 81 9 11
    135 21 259 15.5 76 9 12
    136 28 238 6.3 77 9 13
    137 9 24 10.9 71 9 14
    138 13 112 11.5 71 9 15
    139 46 237 6.9 78 9 16
    140 18 224 13.8 67 9 17
    141 13 27 10.3 76 9 18
    142 24 238 10.3 68 9 19
    143 16 201 8.0 82 9 20
    144 13 238 12.6 64 9 21
    145 23 14 9.2 71 9 22
    146 36 139 10.3 81 9 23
    147 7 49 10.3 69 9 24
    148 14 20 16.6 63 9 25
    149 30 193 6.9 70 9 26
    151 14 191 14.3 75 9 28
    152 18 131 8.0 76 9 29
    153 20 223 11.5 68 9 30
    name <- c("철수", "춘향", "길동")
    age <- c(22, 20, 25)
    gender <- factor(c("m", "f", "m"))
    blood.type <- factor(c("A", "B", "O"))

    # First table: name, age and gender only.
    patient1 <- data.frame(name, age, gender)

    patient1
    
    name age gender
    철수 22 m
    춘향 20 f
    길동 25 m
    # Second table: name and blood type.
    patient2 <- data.frame(name, blood.type)

    patient2
    
    name blood.type
    철수 A
    춘향 B
    길동 O
    # Join the two tables on their shared `name` column.
    patients <- merge(x = patient1, y = patient2, by = "name")

    patients
    
    name age gender blood.type
    길동 25 m O
    철수 22 m A
    춘향 20 f B
    # 3 rows by 4 columns, filled column by column with 1..12.
    x <- array(1:12, dim = c(3, 4))

    x
    
    1 4 7 10
    2 5 8 11
    3 6 9 12
    is.data.frame(x)
    

    FALSE

    # Convert x into a data frame.
    x <- as.data.frame(x)

    x
    
    V1 V2 V3 V4
    1 4 7 10
    2 5 8 11
    3 6 9 12
    is.data.frame(x)
    

    TRUE

    # Rename the columns.
    names(x) <- c("1st", "2nd", "3rd", "4th")

    x
    
    1st 2nd 3rd 4th
    1 4 7 10
    2 5 8 11
    3 6 9 12
    # One row per day, with the value recorded for that day in `no`.
    no.patients <- data.frame(
      day = 1:6,
      no = c(50, 60, 55, 52, 65, 58)
    )
    no.patients
    
    
    day no
    1 50
    2 60
    3 55
    4 52
    5 65
    6 58
    # Bundle both data frames into one named list.
    listpatients <- list(
      patients = patients,
      no.patients = no.patients
    )
    listpatients
    
    $patients
    nameagegenderblood.type
    길동25 m O
    철수22 m A
    춘향20 f B
    $no.patients
    dayno
    1 50
    2 60
    3 55
    4 52
    5 65
    6 58
    listpatients$no.patients
    
    dayno
    1 50
    2 60
    3 55
    4 52
    5 65
    6 58
    listpatients$patients
    
    nameagegenderblood.type
    길동25 m O
    철수22 m A
    춘향20 f B
    listpatients[[1]]
    
    nameagegenderblood.type
    길동25 m O
    철수22 m A
    춘향20 f B
    listpatients[1]
    

    $patients = <table>

    nameagegenderblood.type 길동25 m O 철수22 m A 춘향20 f B

    </table>

    listpatients[2]
    

    $no.patients = <table>

    dayno 1 50 2 60 3 55 4 52 5 65 6 58

    </table>

    listpatients[[2]]
    
    dayno
    1 50
    2 60
    3 55
    4 52
    5 65
    6 58
    listpatients[["no.patients"]]
    
    dayno
    1 50
    2 60
    3 55
    4 52
    5 65
    6 58
    listpatients["no.patients"]
    

    $no.patients = <table>

    dayno 1 50 2 60 3 55 4 52 5 65 6 58

    </table>

    listpatients[["patients"]]
    
    nameagegenderblood.type
    길동25 m O
    철수22 m A
    춘향20 f B
    listpatients["patients"]
    

    $patients = <table>

    nameagegenderblood.type 길동25 m O 철수22 m A 춘향20 f B

    </table>

    # lapply() returns its result as a list.
    lapply(listpatients[["no.patients"]], FUN = mean)
    
    $day
    3.5
    $no
    56.6666666666667
    # mean() is applied to every column of the patients table. Only `age` is
    # numeric; for the other columns mean() returns NA and emits a warning,
    # which is what the recorded output shows.
    lapply(listpatients$patients, mean)
    
    Warning message in mean.default(X[[i]], ...):
    "argument is not numeric or logical: returning NA"
    Warning message in mean.default(X[[i]], ...):
    "argument is not numeric or logical: returning NA"
    Warning message in mean.default(X[[i]], ...):
    "argument is not numeric or logical: returning NA"
    
    $name
    <NA>
    $age
    22.3333333333333
    $gender
    <NA>
    $blood.type
    <NA>
    # sapply() returns its result as a vector; lapply() is shown first for
    # comparison.

    x <- list(a = 1:10)
    x

    lapply(x, FUN = mean)
    
    
    
    

    $a = <ol class=list-inline> <li>1</li> <li>2</li> <li>3</li> <li>4</li> <li>5</li> <li>6</li> <li>7</li> <li>8</li> <li>9</li> <li>10</li> </ol>

    $a = 5.5

    # Same computation as lapply(x, mean), but simplified to a named vector.
    z <- sapply(x, FUN = mean)
    z

    # The simplified result is a vector ...
    is.vector(z)

    # ... and so is the original list: is.vector() is also TRUE for lists.
    is.vector(x)
    

    a: 5.5

    TRUE

    TRUE