Summarize_Data

20211005

import pandas as pd
import seaborn as sns
import numpy as np
df = sns.load_dataset('iris')
df.shape
(150, 5)
df.head(2)
sepal_length sepal_width petal_length petal_width species
0 5.1 3.5 1.4 0.2 setosa
1 4.9 3.0 1.4 0.2 setosa
df['species'].value_counts()
setosa        50
versicolor    50
virginica     50
Name: species, dtype: int64
df["sepal_length"].value_counts()

5.0    10
5.1     9
6.3     9
5.7     8
6.7     8
5.8     7
5.5     7
6.4     7
4.9     6
5.4     6
6.1     6
6.0     6
5.6     6
4.8     5
6.5     5
6.2     4
7.7     4
6.9     4
4.6     4
5.2     4
5.9     3
4.4     3
7.2     3
6.8     3
6.6     2
4.7     2
7.6     1
7.4     1
7.3     1
7.0     1
7.1     1
5.3     1
4.3     1
4.5     1
7.9     1
Name: sepal_length, dtype: int64
len(df)
150
df.shape # 150 rows, 5 columns
(150, 5)
df['species'].nunique()
3
df.describe(include='all')
sepal_length sepal_width petal_length petal_width species
count 150.000000 150.000000 150.000000 150.000000 150
unique NaN NaN NaN NaN 3
top NaN NaN NaN NaN setosa
freq NaN NaN NaN NaN 50
mean 5.843333 3.057333 3.758000 1.199333 NaN
std 0.828066 0.435866 1.765298 0.762238 NaN
min 4.300000 2.000000 1.000000 0.100000 NaN
25% 5.100000 2.800000 1.600000 0.300000 NaN
50% 5.800000 3.000000 4.350000 1.300000 NaN
75% 6.400000 3.300000 5.100000 1.800000 NaN
max 7.900000 4.400000 6.900000 2.500000 NaN
df['petal_width'].sum()
179.90000000000003
df['petal_width'].count() # number of non-NA values in the column (equals the row count here)
150
# Restrict the reduction to numeric columns explicitly: the string column
# 'species' has no median, and relying on pandas to drop it silently is
# deprecated (newer pandas versions raise TypeError instead).
df.median(numeric_only=True)
<ipython-input-12-6d467abf240d>:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  df.median()





sepal_length    5.80
sepal_width     3.00
petal_length    4.35
petal_width     1.30
dtype: float64
# Restrict the reduction to numeric columns explicitly: the string column
# 'species' has no mean, and relying on pandas to drop it silently is
# deprecated (newer pandas versions raise TypeError instead).
df.mean(numeric_only=True)
<ipython-input-13-c61f0c8f89b5>:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  df.mean()





sepal_length    5.843333
sepal_width     3.057333
petal_length    3.758000
petal_width     1.199333
dtype: float64
df['petal_width'].quantile([0.25,0.75])
0.25    0.3
0.75    1.8
Name: petal_width, dtype: float64
# Quantiles are only defined for the numeric columns; state that explicitly
# instead of depending on the version-specific default of numeric_only, so the
# string column 'species' is excluded on every pandas version.
df.quantile([0.25, 0.75], numeric_only=True)

sepal_length sepal_width petal_length petal_width
0.25 5.1 2.8 1.6 0.3
0.75 6.4 3.3 5.1 1.8
df.min()
sepal_length       4.3
sepal_width        2.0
petal_length       1.0
petal_width        0.1
species         setosa
dtype: object
df.max()
sepal_length          7.9
sepal_width           4.4
petal_length          6.9
petal_width           2.5
species         virginica
dtype: object
# Restrict the reduction to numeric columns explicitly: the string column
# 'species' has no variance, and relying on pandas to drop it silently is
# deprecated (newer pandas versions raise TypeError instead).
df.var(numeric_only=True)
<ipython-input-18-28ded241fd7c>:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  df.var()





sepal_length    0.685694
sepal_width     0.189979
petal_length    3.116278
petal_width     0.581006
dtype: float64
# Restrict the reduction to numeric columns explicitly: the string column
# 'species' has no standard deviation, and relying on pandas to drop it
# silently is deprecated (newer pandas versions raise TypeError instead).
df.std(numeric_only=True)
<ipython-input-19-ce97bb7eaef8>:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError.  Select only valid columns before calling the reduction.
  df.std()





sepal_length    0.828066
sepal_width     0.435866
petal_length    1.765298
petal_width     0.762238
dtype: float64

apply(함수)

def smp(x):
    """Return the last three items of ``x``.

    ``x`` is any sliceable sequence (e.g. a string). When it holds fewer
    than three items, the whole sequence is returned unchanged.
    """
    return x[-3:]
# apply() with an inline lambda: keep the first three characters of each species name.
df['species_3'] = df['species'].apply(lambda x : x[:3])
# apply() with the named function smp: this overwrites the column assigned above
# with the last three characters of each species name.
df['species_3'] = df['species'].apply(smp)
df
sepal_length sepal_width petal_length petal_width species species_3
0 5.1 3.5 1.4 0.2 setosa osa
1 4.9 3.0 1.4 0.2 setosa osa
2 4.7 3.2 1.3 0.2 setosa osa
3 4.6 3.1 1.5 0.2 setosa osa
4 5.0 3.6 1.4 0.2 setosa osa
... ... ... ... ... ... ...
145 6.7 3.0 5.2 2.3 virginica ica
146 6.3 2.5 5.0 1.9 virginica ica
147 6.5 3.0 5.2 2.0 virginica ica
148 6.2 3.4 5.4 2.3 virginica ica
149 5.9 3.0 5.1 1.8 virginica ica

150 rows × 6 columns