import pandas as pd
import seaborn as sns
df = sns.load_dataset("mpg")
df.shape
|
mpg |
cylinders |
displacement |
horsepower |
weight |
acceleration |
model_year |
origin |
name |
0 |
18.0 |
8 |
307.0 |
130.0 |
3504 |
12.0 |
70 |
usa |
chevrolet chevelle malibu |
1 |
15.0 |
8 |
350.0 |
165.0 |
3693 |
11.5 |
70 |
usa |
buick skylark 320 |
2 |
18.0 |
8 |
318.0 |
150.0 |
3436 |
11.0 |
70 |
usa |
plymouth satellite |
3 |
16.0 |
8 |
304.0 |
150.0 |
3433 |
12.0 |
70 |
usa |
amc rebel sst |
4 |
17.0 |
8 |
302.0 |
140.0 |
3449 |
10.5 |
70 |
usa |
ford torino |
df.sort_values('mpg',ascending=False)
|
mpg |
cylinders |
displacement |
horsepower |
weight |
acceleration |
model_year |
origin |
name |
322 |
46.6 |
4 |
86.0 |
65.0 |
2110 |
17.9 |
80 |
japan |
mazda glc |
329 |
44.6 |
4 |
91.0 |
67.0 |
1850 |
13.8 |
80 |
japan |
honda civic 1500 gl |
325 |
44.3 |
4 |
90.0 |
48.0 |
2085 |
21.7 |
80 |
europe |
vw rabbit c (diesel) |
394 |
44.0 |
4 |
97.0 |
52.0 |
2130 |
24.6 |
82 |
europe |
vw pickup |
326 |
43.4 |
4 |
90.0 |
48.0 |
2335 |
23.7 |
80 |
europe |
vw dasher (diesel) |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
103 |
11.0 |
8 |
400.0 |
150.0 |
4997 |
14.0 |
73 |
usa |
chevrolet impala |
67 |
11.0 |
8 |
429.0 |
208.0 |
4633 |
11.0 |
72 |
usa |
mercury marquis |
25 |
10.0 |
8 |
360.0 |
215.0 |
4615 |
14.0 |
70 |
usa |
ford f250 |
26 |
10.0 |
8 |
307.0 |
200.0 |
4376 |
15.0 |
70 |
usa |
chevy c20 |
28 |
9.0 |
8 |
304.0 |
193.0 |
4732 |
18.5 |
70 |
usa |
hi 1200d |
398 rows × 9 columns
df = df.rename(columns = {'model_year':'year'}) #칼럼 이름 변경하기( model_year 에서 year으로 이름 변경)
|
mpg |
cylinders |
displacement |
horsepower |
weight |
acceleration |
year |
origin |
name |
0 |
18.0 |
8 |
307.0 |
130.0 |
3504 |
12.0 |
70 |
usa |
chevrolet chevelle malibu |
1 |
15.0 |
8 |
350.0 |
165.0 |
3693 |
11.5 |
70 |
usa |
buick skylark 320 |
2 |
18.0 |
8 |
318.0 |
150.0 |
3436 |
11.0 |
70 |
usa |
plymouth satellite |
3 |
16.0 |
8 |
304.0 |
150.0 |
3433 |
12.0 |
70 |
usa |
amc rebel sst |
4 |
17.0 |
8 |
302.0 |
140.0 |
3449 |
10.5 |
70 |
usa |
ford torino |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
393 |
27.0 |
4 |
140.0 |
86.0 |
2790 |
15.6 |
82 |
usa |
ford mustang gl |
394 |
44.0 |
4 |
97.0 |
52.0 |
2130 |
24.6 |
82 |
europe |
vw pickup |
395 |
32.0 |
4 |
135.0 |
84.0 |
2295 |
11.6 |
82 |
usa |
dodge rampage |
396 |
28.0 |
4 |
120.0 |
79.0 |
2625 |
18.6 |
82 |
usa |
ford ranger |
397 |
31.0 |
4 |
119.0 |
82.0 |
2720 |
19.4 |
82 |
usa |
chevy s-10 |
398 rows × 9 columns
df = df.drop(columns=['mpg','year', 'name'])
df.head()
|
cylinders |
displacement |
horsepower |
weight |
acceleration |
origin |
0 |
8 |
307.0 |
130.0 |
3504 |
12.0 |
usa |
1 |
8 |
350.0 |
165.0 |
3693 |
11.5 |
usa |
2 |
8 |
318.0 |
150.0 |
3436 |
11.0 |
usa |
3 |
8 |
304.0 |
150.0 |
3433 |
12.0 |
usa |
4 |
8 |
302.0 |
140.0 |
3449 |
10.5 |
usa |
Object `pd.melt` not found.
df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
'B': {0: 1, 1: 3, 2: 5},
'C': {0: 2, 1: 4, 2: 6}})
df
|
A |
B |
C |
0 |
a |
1 |
2 |
1 |
b |
3 |
4 |
2 |
c |
5 |
6 |
melt 함수 참고 사이트
https://blog.naver.com/PostView.nhn?blogId=youji4ever&logNo=222048943354
pd.melt(df, id_vars=['A'], value_vars=['B'])
|
A |
variable |
value |
0 |
a |
B |
1 |
1 |
b |
B |
3 |
2 |
c |
B |
5 |
df = pd.melt(df, id_vars=['A'], value_vars=['B', 'C'])
df
|
A |
variable |
value |
0 |
a |
B |
1 |
1 |
b |
B |
3 |
2 |
c |
B |
5 |
3 |
a |
C |
2 |
4 |
b |
C |
4 |
5 |
c |
C |
6 |
df.groupby("variable").sum()
|
A |
variable |
value |
0 |
a |
B |
1 |
1 |
b |
B |
3 |
2 |
c |
B |
5 |
3 |
a |
C |
2 |
4 |
b |
C |
4 |
5 |
c |
C |
6 |
df.pivot(columns='variable', values='value')
variable |
B |
C |
0 |
1.0 |
NaN |
1 |
3.0 |
NaN |
2 |
5.0 |
NaN |
3 |
NaN |
2.0 |
4 |
NaN |
4.0 |
5 |
NaN |
6.0 |
df = pd.DataFrame({'foo': ['one','one','one','two','two','two'],
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
'baz': [1, 2, 3, 4, 5, 6]})
df
|
foo |
bar |
baz |
0 |
one |
A |
1 |
1 |
one |
B |
2 |
2 |
one |
C |
3 |
3 |
two |
A |
4 |
4 |
two |
B |
5 |
5 |
two |
C |
6 |
df2 = df.pivot(index='foo', columns='bar', values='baz')
df.pivot(columns='bar')['baz']
bar |
A |
B |
C |
0 |
1.0 |
NaN |
NaN |
1 |
NaN |
2.0 |
NaN |
2 |
NaN |
NaN |
3.0 |
3 |
4.0 |
NaN |
NaN |
4 |
NaN |
5.0 |
NaN |
5 |
NaN |
NaN |
6.0 |
df3 = df2.reset_index()
df3
bar |
foo |
A |
B |
C |
0 |
one |
1 |
2 |
3 |
1 |
two |
4 |
5 |
6 |
df3.melt(id_vars=['foo'], value_vars=['A', 'B', 'C']).sort_values(by=['foo', 'bar'])
|
foo |
bar |
value |
0 |
one |
A |
1 |
2 |
one |
B |
2 |
4 |
one |
C |
3 |
1 |
two |
A |
4 |
3 |
two |
B |
5 |
5 |
two |
C |
6 |