Pandas_Reshaping_Data

import pandas as pd
import seaborn as sns
df = sns.load_dataset("mpg")
df.shape
(398, 9)
df.head()
mpg cylinders displacement horsepower weight acceleration model_year origin name
0 18.0 8 307.0 130.0 3504 12.0 70 usa chevrolet chevelle malibu
1 15.0 8 350.0 165.0 3693 11.5 70 usa buick skylark 320
2 18.0 8 318.0 150.0 3436 11.0 70 usa plymouth satellite
3 16.0 8 304.0 150.0 3433 12.0 70 usa amc rebel sst
4 17.0 8 302.0 140.0 3449 10.5 70 usa ford torino
df.sort_values('mpg',ascending=False)
mpg cylinders displacement horsepower weight acceleration model_year origin name
322 46.6 4 86.0 65.0 2110 17.9 80 japan mazda glc
329 44.6 4 91.0 67.0 1850 13.8 80 japan honda civic 1500 gl
325 44.3 4 90.0 48.0 2085 21.7 80 europe vw rabbit c (diesel)
394 44.0 4 97.0 52.0 2130 24.6 82 europe vw pickup
326 43.4 4 90.0 48.0 2335 23.7 80 europe vw dasher (diesel)
... ... ... ... ... ... ... ... ... ...
103 11.0 8 400.0 150.0 4997 14.0 73 usa chevrolet impala
67 11.0 8 429.0 208.0 4633 11.0 72 usa mercury marquis
25 10.0 8 360.0 215.0 4615 14.0 70 usa ford f250
26 10.0 8 307.0 200.0 4376 15.0 70 usa chevy c20
28 9.0 8 304.0 193.0 4732 18.5 70 usa hi 1200d

398 rows × 9 columns

df.sort_values?
df = df.rename(columns = {'model_year':'year'}) #칼럼 이름 변경하기( model_year 에서 year으로 이름 변경)
df.sort_index()
mpg cylinders displacement horsepower weight acceleration year origin name
0 18.0 8 307.0 130.0 3504 12.0 70 usa chevrolet chevelle malibu
1 15.0 8 350.0 165.0 3693 11.5 70 usa buick skylark 320
2 18.0 8 318.0 150.0 3436 11.0 70 usa plymouth satellite
3 16.0 8 304.0 150.0 3433 12.0 70 usa amc rebel sst
4 17.0 8 302.0 140.0 3449 10.5 70 usa ford torino
... ... ... ... ... ... ... ... ... ...
393 27.0 4 140.0 86.0 2790 15.6 82 usa ford mustang gl
394 44.0 4 97.0 52.0 2130 24.6 82 europe vw pickup
395 32.0 4 135.0 84.0 2295 11.6 82 usa dodge rampage
396 28.0 4 120.0 79.0 2625 18.6 82 usa ford ranger
397 31.0 4 119.0 82.0 2720 19.4 82 usa chevy s-10

398 rows × 9 columns

df.reset_index?
df = df.drop(columns=['mpg','year', 'name'])
df.head()
cylinders displacement horsepower weight acceleration origin
0 8 307.0 130.0 3504 12.0 usa
1 8 350.0 165.0 3693 11.5 usa
2 8 318.0 150.0 3436 11.0 usa
3 8 304.0 150.0 3433 12.0 usa
4 8 302.0 140.0 3449 10.5 usa
pd.melt?
Object `pd.melt` not found.
df = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
                   'B': {0: 1, 1: 3, 2: 5},
                   'C': {0: 2, 1: 4, 2: 6}})
df
A B C
0 a 1 2
1 b 3 4
2 c 5 6

melt 함수 참고 사이트

https://blog.naver.com/PostView.nhn?blogId=youji4ever&logNo=222048943354

pd.melt(df, id_vars=['A'], value_vars=['B'])
A variable value
0 a B 1
1 b B 3
2 c B 5
df = pd.melt(df, id_vars=['A'], value_vars=['B', 'C'])
df
A variable value
0 a B 1
1 b B 3
2 c B 5
3 a C 2
4 b C 4
5 c C 6
df.groupby("variable").sum()
value
variable
B 9
C 12
df
A variable value
0 a B 1
1 b B 3
2 c B 5
3 a C 2
4 b C 4
5 c C 6
df.pivot(columns='variable', values='value')
variable B C
0 1.0 NaN
1 3.0 NaN
2 5.0 NaN
3 NaN 2.0
4 NaN 4.0
5 NaN 6.0
df = pd.DataFrame({'foo': ['one','one','one','two','two','two'],
                       'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                       'baz': [1, 2, 3, 4, 5, 6]})
df
foo bar baz
0 one A 1
1 one B 2
2 one C 3
3 two A 4
4 two B 5
5 two C 6
df2 = df.pivot(index='foo', columns='bar', values='baz')
df.pivot(columns='bar')['baz']
bar A B C
0 1.0 NaN NaN
1 NaN 2.0 NaN
2 NaN NaN 3.0
3 4.0 NaN NaN
4 NaN 5.0 NaN
5 NaN NaN 6.0
df3 = df2.reset_index()
df3
bar foo A B C
0 one 1 2 3
1 two 4 5 6
df3.melt(id_vars=['foo'], value_vars=['A', 'B', 'C']).sort_values(by=['foo', 'bar'])
foo bar value
0 one A 1
2 one B 2
4 one C 3
1 two A 4
3 two B 5
5 two C 6