Concat 함수로 데이터 프레임 병합하기
학습목표
- concat 함수를 활용하여 dataframe 병합시키기
import pandas as pd
import numpy as np
concat 함수 사용하여 dataframe 병합하기
- pandas.concat 함수
- 축을 따라 dataframe을 병합 가능
- 기본값은 axis=0 -> 행 단위(위아래)로 병합, axis=1을 지정하면 열 단위(좌우)로 병합
- column명이 같은 경우
# Two ten-row frames that share the same column names (key1, value1).
# key1 is 0..9 in both; value1 is a fresh np.random.randn(10) draw per frame.
df1 = pd.DataFrame(
    data={
        'key1': np.arange(10),
        'value1': np.random.randn(10),
    }
)
df2 = pd.DataFrame(
    data={
        'key1': np.arange(10),
        'value1': np.random.randn(10),
    }
)
df2
| | key1 | value1 |
---|---|---|
0 | 0 | -0.338849 |
1 | 1 | 0.688248 |
2 | 2 | 0.863890 |
3 | 3 | 0.431818 |
4 | 4 | -0.345499 |
5 | 5 | 0.626425 |
6 | 6 | 0.639522 |
7 | 7 | -0.677354 |
8 | 8 | -0.778642 |
9 | 9 | -0.600007 |
# Stack df2 underneath df1 (row-wise). ignore_index=True discards each
# frame's own 0-9 row labels and renumbers the combined rows 0-19.
pd.concat(objs=[df1, df2], ignore_index=True)
| | key1 | value1 |
---|---|---|
0 | 0 | 0.157695 |
1 | 1 | 0.815835 |
2 | 2 | 0.512740 |
3 | 3 | -0.575658 |
4 | 4 | -0.713351 |
5 | 5 | 1.701762 |
6 | 6 | 0.296171 |
7 | 7 | -0.018002 |
8 | 8 | -1.302774 |
9 | 9 | -2.626175 |
10 | 0 | -0.338849 |
11 | 1 | 0.688248 |
12 | 2 | 0.863890 |
13 | 3 | 0.431818 |
14 | 4 | -0.345499 |
15 | 5 | 0.626425 |
16 | 6 | 0.639522 |
17 | 7 | -0.677354 |
18 | 8 | -0.778642 |
19 | 9 | -0.600007 |
# axis=0 is the default, so this is the same row-wise stacking as calling
# concat without an axis; each frame keeps its own 0-9 row labels.
pd.concat(objs=[df1, df2], axis=0)
| | key1 | value1 |
---|---|---|
0 | 0 | 0.157695 |
1 | 1 | 0.815835 |
2 | 2 | 0.512740 |
3 | 3 | -0.575658 |
4 | 4 | -0.713351 |
5 | 5 | 1.701762 |
6 | 6 | 0.296171 |
7 | 7 | -0.018002 |
8 | 8 | -1.302774 |
9 | 9 | -2.626175 |
0 | 0 | -0.338849 |
1 | 1 | 0.688248 |
2 | 2 | 0.863890 |
3 | 3 | 0.431818 |
4 | 4 | -0.345499 |
5 | 5 | 0.626425 |
6 | 6 | 0.639522 |
7 | 7 | -0.677354 |
8 | 8 | -0.778642 |
9 | 9 | -0.600007 |
# axis=1 places the frames side by side, matching rows by index label.
# Both frames use the same column names, so the result carries
# key1/value1 twice.
pd.concat(objs=[df1, df2], axis=1)
| | key1 | value1 | key1 | value1 |
---|---|---|---|---|
0 | 0 | 0.157695 | 0 | -0.338849 |
1 | 1 | 0.815835 | 1 | 0.688248 |
2 | 2 | 0.512740 | 2 | 0.863890 |
3 | 3 | -0.575658 | 3 | 0.431818 |
4 | 4 | -0.713351 | 4 | -0.345499 |
5 | 5 | 1.701762 | 5 | 0.626425 |
6 | 6 | 0.296171 | 6 | 0.639522 |
7 | 7 | -0.018002 | 7 | -0.677354 |
8 | 8 | -1.302774 | 8 | -0.778642 |
9 | 9 | -2.626175 | 9 | -0.600007 |
- column명이 다른 경우
# A third ten-row frame whose column names (key2, value2) differ from df1's.
df3 = pd.DataFrame(
    data={
        'key2': np.arange(10),
        'value2': np.random.randn(10),
    }
)
# Side-by-side concat: with distinct names, all four columns stay
# individually addressable in the result.
pd.concat(objs=[df1, df3], axis=1)
| | key1 | value1 | key2 | value2 |
---|---|---|---|---|
0 | 0 | 0.157695 | 0 | 2.096654 |
1 | 1 | 0.815835 | 1 | 1.434691 |
2 | 2 | 0.512740 | 2 | -0.211020 |
3 | 3 | -0.575658 | 3 | 1.498715 |
4 | 4 | -0.713351 | 4 | -1.106296 |
5 | 5 | 1.701762 | 5 | -0.678457 |
6 | 6 | 0.296171 | 6 | -0.420552 |
7 | 7 | -0.018002 | 7 | -0.091809 |
8 | 8 | -1.302774 | 8 | 0.603147 |
9 | 9 | -2.626175 | 9 | -0.918178 |