Concat 함수로 데이터 프레임 병합하기
학습목표
- concat 함수를 활용하여 dataframe 병합시키기
import pandas as pd
import numpy as np
concat 함수 사용하여 dataframe 병합하기
- pandas.concat 함수
- 축을 따라 dataframe을 병합 가능
- 기본 axis = 0 -> 행단위 병합
- column명이 같은 경우
# Build two frames that share the same column names (key1, value1).
# key1 holds the integers 0..9; value1 holds 10 draws from np.random.randn.
df1 = pd.DataFrame(
    dict(key1=np.arange(10), value1=np.random.randn(10))
)
df2 = pd.DataFrame(
    dict(key1=np.arange(10), value1=np.random.randn(10))
)
df2  # display the second frame
| | key1 | value1 |
|---|---|---|
| 0 | 0 | -0.338849 |
| 1 | 1 | 0.688248 |
| 2 | 2 | 0.863890 |
| 3 | 3 | 0.431818 |
| 4 | 4 | -0.345499 |
| 5 | 5 | 0.626425 |
| 6 | 6 | 0.639522 |
| 7 | 7 | -0.677354 |
| 8 | 8 | -0.778642 |
| 9 | 9 | -0.600007 |
# Row-wise concat; ignore_index=True renumbers the result index 0..19.
pd.concat(objs=[df1, df2], ignore_index=True)
| | key1 | value1 |
|---|---|---|
| 0 | 0 | 0.157695 |
| 1 | 1 | 0.815835 |
| 2 | 2 | 0.512740 |
| 3 | 3 | -0.575658 |
| 4 | 4 | -0.713351 |
| 5 | 5 | 1.701762 |
| 6 | 6 | 0.296171 |
| 7 | 7 | -0.018002 |
| 8 | 8 | -1.302774 |
| 9 | 9 | -2.626175 |
| 10 | 0 | -0.338849 |
| 11 | 1 | 0.688248 |
| 12 | 2 | 0.863890 |
| 13 | 3 | 0.431818 |
| 14 | 4 | -0.345499 |
| 15 | 5 | 0.626425 |
| 16 | 6 | 0.639522 |
| 17 | 7 | -0.677354 |
| 18 | 8 | -0.778642 |
| 19 | 9 | -0.600007 |
# axis=0 is the default (row-wise); each frame keeps its own 0..9 index labels.
pd.concat(objs=[df1, df2], axis=0)
| | key1 | value1 |
|---|---|---|
| 0 | 0 | 0.157695 |
| 1 | 1 | 0.815835 |
| 2 | 2 | 0.512740 |
| 3 | 3 | -0.575658 |
| 4 | 4 | -0.713351 |
| 5 | 5 | 1.701762 |
| 6 | 6 | 0.296171 |
| 7 | 7 | -0.018002 |
| 8 | 8 | -1.302774 |
| 9 | 9 | -2.626175 |
| 0 | 0 | -0.338849 |
| 1 | 1 | 0.688248 |
| 2 | 2 | 0.863890 |
| 3 | 3 | 0.431818 |
| 4 | 4 | -0.345499 |
| 5 | 5 | 0.626425 |
| 6 | 6 | 0.639522 |
| 7 | 7 | -0.677354 |
| 8 | 8 | -0.778642 |
| 9 | 9 | -0.600007 |
# axis=1 places the frames side by side; the duplicate column names are kept.
pd.concat(objs=[df1, df2], axis=1)
| | key1 | value1 | key1 | value1 |
|---|---|---|---|---|
| 0 | 0 | 0.157695 | 0 | -0.338849 |
| 1 | 1 | 0.815835 | 1 | 0.688248 |
| 2 | 2 | 0.512740 | 2 | 0.863890 |
| 3 | 3 | -0.575658 | 3 | 0.431818 |
| 4 | 4 | -0.713351 | 4 | -0.345499 |
| 5 | 5 | 1.701762 | 5 | 0.626425 |
| 6 | 6 | 0.296171 | 6 | 0.639522 |
| 7 | 7 | -0.018002 | 7 | -0.677354 |
| 8 | 8 | -1.302774 | 8 | -0.778642 |
| 9 | 9 | -2.626175 | 9 | -0.600007 |
- column 명이 다른 경우
# A third frame with different column names (key2, value2).
df3 = pd.DataFrame(
    dict(key2=np.arange(10), value2=np.random.randn(10))
)
# Column-wise concat: df3's columns are placed to the right of df1's.
pd.concat(objs=[df1, df3], axis=1)
| | key1 | value1 | key2 | value2 |
|---|---|---|---|---|
| 0 | 0 | 0.157695 | 0 | 2.096654 |
| 1 | 1 | 0.815835 | 1 | 1.434691 |
| 2 | 2 | 0.512740 | 2 | -0.211020 |
| 3 | 3 | -0.575658 | 3 | 1.498715 |
| 4 | 4 | -0.713351 | 4 | -1.106296 |
| 5 | 5 | 1.701762 | 5 | -0.678457 |
| 6 | 6 | 0.296171 | 6 | -0.420552 |
| 7 | 7 | -0.018002 | 7 | -0.091809 |
| 8 | 8 | -1.302774 | 8 | 0.603147 |
| 9 | 9 | -2.626175 | 9 | -0.918178 |