🌖

6일차 코로나 데이터 분석

 
notion imagenotion image

목차

 
# Configure a font that can render Korean (Hangul) text in matplotlib charts.
# The original hard-coded the Windows font file path, which fails on any
# other operating system; the font is now chosen per platform.
import platform

from matplotlib import font_manager, rc

if platform.system() == 'Windows':
    # Resolve the family name from the Malgun Gothic font file shipped with Windows.
    font_name = font_manager.FontProperties(
        fname='c:/Windows/Fonts/malgun.ttf').get_name()
elif platform.system() == 'Darwin':
    font_name = 'AppleGothic'  # macOS
else:
    font_name = 'NanumGothic'  # assumes the Nanum fonts are installed — TODO confirm
rc('font', family=font_name)

1. 성별, 나이별

import numpy as np
import pandas as pd

# Source: Korea Centers for Disease Control and Prevention (KCDC)
# Load the daily confirmed-case table, using the 'Date' column as the index.
confirmed = pd.read_csv('./data/daily_Confirmed.csv', index_col = ['Date'])
# Alternative encodings to try if the file does not decode correctly:
# pd.read_csv('./data.csv', encoding='utf-8')   # default
# pd.read_csv('./data.csv', encoding='cp949')   # 11,172 Hangul characters
# pd.read_csv('./data.csv', encoding='euc-kr')  # 2,350 Hangul characters
confirmed.info() # confirmed의 정보 확인
<class 'pandas.core.frame.DataFrame'> Index: 32 entries, 2020-03-01 to 2020-04-01 Data columns (total 11 columns): Female 32 non-null int64 Male 32 non-null int64 0-9 32 non-null int64 10- 32 non-null int64 20-29 32 non-null int64 30-39 32 non-null int64 40-49 32 non-null int64 50-59 32 non-null int64 60-69 32 non-null int64 70-79 32 non-null int64 80- 32 non-null int64 dtypes: int64(11) memory usage: 3.0+ KB
confirmed.head()  # shows the first 5 rows by default
Out[-] Female Male 0-9 10- 20-29 30-39 40-49 50-59 60-69 70-79 80- Date 2020-03-01 2197 1329 27 137 1054 426 521 687 453 158 63 2020-03-02 2621 1591 32 169 1235 506 633 834 530 192 81 2020-03-03 3002 1810 34 204 1417 578 713 952 597 224 93 2020-03-04 3332 1996 34 233 1575 631 790 1051 646 260 108 2020-03-05 3617 2149 38 257 1727 659 847 1127 699 288 124
confirmed.tail()
Out[-] Female Male 0-9 10- 20-29 30-39 40-49 50-59 60-69 70-79 80- Date 2020-03-28 5742 3736 109 501 2567 978 1278 1780 1201 632 432 2020-03-29 5784 3799 111 508 2602 991 1292 1798 1210 635 434 2020-03-30 5827 3836 112 513 2630 1002 1297 1812 1218 640 437 2020-03-31 5881 3905 112 515 2656 1012 1312 1851 1235 651 442 2020-04-01 5941 3946 116 519 2682 1027 1323 1865 1245 658 452
confirmed.isnull().sum() # 결측치 개수 구하기
Out[-] Female 0 Male 0 0-9 0 10- 0 20-29 0 30-39 0 40-49 0 50-59 0 60-69 0 70-79 0 80- 0 dtype: int64
# 혹시 결측치가 있다면! # pd.to_numeric(df['컬럼이름'], errors='coerce') -> nan
confirmed['Female'].describe()
Out[-] count 32.000000 mean 4882.531250 std 999.970515 min 2197.000000 25% 4547.250000 50% 5146.500000 75% 5601.000000 max 5986.000000 Name: Female, dtype: float64

시각화

  • plt.pie
    • rcParams : 차트 크기, 선의 색, 두께 등 설정
    • explode : Pie 차트 조각 추출되는 크기
    • autopct : Pie 차트 조각의 전체 대비 백분율
# 마지막 날인 2020년 4월 1일 데이터 # 성별 데이터 가져오기 sex = confirmed.iloc[-1, :2] sex
Out[-] Female 5941 Male 3946 Name: 2020-04-01, dtype: int64
# 성별에 따른 확진자 수 pie 그래프 import matplotlib.pyplot as plt plt.rcParams['figure.figsize'] = 12,8 group_explodes = (0.05, 0) plt.pie(sex, labels=['female', 'male'], explode = group_explodes, shadow = True, startangle=90, colors = ['lightcoral', 'lightskyblue'], autopct='%1.2f%%') plt.title('Confirmed(sex)', size=15) plt.axis('equal') plt.show()
Out[-]
notion imagenotion image
# 2020년 3월 1일 ~ 4월 1일 시계열 데이터 sex = confirmed.iloc[:, :2] sex
Out[-] Female Male Date 2020-03-01 2197 1329 2020-03-02 2621 1591 2020-03-03 3002 1810 2020-03-04 3332 1996 2020-03-05 3617 2149 2020-03-06 3939 2345 2020-03-07 4245 2522 2020-03-08 4440 2694 2020-03-09 4583 2799 2020-03-10 4661 2852 2020-03-11 4808 2947 2020-03-12 4875 2994 2020-03-13 4936 3043 2020-03-14 5986 3100 2020-03-15 5026 3136 2020-03-16 5067 3169 2020-03-17 5120 3200 2020-03-18 5173 3240 2020-03-19 5269 3296 2020-03-20 5322 3330 2020-03-21 5412 3387 2020-03-22 5467 3430 2020-03-23 5504 3457 2020-03-24 5540 3497 2020-03-25 5587 3550 2020-03-26 5643 3598 2020-03-27 5694 3638 2020-03-28 5742 3736 2020-03-29 5784 3799 2020-03-30 5827 3836 2020-03-31 5881 3905 2020-04-01 5941 3946
plt.xticks(rotation=70) # x 축 방향 기울이기(기울이지 않으면 겹침) plt.bar(sex.index, sex.loc[:,'Female'], color='lightcoral', label='Female') plt.bar(sex.index, sex.loc[:,'Male'], color='lightskyblue', label='Male') plt.show() # 3월 14일의 데이터의 확인이 필요
Out[-]
notion imagenotion image
# Correct the 2020-03-14 outlier in the 'Female' column (5986 -> 5000).
# A single .loc call writes into `confirmed` itself. The chained form
# confirmed['Female']['2020-03-14'] = ... assigns through an intermediate
# Series, which triggers SettingWithCopyWarning and is not guaranteed to
# update the DataFrame (it never does under pandas Copy-on-Write).
confirmed.loc['2020-03-14', 'Female'] = 5000
# 2020년 3월 1일 ~ 4월 1일 시계열 데이터(수정) sex = confirmed.iloc[:, :2] sex
Out[-] Female Male Date 2020-03-01 2197 1329 2020-03-02 2621 1591 2020-03-03 3002 1810 2020-03-04 3332 1996 2020-03-05 3617 2149 2020-03-06 3939 2345 2020-03-07 4245 2522 2020-03-08 4440 2694 2020-03-09 4583 2799 2020-03-10 4661 2852 2020-03-11 4808 2947 2020-03-12 4875 2994 2020-03-13 4936 3043 2020-03-14 5000 3100 # 변경 후 (5986 -> 5000) 2020-03-15 5026 3136 2020-03-16 5067 3169 2020-03-17 5120 3200 2020-03-18 5173 3240 2020-03-19 5269 3296 2020-03-20 5322 3330 2020-03-21 5412 3387 2020-03-22 5467 3430 2020-03-23 5504 3457 2020-03-24 5540 3497 2020-03-25 5587 3550 2020-03-26 5643 3598 2020-03-27 5694 3638 2020-03-28 5742 3736 2020-03-29 5784 3799 2020-03-30 5827 3836 2020-03-31 5881 3905 2020-04-01 5941 3946
plt.xticks(rotation=70) plt.bar(sex.index, sex.loc[:,'Female'], color='lightcoral', label='Female') plt.bar(sex.index, sex.loc[:,'Male'], color='lightskyblue', label='Male', bottom = sex.loc[:,'Female'], alpha = 0.5) plt.xlabel('Date') plt.ylabel('Number') plt.title('Confirmed(sex)', size=15) plt.legend() plt.show()
Out[-]
notion imagenotion image

Plotly

sex = confirmed.iloc[-1, :2] print(sex) print(type(sex))
Out[-] Female 5941 Male 3946 Name: 2020-04-01, dtype: int64 <class 'pandas.core.series.Series'>
import plotly.graph_objects as go labels = sex.index values = sex.values fig = go.Figure(data=[go.Pie(labels=labels, values=values)]) fig.show()
Out[-]
notion imagenotion image
# 공식홈페이지 튜토리얼, 보통은 df으로 그립니다. import plotly.express as px # tips데이터는 244 x 7 로 이루어져 있고 day는 4개의 고유값이 있다. df = px.data.tips() fig = px.pie(df, values='tip', names='day') fig.show()
Out[-]
notion imagenotion image
sex = confirmed.iloc[:, :2] sex
Out[-] Female Male Date 2020-03-01 2197 1329 2020-03-02 2621 1591 2020-03-03 3002 1810 2020-03-04 3332 1996 2020-03-05 3617 2149 2020-03-06 3939 2345 2020-03-07 4245 2522 2020-03-08 4440 2694 2020-03-09 4583 2799 2020-03-10 4661 2852 2020-03-11 4808 2947 2020-03-12 4875 2994 2020-03-13 4936 3043 2020-03-14 5000 3100 2020-03-15 5026 3136 2020-03-16 5067 3169 2020-03-17 5120 3200 2020-03-18 5173 3240 2020-03-19 5269 3296 2020-03-20 5322 3330 2020-03-21 5412 3387 2020-03-22 5467 3430 2020-03-23 5504 3457 2020-03-24 5540 3497 2020-03-25 5587 3550 2020-03-26 5643 3598 2020-03-27 5694 3638 2020-03-28 5742 3736 2020-03-29 5784 3799 2020-03-30 5827 3836 2020-03-31 5881 3905 2020-04-01 5941 3946
female = go.Bar(x=sex.index, y=sex.iloc[:,0], name='Female') male = go.Bar(x=sex.index, y=sex.iloc[:,1], name='Male') data = female, male # data layout = go.Layout(title='Confirmed(sex)', barmode='stack') fig = go.Figure(data=data, layout=layout) fig.show()
Out[-]
notion imagenotion image
age = confirmed.iloc[-1, 2:] age
Out[-] 0-9 116 10- 519 20-29 2682 30-39 1027 40-49 1323 50-59 1865 60-69 1245 70-79 658 80- 452 Name: 2020-04-01, dtype: int64
# 4월 1일 나이별 확진자 # bar 그래프 data = [go.Bar(x=age.index, y=age.values)] layout = go.Layout(title='Confirmed(age)') fig = go.Figure(data=data, layout=layout) fig.show()
Out[-]
notion imagenotion image
# 4월 1일 나이별 확진자 # bar 그래프 data = [go.Pie(labels=age.index, values=age.values)] layout = go.Layout(title='Confirmed(age)') fig = go.Figure(data=data, layout=layout) fig.show()
Out[-]
notion imagenotion image
# 치명률 # 4월 1일 기준 # 치명률 = 사망자수/확진자수 * 100
death = pd.read_csv('./data/daily_Deceased.csv', index_col = ['Date']) # 사망자 death.tail()
Out[-] Female Male 0-9 10- 20-29 30-39 40-49 50-59 60-69 70-79 80- Date 2020-03-28 67 77 0 0 0 1 1 10 21 41 70 2020-03-29 74 78 0 0 0 1 1 10 21 43 76 2020-03-30 78 80 0 0 0 1 1 10 21 45 80 2020-03-31 80 82 0 0 0 1 1 10 22 46 82 2020-04-01 81 84 0 0 0 1 1 10 23 46 84
confirmed_temp = confirmed.iloc[-1, :] death_temp = death.iloc[-1, :]
confirmed_temp
Out[-] Female 5941 Male 3946 0-9 116 10- 519 20-29 2682 30-39 1027 40-49 1323 50-59 1865 60-69 1245 70-79 658 80- 452 Name: 2020-04-01, dtype: int64
death_temp
Out[-] Female 81 Male 84 0-9 0 10- 0 20-29 0 30-39 1 40-49 1 50-59 10 60-69 23 70-79 46 80- 84 Name: 2020-04-01, dtype: int64
death_rate = round((death_temp/confirmed_temp)*100, 2) # round 소수점 2번째자리까지 반올림 death_rate
Out[-] Female 1.36 Male 2.13 0-9 0.00 10- 0.00 20-29 0.00 30-39 0.10 40-49 0.08 50-59 0.54 60-69 1.85 70-79 6.99 80- 18.58 Name: 2020-04-01, dtype: float64
sex = confirmed_temp[:2] sex
Out[-] Female 5941 Male 3946 Name: 2020-04-01, dtype: int64
sex_death = death_temp[:2] sex_death
Out[-] Female 81 Male 84 Name: 2020-04-01, dtype: int64
from plotly.subplots import make_subplots

# Two pies side by side, split by sex:
#   left  - confirmed cases (sex)
#   right - deaths (sex_death)
# Pie traces are placed in 'domain'-type subplot cells.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
labels = sex.index
values = sex.values
labels2 = sex_death.index
values2 = sex_death.values
fig.add_trace(go.Pie(labels=labels, values=values), row=1, col=1)
fig.add_trace(go.Pie(labels=labels2, values=values2), row=1, col=2)
# Title fixed: 'confiremd' was a typo, and the right-hand pie shows death
# counts (sex_death), not the death rate.
fig.update_layout(title='confirmed - death')
fig.show()
Out[-]
notion imagenotion image
age_confirmed = confirmed.iloc[-1, 2:] age_confirmed
Out[-] 0-9 116 10- 519 20-29 2682 30-39 1027 40-49 1323 50-59 1865 60-69 1245 70-79 658 80- 452 Name: 2020-04-01, dtype: int64
age_death = death_rate.iloc[2:] age_death
Out[-] 0-9 0.00 10- 0.00 20-29 0.00 30-39 0.10 40-49 0.08 50-59 0.54 60-69 1.85 70-79 6.99 80- 18.58 Name: 2020-04-01, dtype: float64
# Two bar charts side by side, split by age group:
#   left  - confirmed cases (age_confirmed)
#   right - death rate in percent (age_death, taken from death_rate)
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'bar'}, {'type': 'bar'}]])
labels = age_confirmed.index
values = age_confirmed.values
labels2 = age_death.index
values2 = age_death.values
fig.add_trace(go.Bar(x=labels, y=values, name='confirmed_age'), row=1, col=1)
fig.add_trace(go.Bar(x=labels2, y=values2, name='death_rate'), row=1, col=2)
# Title typo fixed: 'confiremd' -> 'confirmed'.
fig.update_layout(title='confirmed - death_rate(age)')
fig.show()
Out[-]
notion imagenotion image

2. 코로나 지역별 분석

# 데이터 불러오기(출처 : 질병관리본부) import pandas as pd from matplotlib import pyplot as plt region = pd.read_csv("./data/region.csv", index_col=['day'])
region.head()
Out[-] 서울 부산 대구 인천 광주 대전 울산 세종 경기 강원 충북 충남 전북 전남 경북 경남 제주 검역 총확진자수 day 2020-03-01 82 81 2569 6 9 13 17 1 84 7 11 60 5 3 514 62 2 0 3526 2020-03-02 91 88 3081 7 9 14 20 1 92 19 11 78 6 5 624 64 2 0 4212 2020-03-03 98 90 3601 7 11 14 20 1 94 20 11 81 7 5 685 64 3 0 4812 2020-03-04 99 93 4006 9 13 15 23 1 101 21 11 82 7 5 774 65 3 0 5328 2020-03-05 103 92 4326 9 14 16 23 1 110 23 12 86 7 5 861 74 4 0 5766
region.tail()
Out[-] 서울 부산 대구 인천 광주 대전 울산 세종 경기 강원 충북 충남 전북 전남 경북 경남 제주 검역 총확진자수 day 2020-03-28 390 114 6587 51 20 31 39 44 433 32 41 126 10 8 1285 91 8 168 9478 2020-03-29 410 117 6610 58 20 34 39 46 448 34 41 127 12 9 1287 94 8 189 9583 2020-03-30 426 118 6624 58 20 34 39 46 463 36 44 127 13 9 1298 95 9 202 9661 2020-03-31 450 119 6684 64 20 36 39 46 476 36 44 128 13 9 1300 96 9 217 9786 2020-04-01 474 122 6704 69 24 36 39 46 499 38 44 131 14 12 1302 100 9 224 9887
region.info()
Out[-] <class 'pandas.core.frame.DataFrame'> Index: 32 entries, 2020-03-01 to 2020-04-01 Data columns (total 19 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 서울 32 non-null int64 1 부산 32 non-null int64 2 대구 32 non-null int64 3 인천 32 non-null int64 4 광주 32 non-null int64 5 대전 32 non-null int64 6 울산 32 non-null int64 7 세종 32 non-null int64 8 경기 32 non-null int64 9 강원 32 non-null int64 10 충북 32 non-null int64 11 충남 32 non-null int64 12 전북 32 non-null int64 13 전남 32 non-null int64 14 경북 32 non-null int64 15 경남 32 non-null int64 16 제주 32 non-null int64 17 검역 32 non-null int64 18 총확진자수 32 non-null int64 dtypes: int64(19) memory usage: 5.0+ KB
region.isnull().sum()
Out[-] 서울 0 부산 0 대구 0 인천 0 광주 0 대전 0 울산 0 세종 0 경기 0 강원 0 충북 0 충남 0 전북 0 전남 0 경북 0 경남 0 제주 0 검역 0 총확진자수 0 dtype: int64
pd.options.display.max_rows = 1000 pd.options.display.max_columns = 100
region.tail()
Out[-] 서울 부산 대구 인천 광주 대전 울산 세종 경기 강원 충북 충남 전북 전남 경북 경남 제주 검역 총확진자수 day 2020-03-28 390 114 6587 51 20 31 39 44 433 32 41 126 10 8 1285 91 8 168 9478 2020-03-29 410 117 6610 58 20 34 39 46 448 34 41 127 12 9 1287 94 8 189 9583 2020-03-30 426 118 6624 58 20 34 39 46 463 36 44 127 13 9 1298 95 9 202 9661 2020-03-31 450 119 6684 64 20 36 39 46 476 36 44 128 13 9 1300 96 9 217 9786 2020-04-01 474 122 6704 69 24 36 39 46 499 38 44 131 14 12 1302 100 9 224 9887
last = region.iloc[-1,:] last
Out[-] 서울 474 부산 122 대구 6704 인천 69 광주 24 대전 36 울산 39 세종 46 경기 499 강원 38 충북 44 충남 131 전북 14 전남 12 경북 1302 경남 100 제주 9 검역 224 총확진자수 9887 Name: 2020-04-01, dtype: int64
last = region.iloc[-1,:-2] last
Out[-] 서울 474 부산 122 대구 6704 인천 69 광주 24 대전 36 울산 39 세종 46 경기 499 강원 38 충북 44 충남 131 전북 14 전남 12 경북 1302 경남 100 제주 9 Name: 2020-04-01, dtype: int64
location_data = go.Bar(x=last.index, y=last.values) layout = go.Layout(title='전국 확진자수', xaxis_title='지역', yaxis_title='확진자수') fig = go.Figure(data=location_data, layout=layout) fig.show()
Out[-]
notion imagenotion image
last = last.sort_values(ascending=True) # ascending 오름차순 # 낮은 것부터 차례로 배열 # descending 내림차순 # 높은 것부터 차례로 배열 location_data = go.Bar(x=last.index, y=last.values) layout = go.Layout(title='전국 확진자수', xaxis_title='지역', yaxis_title='확진자수') fig = go.Figure(data=location_data, layout=layout) fig.show()
Out[-]
notion imagenotion image
region.index
Out[-] Index(['2020-03-01', '2020-03-02', '2020-03-03', '2020-03-04', '2020-03-05', '2020-03-06', '2020-03-07', '2020-03-08', '2020-03-09', '2020-03-10', '2020-03-11', '2020-03-12', '2020-03-13', '2020-03-14', '2020-03-15', '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19', '2020-03-20', '2020-03-21', '2020-03-22', '2020-03-23', '2020-03-24', '2020-03-25', '2020-03-26', '2020-03-27', '2020-03-28', '2020-03-29', '2020-03-30', '2020-03-31', '2020-04-01'], dtype='object', name='day')
region.iloc[:,2]
Out[-] day 2020-03-01 2569 2020-03-02 3081 2020-03-03 3601 2020-03-04 4006 2020-03-05 4326 2020-03-06 4693 2020-03-07 5084 2020-03-08 5381 2020-03-09 5571 2020-03-10 5663 2020-03-11 5794 2020-03-12 5867 2020-03-13 5928 2020-03-14 5990 2020-03-15 6031 2020-03-16 6066 2020-03-17 6098 2020-03-18 6144 2020-03-19 6241 2020-03-20 6275 2020-03-21 6344 2020-03-22 6387 2020-03-23 6411 2020-03-24 6442 2020-03-25 6456 2020-03-26 6482 2020-03-27 6516 2020-03-28 6587 2020-03-29 6610 2020-03-30 6624 2020-03-31 6684 2020-04-01 6704 Name: 대구, dtype: int64
# Draw one line per region showing its confirmed-case count over time.
import plotly.graph_objects as go

fig = go.Figure()
# Iterate the region columns directly, skipping the last two columns
# ('검역' and '총확진자수'). The original looped over range(len(last)), which
# only gave the right column count because `last` happened to hold those
# same 17 regions at this point in the notebook.
for name in region.columns[:-2]:
    fig.add_trace(go.Scatter(x=region.index, y=region[name],
                             mode='lines+markers', name=name))
# Title typo fixed: '코나확진자수' -> '코로나 확진자수'.
fig.update_layout(title='코로나 확진자수', xaxis_title='Day', yaxis_title='확진자수')
fig.show()
Out[-]
notion imagenotion image
last = region.iloc[-1, :18] last
Out[-] 서울 474 부산 122 대구 6704 인천 69 광주 24 대전 36 울산 39 세종 46 경기 499 강원 38 충북 44 충남 131 전북 14 전남 12 경북 1302 경남 100 제주 9 검역 224 Name: 2020-04-01, dtype: int64
last.index
Out[-] Index(['서울', '부산', '대구', '인천', '광주', '대전', '울산', '세종', '경기', '강원', '충북', '충남', '전북', '전남', '경북', '경남', '제주', '검역'], dtype='object')
last.values
Out[-] array([ 474, 122, 6704, 69, 24, 36, 39, 46, 499, 38, 44, 131, 14, 12, 1302, 100, 9, 224], dtype=int64)
labels = last.index values = last.values fig = go.Figure(data=[go.Pie(labels=labels, values=values)]) fig.update_layout(title="지역별 확진자 비율") fig.show()
Out[-]
notion imagenotion image
region.iloc[:, 16]
Out[-] day 2020-03-01 2 2020-03-02 2 2020-03-03 3 2020-03-04 3 2020-03-05 4 2020-03-06 4 2020-03-07 4 2020-03-08 4 2020-03-09 4 2020-03-10 4 2020-03-11 4 2020-03-12 4 2020-03-13 4 2020-03-14 4 2020-03-15 4 2020-03-16 4 2020-03-17 4 2020-03-18 4 2020-03-19 4 2020-03-20 4 2020-03-21 4 2020-03-22 4 2020-03-23 4 2020-03-24 4 2020-03-25 6 2020-03-26 6 2020-03-27 7 2020-03-28 8 2020-03-29 8 2020-03-30 9 2020-03-31 9 2020-04-01 9 Name: 제주, dtype: int64
# region.index index = region.columns list(index).index('제주')
Out[-] 16
# Select the Jeju series by its column label ('제주' is column position 16).
jeju = region['제주']
jeju
Out[-] day 2020-03-01 2 2020-03-02 2 2020-03-03 3 2020-03-04 3 2020-03-05 4 2020-03-06 4 2020-03-07 4 2020-03-08 4 2020-03-09 4 2020-03-10 4 2020-03-11 4 2020-03-12 4 2020-03-13 4 2020-03-14 4 2020-03-15 4 2020-03-16 4 2020-03-17 4 2020-03-18 4 2020-03-19 4 2020-03-20 4 2020-03-21 4 2020-03-22 4 2020-03-23 4 2020-03-24 4 2020-03-25 6 2020-03-26 6 2020-03-27 7 2020-03-28 8 2020-03-29 8 2020-03-30 9 2020-03-31 9 2020-04-01 9 Name: 제주, dtype: int64
# plotly data = go.Bar(x=jeju.index, y=jeju.values,) layout = go.Layout(title='제주도 확진자수',xaxis_title='Day', yaxis_title='확진자수') fig = go.Figure(data=data, layout=layout) fig.show()
Out[-]
notion imagenotion image
# plotly semi-log graph of confirmed cases in Jeju (log-scaled y axis).
# go.Line is a deprecated plotly class; go.Scatter with mode='lines' is the
# supported way to draw a line trace.
data = go.Scatter(x=jeju.index, y=jeju.values, mode='lines')
layout = go.Layout(
    title='제주도 확진자수',
    xaxis_title='Day',
    yaxis_title='확진자수',
    yaxis_type='log',
)
fig = go.Figure(data=data, layout=layout)
fig.show()
Out[-]
notion imagenotion image

Map

import pandas as pd data = pd.read_csv("./data/region.csv", index_col=['day']) data.tail()
Out[-] 서울 부산 대구 인천 광주 대전 울산 세종 경기 강원 충북 충남 전북 전남 경북 경남 제주 검역 총확진자수 day 2020-03-28 390 114 6587 51 20 31 39 44 433 32 41 126 10 8 1285 91 8 168 9478 2020-03-29 410 117 6610 58 20 34 39 46 448 34 41 127 12 9 1287 94 8 189 9583 2020-03-30 426 118 6624 58 20 34 39 46 463 36 44 127 13 9 1298 95 9 202 9661 2020-03-31 450 119 6684 64 20 36 39 46 476 36 44 128 13 9 1300 96 9 217 9786 2020-04-01 474 122 6704 69 24 36 39 46 499 38 44 131 14 12 1302 100 9 224 9887
map_data = data.iloc[-1, :17] map_data = pd.DataFrame(map_data) map_data
Out[-] 2020-04-01 서울 474 부산 122 대구 6704 인천 69 광주 24 대전 36 울산 39 세종 46 경기 499 강원 38 충북 44 충남 131 전북 14 전남 12 경북 1302 경남 100 제주 9
loc = { '서울' : [37.566418, 126.977950],#서울시청 '부산' : [35.180152, 129.074980],#부산시청 '대구' : [35.871468, 128.601757],#대구시청 '인천' : [37.456445, 126.705873],#인천시청 '광주' : [35.160068, 126.851426],#광주광역시청 '대전' : [36.350664, 127.384819],#대전시청 '울산' : [35.539772, 129.311486],#울산시청 '세종' : [36.480838, 127.289181],#세종시청 '경기' : [37.275221, 127.009382],#경기도청 '강원' : [37.885300, 127.729835],#강원(강원도청) '충북' : [36.635947, 127.491345],#충북도청 '충남' : [36.658826, 126.672849],#충남도청 '전북' : [35.820599, 127.108759],#전북도청 '전남' : [34.816351, 126.462924],#전남도청 '경북' : [36.574108, 128.509303],#경북도청 '경남' : [35.238398, 128.692371],#경남도청 '제주' : [33.3617007, 126.511657]#제주 } type(loc)
Out[-] dict
# 위도(latitude)와 경도(longitude) loc = pd.DataFrame(loc).T loc.columns = ['lat', 'lon'] loc
Out[-] lat lon 서울 37.566418 126.977950 부산 35.180152 129.074980 대구 35.871468 128.601757 인천 37.456445 126.705873 광주 35.160068 126.851426 대전 36.350664 127.384819 울산 35.539772 129.311486 세종 36.480838 127.289181 경기 37.275221 127.009382 강원 37.885300 127.729835 충북 36.635947 127.491345 충남 36.658826 126.672849 전북 35.820599 127.108759 전남 34.816351 126.462924 경북 36.574108 128.509303 경남 35.238398 128.692371 제주 33.361701 126.511657
!pip install folium
Out[-] Collecting folium Downloading folium-0.10.1-py2.py3-none-any.whl (91 kB) Requirement already satisfied: numpy in c:\programdata\anaconda3\lib\site-packages (from folium) (1.16.4) Collecting branca>=0.3.0 Downloading branca-0.4.0-py3-none-any.whl (25 kB) Requirement already satisfied: requests in c:\programdata\anaconda3\lib\site-packages (from folium) (2.22.0) Requirement already satisfied: jinja2>=2.9 in c:\programdata\anaconda3\lib\site-packages (from folium) (2.10.1) Requirement already satisfied: six in c:\programdata\anaconda3\lib\site-packages (from branca>=0.3.0->folium) (1.12.0) Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\programdata\anaconda3\lib\site-packages (from requests->folium) (1.24.2) Requirement already satisfied: idna<2.9,>=2.5 in c:\programdata\anaconda3\lib\site-packages (from requests->folium) (2.8) Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\programdata\anaconda3\lib\site-packages (from requests->folium) (3.0.4) Requirement already satisfied: certifi>=2017.4.17 in c:\programdata\anaconda3\lib\site-packages (from requests->folium) (2019.6.16) Requirement already satisfied: MarkupSafe>=0.23 in c:\programdata\anaconda3\lib\site-packages (from jinja2>=2.9->folium) (1.1.1) Installing collected packages: branca, folium Successfully installed branca-0.4.0 folium-0.10.1
import matplotlib.pyplot as plt import folium map_osm = folium.Map(location=[35.824,127.147], zoom_start=7) map_osm
Out[-]
notion imagenotion image
from folium.plugins import MarkerCluster

# Add one clustered marker per region to the existing map.
marker_cluster = MarkerCluster().add_to(map_osm)
counts = map_data.iloc[:, 0]  # the single data column of map_data, indexed by region
# Walk the coordinate table row by row so that scalar lat/lon values are
# passed to folium (the original passed one-element Series slices and relied
# on a hard-coded range(17)).
for name, lat, lon in loc.itertuples():
    folium.Marker(
        [lat, lon],
        popup=f'{name}: {counts[name]}',  # plain text instead of a DataFrame repr
        icon=folium.Icon(color='red', icon='star'),
    ).add_to(marker_cluster)
map_osm.save("map.html")  # Korean text is garbled inside Jupyter, so also save as HTML
map_osm
Out[-]
notion imagenotion image
map_data.iloc[1]
Out[-] 2020-04-01 122 Name: 부산, dtype: int64
from folium.plugins import MarkerCluster

# Rebuild the map and draw one circle per region, sized by confirmed cases.
map_osm = folium.Map(location=[35.824, 127.147], zoom_start=7)
marker_cluster = MarkerCluster().add_to(map_osm)
counts = map_data.iloc[:, 0]  # the single data column of map_data, indexed by region
# Scalar lat/lon and a scalar count are passed to folium; the original used
# one-element Series slices and int() on a Series.
for name, lat, lon in loc.itertuples():
    folium.Circle(
        [lat, lon],
        tooltip=f'{name}: {counts[name]}',
        radius=int(counts[name]) * 10,  # confirmed cases x 10 (the old comment said x 20)
        fill_color='red',
        color='red',
    ).add_to(marker_cluster)
map_osm.save("map.html")
map_osm
Out[-]
notion imagenotion image

옵션값

  • stroke (Bool, True) : Whether to draw stroke along the path. Set it to false to disable borders on polygons or circles.
  • color (str, '#3388ff') : Stroke color.
  • weight (int, 3) : Stroke width in pixels.
  • opacity (float, 1.0) : Stroke opacity.
  • line_cap (str, 'round' (lineCap)) : A string that defines shape to be used at the end of the stroke.
  • line_join (str, 'round' (lineJoin)) : A string that defines shape to be used at the corners of the stroke.
  • dash_array (str, None (dashArray)) : A string that defines the stroke dash pattern. Doesn’t work on Canvas-powered layers in some old browsers.
  • dash_offset (str, None (dashOffset)) : A string that defines the distance into the dash pattern to start the dash. Doesn’t work on Canvas-powered layers in some old browsers.
  • fill (Bool, False) : Whether to fill the path with color. Set it to false to disable filling on polygons or circles.
  • fill_color (str, default to color (fillColor)) : Fill color. Defaults to the value of the color option.
  • fill_opacity (float, 0.2 (fillOpacity)) : Fill opacity.
  • fill_rule (str, 'evenodd' (fillRule)) : A string that defines how the inside of a shape is determined.
  • bubbling_mouse_events (Bool, True (bubblingMouseEvents)) : When true a mouse event on this path will trigger the same event on the map (unless L.DomEvent.stopPropagation is used).

3. 코로나19 한국 데이터 분석

# 현재 코로나 검사 현황 데이터(출처 : 질병관리본부) import pandas as pd df = pd.read_csv("./data/total.csv", index_col = ["date"]) df.head()
Out[-] 총계 확진자 격리해제 격리중 사망 검사중 결과 음성 date 2020-03-01 96985 3526 30 3479 17 32422 61037 2020-03-02 109591 4212 31 4159 22 33799 71580 2020-03-03 125851 4812 34 4750 28 35555 85484 2020-03-04 136707 5328 41 5255 32 28414 102965 2020-03-05 146541 5766 88 5643 35 21810 118965
df.tail()
Out[-] 총계 확진자 격리해제 격리중 사망 검사중 결과 음성 date 2020-03-28 387925 9478 4811 4523 144 16564 361883 2020-03-29 394141 9583 5033 4398 152 15028 369530 2020-03-30 395194 9661 5228 4275 158 13531 372002 2020-03-31 410564 9786 5408 4216 162 16892 383886 2020-04-01 421547 9887 5567 4155 165 16585 395075
df.info()
Out[-] <class 'pandas.core.frame.DataFrame'> Index: 32 entries, 2020-03-01 to 2020-04-01 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 총계 32 non-null int64 1 확진자 32 non-null int64 2 격리해제 32 non-null int64 3 격리중 32 non-null int64 4 사망 32 non-null int64 5 검사중 32 non-null int64 6 결과 음성 32 non-null int64 dtypes: int64(7) memory usage: 2.0+ KB
df.isnull().sum()
Out[-] 총계 0 확진자 0 격리해제 0 격리중 0 사망 0 검사중 0 결과 음성 0 dtype: int64
df.index
Out[-] Index(['2020-03-01', '2020-03-02', '2020-03-03', '2020-03-04', '2020-03-05', '2020-03-06', '2020-03-07', '2020-03-08', '2020-03-09', '2020-03-10', '2020-03-11', '2020-03-12', '2020-03-13', '2020-03-14', '2020-03-15', '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19', '2020-03-20', '2020-03-21', '2020-03-22', '2020-03-23', '2020-03-24', '2020-03-25', '2020-03-26', '2020-03-27', '2020-03-28', '2020-03-29', '2020-03-30', '2020-03-31', '2020-04-01'], dtype='object', name='date')
df.iloc[:,6]
Out[-] date 2020-03-01 61037 2020-03-02 71580 2020-03-03 85484 2020-03-04 102965 2020-03-05 118965 2020-03-06 136624 2020-03-07 151802 2020-03-08 162008 2020-03-09 171778 2020-03-10 184179 2020-03-11 196100 2020-03-12 209402 2020-03-13 222728 2020-03-14 235615 2020-03-15 243778 2020-03-16 251297 2020-03-17 261105 2020-03-18 270888 2020-03-19 282555 2020-03-20 292487 2020-03-21 303006 2020-03-22 308343 2020-03-23 315447 2020-03-24 324105 2020-03-25 334481 2020-03-26 341332 2020-03-27 352410 2020-03-28 361883 2020-03-29 369530 2020-03-30 372002 2020-03-31 383886 2020-04-01 395075 Name: 결과 음성, dtype: int64
df.columns[6]
Out[-] '결과 음성'
df.columns[1]
Out[-] '확진자'
# 코로나 확진자-음성 그래프 import plotly.graph_objects as go fig = go.Figure() fig.add_trace(go.Scatter(x=df.index, y=df.iloc[:,6], mode = 'lines+markers', name='결과 음성')) fig.add_trace(go.Scatter(x=df.index, y=df. iloc[:,1], mode = 'lines+markers', name='확진자')) fig.update_layout(title='코로나 양성, 음성', xaxis_title='Day', yaxis_title='count') fig.show()
Out[-]
notion imagenotion image
# 코로나 확진자-음성 그래프(semi-log) import plotly.graph_objects as go fig = go.Figure() fig.add_trace(go.Scatter(x=df.index, y=df.iloc[:,6], mode = 'lines+markers', name='결과 음성')) fig.add_trace(go.Scatter(x=df.index, y=df. iloc[:,1], mode = 'lines+markers', name='확진자')) fig.update_layout(title='코로나 양성, 음성', xaxis_title='Day', yaxis_title='count', yaxis_type='log' ) fig.show()
Out[-]
notion imagenotion image
d = df.copy() d.tail()
Out[-] 총계 확진자 격리해제 격리중 사망 검사중 결과 음성 date 2020-03-28 387925 9478 4811 4523 144 16564 361883 2020-03-29 394141 9583 5033 4398 152 15028 369530 2020-03-30 395194 9661 5228 4275 158 13531 372002 2020-03-31 410564 9786 5408 4216 162 16892 383886 2020-04-01 421547 9887 5567 4155 165 16585 395075
# Fatality rate (%) = deaths / confirmed * 100
# Recovery rate (%) = released from isolation / confirmed * 100
# Both are rounded to two decimals and stored as new columns of `d`.
d['치명률'] = (d['사망'] / d['확진자'] * 100).round(2)
d['완치율'] = (d['격리해제'] / d['확진자'] * 100).round(2)
d.tail()
Out[-] 총계 확진자 격리해제 격리중 사망 검사중 결과 음성 치명률 완치율 date 2020-03-28 387925 9478 4811 4523 144 16564 361883 1.52 50.76 2020-03-29 394141 9583 5033 4398 152 15028 369530 1.59 52.52 2020-03-30 395194 9661 5228 4275 158 13531 372002 1.64 54.11 2020-03-31 410564 9786 5408 4216 162 16892 383886 1.66 55.26 2020-04-01 421547 9887 5567 4155 165 16585 395075 1.67 56.31
d.index
Out[-] Index(['2020-03-01', '2020-03-02', '2020-03-03', '2020-03-04', '2020-03-05', '2020-03-06', '2020-03-07', '2020-03-08', '2020-03-09', '2020-03-10', '2020-03-11', '2020-03-12', '2020-03-13', '2020-03-14', '2020-03-15', '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19', '2020-03-20', '2020-03-21', '2020-03-22', '2020-03-23', '2020-03-24', '2020-03-25', '2020-03-26', '2020-03-27', '2020-03-28', '2020-03-29', '2020-03-30', '2020-03-31', '2020-04-01'], dtype='object', name='date')
d.iloc[:,7]
Out[-] date 2020-03-01 0.48 2020-03-02 0.52 2020-03-03 0.58 2020-03-04 0.60 2020-03-05 0.61 2020-03-06 0.67 2020-03-07 0.65 2020-03-08 0.70 2020-03-09 0.69 2020-03-10 0.72 2020-03-11 0.77 2020-03-12 0.84 2020-03-13 0.84 2020-03-14 0.89 2020-03-15 0.92 2020-03-16 0.91 2020-03-17 0.97 2020-03-18 1.00 2020-03-19 1.06 2020-03-20 1.09 2020-03-21 1.16 2020-03-22 1.17 2020-03-23 1.24 2020-03-24 1.33 2020-03-25 1.38 2020-03-26 1.42 2020-03-27 1.49 2020-03-28 1.52 2020-03-29 1.59 2020-03-30 1.64 2020-03-31 1.66 2020-04-01 1.67 Name: 치명률, dtype: float64
d_g = go.Bar(x=d.index, y=d.iloc[:,7]) layout = go.Layout(title='치명율(사망/확진자수)') fig = go.Figure(data=d_g, layout=layout) fig.show()
Out[-]
notion imagenotion image
# Stacked bar chart of the recovery rate and fatality rate per day.
# The traces get their own names (death_bar / cure_bar) so that this cell no
# longer overwrites `death`, the deaths DataFrame loaded earlier in the notebook.
# Labels use the spelling '치명률', matching the column name in `d`.
death_bar = go.Bar(x=d.index, y=d['치명률'], name="치명률")
cure_bar = go.Bar(x=d.index, y=d['완치율'], name="완치율")
data = cure_bar, death_bar
layout = go.Layout(title="치명률-완치율그래프", barmode='stack')
fig = go.Figure(data=data, layout=layout)
fig.show()
Out[-]
notion imagenotion image
# Line chart of the daily fatality rate and recovery rate.
# Both columns are percentages, so the y axis is labelled as a rate rather
# than 'count'; labels use the spelling '치명률', matching the column name.
fig = go.Figure()
fig.add_trace(go.Scatter(x=d.index, y=d['치명률'], mode='lines+markers', name='치명률'))
fig.add_trace(go.Scatter(x=d.index, y=d['완치율'], mode='lines+markers', name='완치율'))
fig.update_layout(title='치명률-완치율 그래프', xaxis_title='Day', yaxis_title='rate(%)')
fig.show()
Out[-]
notion imagenotion image
today = df.copy() today.tail()
Out[-] 총계 확진자 격리해제 격리중 사망 검사중 결과 음성 date 2020-03-28 387925 9478 4811 4523 144 16564 361883 2020-03-29 394141 9583 5033 4398 152 15028 369530 2020-03-30 395194 9661 5228 4275 158 13531 372002 2020-03-31 410564 9786 5408 4216 162 16892 383886 2020-04-01 421547 9887 5567 4155 165 16585 395075
print(today.columns[1]) print(today.columns[2]) print(today.columns[4])
Out[-] 확진자 격리해제 사망
# Daily new counts = day-over-day difference of the cumulative columns.
# The first row has no previous day in this file, so diff() yields NaN there;
# it is filled with the hand-supplied first-day values (595 confirmed,
# 3 released from isolation, 1 death).
# This replaces a manual loop that indexed a date-labelled Series with
# integer keys (series[i]), which relies on pandas' deprecated positional
# fallback.
일일확진자 = today['확진자'].diff().fillna(595).astype('int64').to_frame('일일확진자')
일일격리해제 = today['격리해제'].diff().fillna(3).astype('int64').to_frame('일일격리해제')
일일사망자 = today['사망'].diff().fillna(1).astype('int64').to_frame('일일사망자')

# Append the three single-column frames to `today` (aligned on the date index).
today = pd.concat([today, 일일확진자, 일일격리해제, 일일사망자], axis=1)
today
Out[-] 총계 확진자 격리해제 격리중 사망 검사중 결과 음성 일일확진자 일일격리해제 일일사망자 date 2020-03-28 387925 9478 4811 4523 144 16564 361883 146 283 5 2020-03-29 394141 9583 5033 4398 152 15028 369530 105 222 8 2020-03-30 395194 9661 5228 4275 158 13531 372002 78 195 6 2020-03-31 410564 9786 5408 4216 162 16892 383886 125 180 4 2020-04-01 421547 9887 5567 4155 165 16585 395075 101 159 3
# 일일확진자-격리해제 그래프 fig=go.Figure() fig.add_trace(go.Scatter(x=today.index, y=today.iloc[:,7], mode = 'lines+markers', name='일일확진자')) fig.add_trace(go.Scatter(x=today.index, y=today.iloc[:,8], mode = 'lines+markers', name='일일격리해제')) fig.update_layout(title='일일확진자-격리해제 그래프', xaxis_title='Day', yaxis_title='count') fig.show()
Out[-]
notion imagenotion image
# Stacked bar chart of daily confirmed cases and daily releases from isolation.
# The empty go.Figure() the original created first was immediately
# overwritten, so it has been removed.
t1 = go.Bar(x=today.index, y=today['일일확진자'], name='일일확진자')
t2 = go.Bar(x=today.index, y=today['일일격리해제'], name='일일격리해제')
data = t2, t1
layout = go.Layout(title='일일확진자-격리해제 그래프', barmode='stack')
fig = go.Figure(data=data, layout=layout)
fig.show()
Out[-]
notion imagenotion image
# Pick the two series used by the next chart, by column label:
# daily deaths (last column of `today`) and fatality rate (second-to-last column of `d`).
일일사망자 = today['일일사망자']
치명률 = d['치명률']
치명률
Out[-] date 2020-03-01 0.48 2020-03-02 0.52 2020-03-03 0.58 2020-03-04 0.60 2020-03-05 0.61 2020-03-06 0.67 2020-03-07 0.65 2020-03-08 0.70 2020-03-09 0.69 2020-03-10 0.72 2020-03-11 0.77 2020-03-12 0.84 2020-03-13 0.84 2020-03-14 0.89 2020-03-15 0.92 2020-03-16 0.91 2020-03-17 0.97 2020-03-18 1.00 2020-03-19 1.06 2020-03-20 1.09 2020-03-21 1.16 2020-03-22 1.17 2020-03-23 1.24 2020-03-24 1.33 2020-03-25 1.38 2020-03-26 1.42 2020-03-27 1.49 2020-03-28 1.52 2020-03-29 1.59 2020-03-30 1.64 2020-03-31 1.66 2020-04-01 1.67 Name: 치명률, dtype: float64
import matplotlib.pyplot as plt

# Twin-axis chart: daily deaths as bars (left axis) and fatality rate as a
# line (right axis).
# The original referenced `death_1`, which is never defined, and `death`,
# which by this point is a plotly bar trace; the series prepared in the
# previous cell (일일사망자, 치명률) are used instead.
plt.rcParams['figure.figsize'] = 15, 8  # figure size
fig, ax = plt.subplots()
plt.xticks(rotation=90)

# ax: bar chart of daily deaths
ax.bar(일일사망자.index, 일일사망자.values, color='gray')
ax.set_xlabel("년/월/일")
ax.set_ylabel('사망자(수)')
ax.set_ylim(0, 10)

# ax1: line chart of the fatality rate, sharing the x axis with ax
ax1 = ax.twinx()
ax1.plot(치명률, color='r', label='치명율')
ax1.set_ylim(0.0, 2)
ax1.set_ylabel('치명율')

plt.title('일일 사망자 현황')
plt.grid(True)
plt.show()
Out[-]
notion imagenotion image

4. 세계 코로나 현황

import pandas as pd

# Load the three global time-series tables (confirmed, deaths, recovered)
# from the local ./data directory.
confirmed, deaths, recovered = map(
    pd.read_csv,
    (
        './data/time_series_covid_19_confirmed.csv',
        './data/time_series_covid_19_deaths.csv',
        './data/time_series_covid_19_recovered.csv',
    ),
)
# Print the column names, non-null counts and dtypes of `confirmed`.
confirmed.info()
Out[-] <class 'pandas.core.frame.DataFrame'> RangeIndex: 258 entries, 0 to 257 Data columns (total 76 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Province/State 80 non-null object 1 Country/Region 258 non-null object 2 Lat 258 non-null float64 3 Long 258 non-null float64 4 1/22/20 258 non-null int64 5 1/23/20 258 non-null int64 6 1/24/20 258 non-null int64 7 1/25/20 258 non-null int64 8 1/26/20 258 non-null int64 9 1/27/20 258 non-null int64 10 1/28/20 258 non-null int64 11 1/29/20 258 non-null int64 12 1/30/20 258 non-null int64 13 1/31/20 258 non-null int64 14 2/1/20 258 non-null int64 15 2/2/20 258 non-null int64 16 2/3/20 258 non-null int64 17 2/4/20 258 non-null int64 18 2/5/20 258 non-null int64 19 2/6/20 258 non-null int64 20 2/7/20 258 non-null int64 21 2/8/20 258 non-null int64 22 2/9/20 258 non-null int64 23 2/10/20 258 non-null int64 24 2/11/20 258 non-null int64 25 2/12/20 258 non-null int64 26 2/13/20 258 non-null int64 27 2/14/20 258 non-null int64 28 2/15/20 258 non-null int64 29 2/16/20 258 non-null int64 30 2/17/20 258 non-null int64 31 2/18/20 258 non-null int64 32 2/19/20 258 non-null int64 33 2/20/20 258 non-null int64 34 2/21/20 258 non-null int64 35 2/22/20 258 non-null int64 36 2/23/20 258 non-null int64 37 2/24/20 258 non-null int64 38 2/25/20 258 non-null int64 39 2/26/20 258 non-null int64 40 2/27/20 258 non-null int64 41 2/28/20 258 non-null int64 42 2/29/20 258 non-null int64 43 3/1/20 258 non-null int64 44 3/2/20 258 non-null int64 45 3/3/20 258 non-null int64 46 3/4/20 258 non-null int64 47 3/5/20 258 non-null int64 48 3/6/20 258 non-null int64 49 3/7/20 258 non-null int64 50 3/8/20 258 non-null int64 51 3/9/20 258 non-null int64 52 3/10/20 258 non-null int64 53 3/11/20 258 non-null int64 54 3/12/20 258 non-null int64 55 3/13/20 258 non-null int64 56 3/14/20 258 non-null int64 57 3/15/20 258 non-null int64 58 3/16/20 258 non-null int64 59 3/17/20 258 non-null int64 60 3/18/20 258 non-null int64 61 3/19/20 
258 non-null int64 62 3/20/20 258 non-null int64 63 3/21/20 258 non-null int64 64 3/22/20 258 non-null int64 65 3/23/20 258 non-null int64 66 3/24/20 258 non-null int64 67 3/25/20 258 non-null int64 68 3/26/20 258 non-null int64 69 3/27/20 258 non-null int64 70 3/28/20 258 non-null int64 71 3/29/20 258 non-null int64 72 3/30/20 258 non-null int64 73 3/31/20 258 non-null int64 74 4/1/20 258 non-null int64 75 4/2/20 258 non-null int64 dtypes: float64(2), int64(72), object(2) memory usage: 153.3+ KB
# Preview the first rows of `confirmed` (head() defaults to 5 rows).
confirmed.head()
Out[-] Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/28/20 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 0 NaN Afghanistan 33.0000 65.0000 0 0 0 0 0 0 ... 110 120 170 174 237 273 1 NaN Albania 41.1533 20.1683 0 0 0 0 0 0 ... 197 212 223 243 259 277 2 NaN Algeria 28.0339 1.6596 0 0 0 0 0 0 ... 454 511 584 716 847 986 3 NaN Andorra 42.5063 1.5218 0 0 0 0 0 0 ... 308 334 370 376 390 428 4 NaN Angola -11.2027 17.8739 0 0 0 0 0 0 ... 5 7 7 7 8 8 5 rows × 76 columns
# Preview the last rows of `confirmed` (tail() defaults to 5 rows).
confirmed.tail()
Out[-] Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/28/20 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 253 NaN Botswana -22.328500 24.684900 0 0 0 0 0 0 ... 0 0 3 4 4 4 254 NaN Burundi -3.373100 29.918900 0 0 0 0 0 0 ... 0 0 0 2 2 3 255 NaN Sierra Leone 8.460555 -11.779889 0 0 0 0 0 0 ... 0 0 0 1 2 2 Bonaire, Sint 256 Eustatius and Netherlands 12.178400 -68.238500 0 0 0 0 0 0 ... 0 0 0 0 0 2 Saba 257 NaN Malawi -13.254308 34.301525 0 0 0 0 0 0 ... 0 0 0 0 0 3 5 rows × 76 columns

데이터 전처리

# Preprocessing: remove the 'Province/State' column from each table.
confirmed_1, deaths_1, recovered_1 = (
    frame.drop(columns=['Province/State'])
    for frame in (confirmed, deaths, recovered)
)
# Preview the first rows of `confirmed_1` after dropping 'Province/State'.
confirmed_1.head()
Out[-] Country/Region 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 3/28/20 3/29/20 3/30/20 3/31/20 4/1/20 4/2/20 0 Afghanistan 0 0 0 0 0 0 ... 110 120 170 174 237 273 1 Albania 0 0 0 0 0 0 ... 197 212 223 243 259 277 2 Algeria 0 0 0 0 0 0 ... 454 511 584 716 847 986 3 Andorra 0 0 0 0 0 0 ... 308 334 370 376 390 428 4 Angola 0 0 0 0 0 0 ... 5 7 7 7 8 8 5 rows × 75 columns

중복된 Country를 찾고 그룹화 하기

# Find countries that occupy more than one row.
# NOTE(review): `confirmed_2` is not defined in the visible cells; presumably
# it is `confirmed_1` with the Lat/Long columns removed - confirm.
country = confirmed_2.iloc[:, 0]
df = country.value_counts()

# value_counts() is indexed by country name, so integer lookups such as
# df[0] / df[i] rely on deprecated positional fallback. A boolean mask keeps
# the descending-count order and avoids positional indexing entirely.
for name in df[df > 1].index:
    print(name)
Out[-] China Canada France United Kingdom Australia Netherlands Denmark
# Show how many rows each country has in `confirmed_2`
# (first column = country name).
confirmed_2.iloc[:, 0]
confirmed_2.iloc[:, 0].value_counts()
Out[-] China 33 Canada 15 France 10 United Kingdom 10 Australia 8 .. Congo (Brazzaville) 1 Holy See 1 Armenia 1 Sweden 1 Malawi 1 Name: Country/Region, Length: 181, dtype: int64
  • 중복된 나라 : China, Canada, United Kingdom, France, Australia, Netherlands, Denmark
# Collapse countries that span several rows into one row each by summing
# their values.
confirmed_3, deaths_3, recovered_3 = (
    frame.groupby('Country/Region').sum()
    for frame in (confirmed_2, deaths_2, recovered_2)
)
# Spot-check the grouped totals, transposed so that dates run down the rows.
# Only the last expression (United Kingdom) is shown as the cell output.
confirmed_3.loc[['China']].T
confirmed_3.loc[['United Kingdom']].T
Out[-] Country/Region United Kingdom 1/22/20 0 1/23/20 0 1/24/20 0 1/25/20 0 1/26/20 0 ... ... 3/28/20 17312 3/29/20 19780 3/30/20 22453 3/31/20 25481 4/1/20 29865 4/2/20 34173 72 rows × 1 columns

시각화

# Last column of the grouped table = most recent date in the data.
confirmed_3.iloc[:, -1]
# Cumulative confirmed count for China on that date.
confirmed_3.iloc[:, -1].loc['China']
Out[-] 82432
# Cumulative counts as of the last date column (April 2 in this data set).
confirmed_4, deaths_4, recovered_4 = (
    frame.iloc[:, -1]
    for frame in (confirmed_3, deaths_3, recovered_3)
)
# Countries ordered from most to fewest cumulative confirmed cases.
df = confirmed_4.sort_values(ascending=False)
df
Out[-] Country/Region US 243453 Italy 115242 Spain 112065 Germany 84794 China 82432 ... Burundi 3 Sierra Leone 2 Saint Vincent and the Grenadines 2 Papua New Guinea 1 Timor-Leste 1 Name: 4/2/20, Length: 181, dtype: int64
import plotly.graph_objects as go
import plotly.express as px

# Bar chart of cumulative confirmed cases per country, in the order of
# confirmed_4's index (linear y-axis).
fig = px.bar(
    confirmed_4,
    x=confirmed_4.index,
    y=confirmed_4.values,
    height=600,
)
fig.update_layout(
    title_text='Covid19 : Confirmed',
    xaxis_title='Country',
    yaxis_title='confirmed',
)
fig.show()
Out[-]
notion imagenotion image
import plotly.graph_objects as go
import plotly.express as px

# Same bar chart, but using the series sorted in descending order (`df`)
# and a logarithmic y-axis.
fig = px.bar(
    df,
    x=df.index,
    y=df.values,
    height=600,
)
fig.update_layout(
    title_text='Covid19 : Confirmed',
    xaxis_title='Country',
    yaxis_title='confirmed',
    yaxis_type='log',
)
fig.show()
Out[-]
notion imagenotion image
import plotly.graph_objects as go

# Sort each series independently from largest to smallest.
df1 = confirmed_4.sort_values(ascending=False)
df2 = deaths_4.sort_values(ascending=False)
df3 = recovered_4.sort_values(ascending=False)

# Stacked bars of confirmed / deaths / recovered per country on a log y-axis.
# The third legend label was misspelled 'recoverd' in the original.
fig = go.Figure(
    data=[
        go.Bar(name='confirmed', x=df1.index, y=df1.values),
        go.Bar(name='deaths', x=df2.index, y=df2.values),
        go.Bar(name='recovered', x=df3.index, y=df3.values),
    ]
)
fig.update_layout(
    barmode='stack',
    title_text='4월 2일 코로나 확진자, 사망자, 완치자',
    yaxis_type='log',
)
fig.show()
Out[-]
 
notion imagenotion image

Top 15

# Take the 15 countries with the most cumulative confirmed cases.
# Their names are reused below to pull the matching rows from the
# deaths and recovered series.
confirmed_5 = confirmed_4.sort_values(ascending=False).head(15)

# Country names of the top 15, in descending order of confirmed cases.
confirmed_list15 = list(confirmed_5.index)
confirmed_list15
Out[-] ['US', 'Italy', 'Spain', 'Germany', 'China', 'France', 'Iran', 'United Kingdom', 'Switzerland', 'Turkey', 'Belgium', 'Netherlands', 'Canada', 'Austria', 'Korea, South']
# Display the top-15 confirmed series.
confirmed_5
Out[-] Country/Region US 243453 Italy 115242 Spain 112065 Germany 84794 China 82432 France 59929 Iran 50468 United Kingdom 34173 Switzerland 18827 Turkey 18135 Belgium 15348 Netherlands 14788 Canada 11284 Austria 11129 Korea, South 9976 Name: 4/2/20, dtype: int64
# Wrap the top-15 series in a DataFrame so it has a named column that the
# deaths/recovered frames below can copy.
confirmed_5 = pd.DataFrame(confirmed_5)
confirmed_5
Out[-] 4/2/20 Country/Region US 243453 Italy 115242 Spain 112065 Germany 84794 China 82432 France 59929 Iran 50468 United Kingdom 34173 Switzerland 18827 Turkey 18135 Belgium 15348 Netherlands 14788 Canada 11284 Austria 11129 Korea, South 9976
# Cumulative deaths for the top-15 countries, in the same order as
# confirmed_list15.
#
# The original scanned deaths_4.index once per country and read values with
# deaths_4[index], an integer-position lookup on a string-labelled Series
# (deprecated). A label-based .loc lookup returns the same values in one
# step and raises KeyError if a country is missing, instead of failing
# later with a shape mismatch.
deaths_5 = pd.DataFrame(
    deaths_4.loc[confirmed_list15].to_numpy(),
    index=confirmed_5.index,
    columns=confirmed_5.columns,
)
deaths_5
Out[-] 4/2/20 Country/Region US 7087 Italy 14681 Spain 11198 Germany 1275 China 3326 France 6520 Iran 3294 United Kingdom 3611 Switzerland 591 Turkey 425 Belgium 1143 Netherlands 1490 Canada 179 Austria 168 Korea, South 174
deaths_4
# Third entry by position. deaths_4 is indexed by country name, so a plain
# deaths_4[2] relies on deprecated positional fallback; .iloc states the
# intent explicitly.
deaths_4.iloc[2]
Out[-] 105
# Print each top-15 country next to its zero-based rank.
for position, name in enumerate(confirmed_list15):
    print(position, name)
Out[-] 0 US 1 Italy 2 Spain 3 Germany 4 China 5 France 6 Iran 7 United Kingdom 8 Switzerland 9 Turkey 10 Belgium 11 Netherlands 12 Canada 13 Austria 14 Korea, South
# Cumulative recoveries for the top-15 countries, in the same order as
# confirmed_list15.
#
# The original used a nested scan over recovered_4.index and the
# integer-position lookup recovered_4[index] on a string-labelled Series
# (deprecated). A label-based .loc lookup gives the same values directly.
recovered_5 = pd.DataFrame(
    recovered_4.loc[confirmed_list15].to_numpy(),
    index=confirmed_5.index,
    columns=confirmed_5.columns,
)
recovered_5
Out[-] 4/2/20 Country/Region US 9707 Italy 19758 Spain 30513 Germany 24575 China 76760 France 14135 Iran 17935 United Kingdom 208 Switzerland 4846 Turkey 484 Belgium 2872 Netherlands 260 Canada 2175 Austria 2022 Korea, South 6021
import plotly.graph_objects as go

# Stacked bars of confirmed / deaths / recovered for the top-15 countries.
# The first legend label was misspelled 'confiremd' in the original.
fig = go.Figure(
    data=[
        go.Bar(name='confirmed', x=confirmed_5.index, y=confirmed_5['4/2/20']),
        go.Bar(name='deaths', x=deaths_5.index, y=deaths_5['4/2/20']),
        go.Bar(name='recovered', x=recovered_5.index, y=recovered_5['4/2/20']),
    ]
)
fig.update_layout(
    barmode='stack',
    title_text=' top 15 : 4월 2일 코로나 확진자, 사망자, 완치자 (수)',
)
fig.show()
Out[-]
notion imagenotion image

US COVID-19

# Select the 'US' row of the grouped table: one cumulative value per date column.
confirmed_3.loc["US", ]
Out[-] 1/22/20 1 1/23/20 1 1/24/20 2 1/25/20 2 1/26/20 5 ... 3/29/20 140886 3/30/20 161807 3/31/20 188172 4/1/20 213372 4/2/20 243453 Name: US, Length: 72, dtype: int64
# Time-series bar chart of US confirmed / deaths / recovered (log y-axis).
# The US row is looked up once per table instead of twice, and the third
# legend label is 'recovered' (the original showed the variable name
# 'recovered_3', inconsistent with the other two labels).
fig = go.Figure()
for frame, legend_name in (
    (confirmed_3, 'confirmed'),
    (deaths_3, 'deaths'),
    (recovered_3, 'recovered'),
):
    us_series = frame.loc["US"]
    fig.add_trace(go.Bar(x=us_series.index, y=us_series, name=legend_name))
fig.update_layout(yaxis_type='log')
fig.show()
Out[-]
notion imagenotion image
# Time-series line chart of US confirmed / deaths / recovered (log y-axis).
# The US row is looked up once per table instead of twice, and the third
# legend label is 'recovered' (the original showed the variable name
# 'recovered_3', inconsistent with the other two labels).
fig = go.Figure()
for frame, legend_name in (
    (confirmed_3, 'confirmed'),
    (deaths_3, 'deaths'),
    (recovered_3, 'recovered'),
):
    us_series = frame.loc["US"]
    fig.add_trace(
        go.Scatter(
            x=us_series.index,
            y=us_series,
            mode='lines + markers',
            name=legend_name,
        )
    )
fig.update_layout(yaxis_type='log')
fig.show()
Out[-]
notion imagenotion image