发布时间:2022-09-12 00:00
本模块的数据是小编自己从网上爬取的,涵盖2020-01-22至2020-11-06期间全球的冠状病毒疫情数据。
# Load the source CSV files, unpacked in this order:
#   confirmed_df  - global confirmed cases, 2020-01-22 through 2020-11-06
#   deaths_df     - global deaths, 2020-01-22 through 2020-11-06
#   recoveries_df - global recoveries, 2020-01-22 through 2020-11-06
#   latest_data   - the most recent single-day report
confirmed_df, deaths_df, recoveries_df, latest_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'./covid19_confirmed_global.csv',
        r'./covid19_deaths_global.csv',
        r'./covid19_recovered_global.csv',
        r'./11-06-2020.csv',
    )
)
# Preview the first few rows.
confirmed_df.head()
# Slice the frames so only the per-date columns remain, for easier plotting.
# NOTE(review): assumes the first five columns are non-date metadata —
# confirm against the CSV header.
confirmed = confirmed_df.iloc[:, 5:]
deaths = deaths_df.iloc[:, 5:]
recoveries = recoveries_df.iloc[:, 5:]
dates = confirmed.keys()  # all date column labels

world_cases = []      # total confirmed cases per day
total_deaths = []     # total deaths per day
total_recovered = []  # total recoveries per day
total_active = []     # confirmed - deaths - recoveries per day
mortality_rate = []   # deaths / confirmed per day
recovery_rate = []    # recoveries / confirmed per day

# Aggregate every date column across all rows.
for i in dates:
    confirmed_sum = confirmed[i].sum()
    deaths_sum = deaths[i].sum()
    recoveries_sum = recoveries[i].sum()

    world_cases.append(confirmed_sum)
    total_deaths.append(deaths_sum)
    total_recovered.append(recoveries_sum)
    total_active.append(confirmed_sum - deaths_sum - recoveries_sum)

    # Mortality and recovery rates for this day.
    mortality_rate.append(deaths_sum / confirmed_sum)
    # Fix: the original divided the whole `total_recovered` list by a scalar,
    # which raises TypeError; the per-day recovery sum is what is needed.
    recovery_rate.append(recoveries_sum / confirmed_sum)
# Helpers for daily increases and moving averages.
def daily_increase(data):
    """Return the day-over-day increase for a cumulative series.

    The first entry is the first value itself (there is no previous day);
    every later entry is the current value minus the previous one.
    """
    return [
        current if position == 0 else current - data[position - 1]
        for position, current in enumerate(data)
    ]
def moving_average(data, window_size):
    """Return forward-looking moving averages of ``data``.

    Entry ``k`` is the mean of ``data[k:k + window_size]``. Near the end of
    the series fewer than ``window_size`` values remain, so the mean is taken
    over whatever is left (slicing clamps at the end of the sequence).
    """
    return [
        np.mean(data[start:start + window_size])
        for start in range(len(data))
    ]
# Data preprocessing
# Moving-average window: one week.
window = 7

# Confirmed cases: daily increase, plus moving averages of totals and increases.
world_daily_increase = daily_increase(world_cases)
world_confirmed_avg = moving_average(world_cases, window)
world_daily_increase_avg = moving_average(world_daily_increase, window)

# Deaths
world_daily_deaths = daily_increase(total_deaths)
world_deaths_avg = moving_average(total_deaths, window)
world_daily_deaths_avg = moving_average(world_daily_deaths, window)

# Recoveries
world_daily_recovery = daily_increase(total_recovered)
world_recovery_avg = moving_average(total_recovered, window)
# NOTE: the misspelled name ("dialy") is kept unchanged because code outside
# this section may reference it.
world_dialy_recovery_avg = moving_average(world_daily_recovery, window)

# Active cases
world_active_avg = moving_average(total_active, window)

# Convert to column vectors: reshape(-1, 1) fixes one column and lets numpy
# infer the number of rows. This happens after the averages above, which are
# computed from the plain lists.
days_since_1_22 = np.arange(len(dates)).reshape(-1, 1)
world_cases = np.array(world_cases).reshape(-1, 1)
total_deaths = np.array(total_deaths).reshape(-1, 1)
# Fix: the original line was missing its closing parenthesis (SyntaxError).
total_recovered = np.array(total_recovered).reshape(-1, 1)

# Day indices extended 10 days past the data, for forecasting.
days_in_future = 10
future_forcast = np.arange(len(dates) + days_in_future).reshape(-1, 1)
# Day indices covering only the observed data. Slicing by len(dates) instead
# of a hard-coded [:-10] keeps this in sync with days_in_future.
adjusted_dates = future_forcast[:len(dates)]
import matplotlib as mpl
import matplotlib.gridspec as gridspec
# Draw a 2x2 grid of subplots: confirmed, deaths, recoveries, active.
mpl.rcParams['font.family'] = 'SimHei'
gs = gridspec.GridSpec(2, 2)
plt.figure(figsize=(12, 11), dpi=250)
ax1, ax2, ax3, ax4 = (
    plt.subplot(gs[row, col]) for row in range(2) for col in range(2)
)


def _plot_with_moving_average(ax, series, average, title, series_label,
                              **legend_kwargs):
    """Plot a series and its dashed moving average on ``ax``, then label it."""
    ax.plot(adjusted_dates, series)
    ax.plot(adjusted_dates, average, linestyle='dashed', color='orange')
    # Setting several axis properties in one call is quicker to write than
    # individual set_xlabel / set_title calls.
    ax.set(xlabel='# of Days Since 1/22/2020',
           ylabel='# of Cases',
           title=title)
    ax.legend([series_label, 'Moving Average {} Days'.format(window)],
              **legend_kwargs)


# Subplot 1: worldwide confirmed cases (legend uses the default font size).
_plot_with_moving_average(
    ax1, world_cases, world_confirmed_avg,
    '# of Coronavirus Cases Over Time',
    'Worldwide Coronavirus Cases')
# Subplot 2: worldwide deaths.
_plot_with_moving_average(
    ax2, total_deaths, world_deaths_avg,
    '# of Coronavirus Deaths Over Time',
    'Worldwide Coronavirus Deaths Cases',
    prop={'size':10})
# Subplot 3: worldwide recoveries.
_plot_with_moving_average(
    ax3, total_recovered, world_recovery_avg,
    '# of Coronavirus Recoveries Over Time',
    'Worldwide Coronavirus Recoveries',
    prop={'size':10})
# Subplot 4: worldwide active cases.
_plot_with_moving_average(
    ax4, total_active, world_active_avg,
    '# of Coronavirus Active Over Time',
    'Worldwide Coronavirus Active',
    prop={'size':10})
# Table pairing the last ten entries of `future_forcast_dates` with the last
# ten worldwide confirmed-case totals.
# NOTE(review): `future_forcast_dates` is not defined in the visible code;
# presumably it holds date labels for `future_forcast` — confirm that its last
# ten entries really correspond to the last ten rows of `world_cases`.
last_ten_dates = future_forcast_dates[-10:]
# Flatten the (10, 1) column vector into a 1-D row before rounding.
last_ten_cases = np.round(world_cases[-10:].reshape(1,-1)[0])
worldwide_confirmed_df = pd.DataFrame({
    'Date': last_ten_dates,
    '# of Confirmed Cases Worldwide': last_ten_cases,
})
# Shade the table cells with a red gradient.
worldwide_confirmed_df.style.background_gradient(cmap='Reds')
# Daily increase in worldwide deaths, with its moving average.
plt.figure(figsize=(10,5))
# `adjusted_dates` is built as an (n, 1) column vector, but bar() needs 1-D
# x positions matching the 1-D heights, so it is flattened here.
day_indices = np.ravel(adjusted_dates)
plt.bar(day_indices, world_daily_deaths)
plt.plot(day_indices, world_daily_deaths_avg, linestyle='dashed', color='orange')
plt.title("World Daily Increases in Confirmed Deaths", size=20)
plt.xlabel("# of Days Since 1/22/2020", size=20)
plt.ylabel("# of Cases", size=20)
plt.legend(['World Daily Increase in COVID-19 Deaths','Moving Average {} Days'.format(window)], prop={'size':15})
# Fix: the original assigned `plt.xticks=15` / `plt.yticks=15`, which replaces
# the pyplot functions with the integer 15 and sets no font size. Calling them
# with a Text property sets the tick-label size as intended.
plt.xticks(size=15)
plt.yticks(size=15)
plt.show()
其他代码太长,这里就不附上了……
。。。。。