I performed a data analysis on the COVID-19 dataset from Johns Hopkins University and the World Happiness Report and found some really interesting results. It suggests that people living in developed countries are more prone to infection by the coronavirus than people living in less developed countries.
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Sanity check: reaching this line means every import above succeeded.
print("All modules imported!")
All modules imported!
https://github.com/CSSEGISandData/COVID-19
# Load the confirmed-cases time series: one row per province/country and
# one column per date.
confirmed_cases_path = "Datasets/time_series_covid19_confirmed_global.csv"
corona_dataset_csv = pd.read_csv(confirmed_cases_path)
# Preview the first five rows.
corona_dataset_csv.head()
Province/State | Country/Region | Lat | Long | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | ... | 5/26/20 | 5/27/20 | 5/28/20 | 5/29/20 | 5/30/20 | 5/31/20 | 6/1/20 | 6/2/20 | 6/3/20 | 6/4/20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | Afghanistan | 33.0000 | 65.0000 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 11831 | 12456 | 13036 | 13659 | 14525 | 15205 | 15750 | 16509 | 17267 | 18054 |
1 | NaN | Albania | 41.1533 | 20.1683 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1029 | 1050 | 1076 | 1099 | 1122 | 1137 | 1143 | 1164 | 1184 | 1197 |
2 | NaN | Algeria | 28.0339 | 1.6596 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 8697 | 8857 | 8997 | 9134 | 9267 | 9394 | 9513 | 9626 | 9733 | 9831 |
3 | NaN | Andorra | 42.5063 | 1.5218 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 763 | 763 | 763 | 764 | 764 | 764 | 765 | 844 | 851 | 852 |
4 | NaN | Angola | -11.2027 | 17.8739 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 70 | 71 | 74 | 81 | 84 | 86 | 86 | 86 | 86 | 86 |
5 rows × 139 columns
# (rows, columns) of the raw confirmed-cases table.
corona_dataset_csv.shape
(266, 139)
# The coordinates are not used in this analysis; remove them in place so only
# the region labels and the per-date counts remain.
corona_dataset_csv.drop(columns=["Lat", "Long"], inplace=True)
# Preview the first 15 rows of the trimmed table.
corona_dataset_csv.head(n=15)
Province/State | Country/Region | 1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | ... | 5/26/20 | 5/27/20 | 5/28/20 | 5/29/20 | 5/30/20 | 5/31/20 | 6/1/20 | 6/2/20 | 6/3/20 | 6/4/20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 11831 | 12456 | 13036 | 13659 | 14525 | 15205 | 15750 | 16509 | 17267 | 18054 |
1 | NaN | Albania | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1029 | 1050 | 1076 | 1099 | 1122 | 1137 | 1143 | 1164 | 1184 | 1197 |
2 | NaN | Algeria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 8697 | 8857 | 8997 | 9134 | 9267 | 9394 | 9513 | 9626 | 9733 | 9831 |
3 | NaN | Andorra | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 763 | 763 | 763 | 764 | 764 | 764 | 765 | 844 | 851 | 852 |
4 | NaN | Angola | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 70 | 71 | 74 | 81 | 84 | 86 | 86 | 86 | 86 | 86 |
5 | NaN | Antigua and Barbuda | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 25 | 25 | 25 | 25 | 25 | 26 | 26 | 26 | 26 | 26 |
6 | NaN | Argentina | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 13228 | 13933 | 14702 | 15419 | 16214 | 16851 | 17415 | 18319 | 19268 | 20197 |
7 | NaN | Armenia | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 7402 | 7774 | 8216 | 8676 | 8927 | 9282 | 9492 | 10009 | 10524 | 11221 |
8 | Australian Capital Territory | Australia | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 107 | 107 | 107 | 107 | 107 | 107 | 107 | 107 | 107 | 107 |
9 | New South Wales | Australia | 0 | 0 | 0 | 0 | 3 | 4 | 4 | 4 | ... | 3089 | 3090 | 3092 | 3092 | 3095 | 3098 | 3104 | 3104 | 3106 | 3110 |
10 | Northern Territory | Australia | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 29 | 29 | 29 | 29 | 29 | 29 | 29 | 29 | 29 | 29 |
11 | Queensland | Australia | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 1058 | 1058 | 1058 | 1058 | 1058 | 1058 | 1059 | 1059 | 1060 | 1060 |
12 | South Australia | Australia | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 440 | 440 | 440 | 440 | 440 | 440 | 440 | 440 | 440 | 440 |
13 | Tasmania | Australia | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 228 | 228 | 228 | 228 | 228 | 228 | 228 | 228 | 228 | 228 |
14 | Victoria | Australia | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | ... | 1618 | 1628 | 1634 | 1645 | 1649 | 1653 | 1663 | 1670 | 1678 | 1681 |
15 rows × 137 columns
# Collapse the province/state rows into a single row per country by summing
# the per-date counts.
# numeric_only=True keeps the text column "Province/State" out of the
# aggregation: pandas >= 2.0 no longer drops such columns silently, and a
# summed string column would break the row-wise .diff() calls used later.
aggregated_corona_dataset = corona_dataset_csv.groupby("Country/Region").sum(numeric_only=True)
aggregated_corona_dataset.head(10)
1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | 1/31/20 | ... | 5/26/20 | 5/27/20 | 5/28/20 | 5/29/20 | 5/30/20 | 5/31/20 | 6/1/20 | 6/2/20 | 6/3/20 | 6/4/20 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Country/Region | |||||||||||||||||||||
Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 11831 | 12456 | 13036 | 13659 | 14525 | 15205 | 15750 | 16509 | 17267 | 18054 |
Albania | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1029 | 1050 | 1076 | 1099 | 1122 | 1137 | 1143 | 1164 | 1184 | 1197 |
Algeria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 8697 | 8857 | 8997 | 9134 | 9267 | 9394 | 9513 | 9626 | 9733 | 9831 |
Andorra | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 763 | 763 | 763 | 764 | 764 | 764 | 765 | 844 | 851 | 852 |
Angola | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 70 | 71 | 74 | 81 | 84 | 86 | 86 | 86 | 86 | 86 |
Antigua and Barbuda | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 25 | 25 | 25 | 25 | 25 | 26 | 26 | 26 | 26 | 26 |
Argentina | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 13228 | 13933 | 14702 | 15419 | 16214 | 16851 | 17415 | 18319 | 19268 | 20197 |
Armenia | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 7402 | 7774 | 8216 | 8676 | 8927 | 9282 | 9492 | 10009 | 10524 | 11221 |
Australia | 0 | 0 | 0 | 0 | 4 | 5 | 5 | 6 | 9 | 9 | ... | 7139 | 7150 | 7165 | 7184 | 7192 | 7202 | 7221 | 7229 | 7240 | 7247 |
Austria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 16557 | 16591 | 16628 | 16655 | 16685 | 16731 | 16733 | 16759 | 16771 | 16805 |
10 rows × 135 columns
# Line plot of Nepal's row: one value per date column of the aggregated table.
nepal_cases = aggregated_corona_dataset.loc["Nepal"]
nepal_axes = nepal_cases.plot()
nepal_axes.set_title("Rate of Covid 19 Growth in Nepal")
nepal_axes.legend()
<matplotlib.legend.Legend at 0x21a2bd54e88>
# Plot the day-over-day change in Nepal's counts; .diff() subtracts each
# date's value from the next one, so the first entry is NaN.
aggregated_corona_dataset.loc["Nepal"].diff().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x21a2be86408>
# Largest single-day increase in Nepal's counts over the recorded period.
aggregated_corona_dataset.loc["Nepal"].diff().max()
334.0
# Compute, for every country, the largest single-day increase in its counts
# and store it as the "max_infection_rates" column.
countries = list(aggregated_corona_dataset.index)

# Leave out a "max_infection_rates" column from a previous run so that
# re-executing this cell does not feed the old result into the diff.
daily_counts = aggregated_corona_dataset.drop(
    columns="max_infection_rates", errors="ignore"
)

# diff(axis=1) subtracts each date column from the next one for all countries
# at once; max(axis=1) then picks each row's largest increase. This replaces
# the per-country Python loop with the equivalent vectorized operation.
max_infection_rates = daily_counts.diff(axis=1).max(axis=1).tolist()

aggregated_corona_dataset["max_infection_rates"] = max_infection_rates
aggregated_corona_dataset.head()
1/22/20 | 1/23/20 | 1/24/20 | 1/25/20 | 1/26/20 | 1/27/20 | 1/28/20 | 1/29/20 | 1/30/20 | 1/31/20 | ... | 5/27/20 | 5/28/20 | 5/29/20 | 5/30/20 | 5/31/20 | 6/1/20 | 6/2/20 | 6/3/20 | 6/4/20 | max_infection_rates | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Country/Region | |||||||||||||||||||||
Afghanistan | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 12456 | 13036 | 13659 | 14525 | 15205 | 15750 | 16509 | 17267 | 18054 | 866.0 |
Albania | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 1050 | 1076 | 1099 | 1122 | 1137 | 1143 | 1164 | 1184 | 1197 | 34.0 |
Algeria | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 8857 | 8997 | 9134 | 9267 | 9394 | 9513 | 9626 | 9733 | 9831 | 199.0 |
Andorra | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 763 | 763 | 764 | 764 | 764 | 765 | 844 | 851 | 852 | 79.0 |
Angola | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 71 | 74 | 81 | 84 | 86 | 86 | 86 | 86 | 86 | 8.0 |
5 rows × 136 columns
# Keep only the per-country maximum daily increase, as a one-column DataFrame
# indexed by country.
max_rate_column = aggregated_corona_dataset["max_infection_rates"]
corona_data = max_rate_column.to_frame()
corona_data.head()
max_infection_rates | |
---|---|
Country/Region | |
Afghanistan | 866.0 |
Albania | 34.0 |
Algeria | 199.0 |
Andorra | 79.0 |
Angola | 8.0 |
# Load the World Happiness Report table (one row per country or region).
happiness_report_path = "Datasets/worldwide_happiness_report.csv"
happiness_report_csv = pd.read_csv(happiness_report_path)
# Preview the first five rows.
happiness_report_csv.head()
Overall rank | Country or region | Score | GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | Generosity | Perceptions of corruption | |
---|---|---|---|---|---|---|---|---|---|
0 | 1 | Finland | 7.769 | 1.340 | 1.587 | 0.986 | 0.596 | 0.153 | 0.393 |
1 | 2 | Denmark | 7.600 | 1.383 | 1.573 | 0.996 | 0.592 | 0.252 | 0.410 |
2 | 3 | Norway | 7.554 | 1.488 | 1.582 | 1.028 | 0.603 | 0.271 | 0.341 |
3 | 4 | Iceland | 7.494 | 1.380 | 1.624 | 1.026 | 0.591 | 0.354 | 0.118 |
4 | 5 | Netherlands | 7.488 | 1.396 | 1.522 | 0.999 | 0.557 | 0.322 | 0.298 |
# Columns that are not compared against the infection rate in this analysis.
useless_cols = [
    "Overall rank",
    "Score",
    "Generosity",
    "Perceptions of corruption",
]
happiness_report_csv.drop(columns=useless_cols, inplace=True)
# Index by country name so the table can be joined with corona_data,
# which is indexed by "Country/Region".
happiness_report_csv.set_index(keys="Country or region", inplace=True)
happiness_report_csv.head()
GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | |
---|---|---|---|---|
Country or region | ||||
Finland | 1.340 | 1.587 | 0.986 | 0.596 |
Denmark | 1.383 | 1.573 | 0.996 | 0.592 |
Norway | 1.488 | 1.582 | 1.028 | 0.603 |
Iceland | 1.380 | 1.624 | 1.026 | 0.591 |
Netherlands | 1.396 | 1.522 | 0.999 | 0.557 |
# (rows, columns) of the per-country infection-rate table.
corona_data.shape
(188, 1)
# (rows, columns) of the cleaned happiness table; compare with
# corona_data.shape to see how many countries each source covers.
happiness_report_csv.shape
(156, 4)
# Combine both tables on their country-name index. The inner join keeps only
# countries whose name appears, spelled identically, in both sources.
# NOTE(review): countries spelled differently in the two datasets are dropped
# silently here -- confirm the resulting row count is as expected.
final_data = corona_data.join(other=happiness_report_csv, how="inner")
final_data.head()
max_infection_rates | GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | |
---|---|---|---|---|---|
Afghanistan | 866.0 | 0.350 | 0.517 | 0.361 | 0.000 |
Albania | 34.0 | 0.947 | 0.848 | 0.874 | 0.383 |
Algeria | 199.0 | 1.002 | 1.160 | 0.785 | 0.086 |
Argentina | 949.0 | 1.092 | 1.432 | 0.881 | 0.471 |
Armenia | 697.0 | 0.850 | 1.055 | 0.815 | 0.283 |
# Pairwise correlation matrix of all columns (pandas' default method is Pearson).
# NOTE(review): this uses the raw max_infection_rates values, whereas the
# regression plots below use np.log of that column.
final_data.corr()
max_infection_rates | GDP per capita | Social support | Healthy life expectancy | Freedom to make life choices | |
---|---|---|---|---|---|
max_infection_rates | 1.000000 | 0.207071 | 0.158977 | 0.218118 | 0.071825 |
GDP per capita | 0.207071 | 1.000000 | 0.757521 | 0.859431 | 0.394799 |
Social support | 0.158977 | 0.757521 | 1.000000 | 0.751632 | 0.456317 |
Healthy life expectancy | 0.218118 | 0.859431 | 0.751632 | 1.000000 | 0.423146 |
Freedom to make life choices | 0.071825 | 0.394799 | 0.456317 | 0.423146 | 1.000000 |
# Scatter plot with a fitted regression line: GDP per capita against the
# natural log of each country's maximum daily increase in cases.
# x and y are passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; the keyword form also works on older releases.
x = final_data["GDP per capita"]
y = final_data["max_infection_rates"]
sns.regplot(x=x, y=np.log(y)).set_title("Relationship Between Corona Infection Rate and GDP per Capita")
Text(0.5, 1.0, 'Relationship Between Corona Infection Rate and GDP per Capita')
# Scatter plot with a fitted regression line: social support against the
# natural log of each country's maximum daily increase in cases.
# x and y are passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; the keyword form also works on older releases.
x = final_data["Social support"]
y = final_data["max_infection_rates"]
sns.regplot(x=x, y=np.log(y)).set_title("Relationship Between Corona Infection Rate and Social Support")
Text(0.5, 1.0, 'Relationship Between Corona Infection Rate and Social Support')
# Scatter plot with a fitted regression line: healthy life expectancy against
# the natural log of each country's maximum daily increase in cases.
# x and y are passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; the keyword form also works on older releases.
x = final_data["Healthy life expectancy"]
y = final_data["max_infection_rates"]
sns.regplot(x=x, y=np.log(y)).set_title("Relationship Between Corona Infection Rate and Health Life Expectancy")
Text(0.5, 1.0, 'Relationship Between Corona Infection Rate and Health Life Expectancy')
# Scatter plot with a fitted regression line: freedom to make life choices
# against the natural log of each country's maximum daily increase in cases.
# x and y are passed by keyword because seaborn >= 0.12 accepts only `data`
# positionally; the keyword form also works on older releases.
x = final_data["Freedom to make life choices"]
y = final_data["max_infection_rates"]
sns.regplot(x=x, y=np.log(y)).set_title("Relationship Between Corona Infection Rate and Freedom to make life choices")
Text(0.5, 1.0, 'Relationship Between Corona Infection Rate and Freedom to make life choices')