import pandas as pd
import seaborn as sns
from io import StringIO
from matplotlib import pyplot as plt
# Widen pandas' console output limits so the whole table can be printed
# while debugging: up to 500 rows, 200 characters per line.
pd.options.display.max_rows = 500
pd.options.display.width = 200
# Screen-scraped string containing the contents of
# https://en.wikipedia.org/wiki/List_of_European_countries_by_area#List_of_European_countries_and_dependencies_by_area
s = StringIO('''
– Europe 100% 10,014,000 3,866,000
1 K Russia 39.5% 3,952,550 1,526,090 [a]
2 Ukraine 6.0% 603,549 233,032 [b]
3 T France 5.4% 543,941 210,017 [c]
4 T Spain 5.0% 498,485 192,466 [d]
5 Sweden 4.4% 438,574 169,334
6 Germany 3.6% 357,581 138,063
7 Finland 3.4% 336,884 130,072 [e]
8 Norway 3.2% 323,772 125,009 [f]
9 Poland 3.1% 312,679 120,726
10 T Italy 3.0% 301,958 116,587 [g]
11 United Kingdom 2.4% 244,381 94,356 [h]
12 Romania 2.4% 238,298 92,007
13 Belarus 2.1% 207,600 80,200
14 K Kazakhstan 1.5% 148,000 57,000 [i]
15 T Greece 1.3% 131,957 50,949 [j]
16 Bulgaria 1.1% 110,372 42,615
17 Iceland 1.0% 103,000 40,000
18 Hungary 0.9% 93,025 35,917
19 T Portugal 0.9% 91,424 35,299 [k]
20 Austria 0.8% 83,878 32,385
21 Czechia 0.8% 78,871 30,452
22 Serbia 0.8% 77,589 29,957 [l]
23 Ireland 0.7% 69,825 26,960
24 Lithuania 0.7% 65,286 25,207
25 Latvia 0.6% 64,594 24,940
– Svalbard (Norway) 0.6% 62,045 23,956 [m]
26 Croatia 0.6% 56,594 21,851
27 Bosnia 0.5% 51,209 19,772
28 Slovakia 0.5% 49,035 18,933
29 Estonia 0.5% 45,399 17,529
30 T Denmark 0.4% 42,947 16,582 [n]
31 T Netherlands 0.4% 41,543 16,040 [o]
32 Switzerland 0.4% 41,291 15,943
33 Moldova 0.3% 33,847 13,068
34 Belgium 0.3% 30,528 11,787
35 Albania 0.3% 28,748 11,100
36 Macedonia 0.3% 25,713 9,928
37 K Turkey 0.2% 23,757 9,173 [p]
38 Slovenia 0.2% 20,273 7,827
39 Montenegro 0.1% 13,888 5,362
– Kosovo 0.1% 10,910 4,210 [q]
40 K Azerbaijan 0.07% 6,960 2,690 [r]
– Transnistria 4,163 1,607 [s]
41 K Georgia 0.03% 3,040 1,170 [t]
42 Luxembourg 0.03% 2,586 998
– Åland (Finland) 0.02% 1,583 611 [u]
– Faroe Islands (Denmark) 0.01% 1,393 538 [v]
– Isle of Man (UK) 0.006% 572 221
43 Andorra 0.005% 468 181
44 Malta 0.003% 315 122
45 Liechtenstein 0.002% 160 62
– Jersey (UK) 0.001% 116 45
– Guernsey (UK) 0.001% 78 30
46 San Marino 0.001% 61 24
– Gibraltar (UK) 0% 7 2.7 [w]
47 Monaco 0% 2 0.77 [x]
48 Vatican City 0% 0.49 0.19 [y]
– C Abkhazia 8,664.59 3,345.42 [z]
– C South Ossetia 3,885 1,500 [aa]
– C Akrotiri and Dhekelia (UK) 0% 254 98 [ab]
49 C Armenia 0% 29,743 11,484 [ac]
50 C Cyprus 0% 9,251 3,572
'''
# Parse the tab-separated text. Only the first four of the six named columns
# are kept; ',' is treated as a thousands separator in numeric fields.
table: pd.DataFrame = pd.read_csv(
    s,
    sep='\t',
    header=None,
    names=['Rank', 'Country', 'Percent', 'Area_km2', 'Area_mi2', 'Refs'],
    usecols=['Rank', 'Country', 'Percent', 'Area_km2'],
    thousands=',',
)
# Clean up table
# The raw Rank field holds the rank, optionally followed by whitespace and a
# caveat marker; split it at the first whitespace into two columns.
_rank_parts = table['Rank'].str.split(n=1, expand=True)
table['Caveat'] = None
table[['Rank', 'Caveat']] = _rank_parts
# Dash placeholders mean "unranked": turn them into NA, then use the nullable
# integer dtype so the remaining ranks are real integers.
table['Rank'] = table['Rank'].replace(['-', '\u2012', '\u2013', '\u2014'], pd.NA).astype('Int64')
table['Percent'] = table['Percent'].str.removesuffix('%').astype(float)
table['Country'] = table['Country'].str.strip()
# Create plot
def cmap(s: pd.Series):
    """Return the RGB colour tuple used to draw one table row's bar.

    The base colour depends on ``s.Area_km2``: purple ``(.5, 0, .5)`` above
    1e6, green ``(0, .625, 0)`` above 1e3, red ``(1, 0, 0)`` otherwise.
    The colour is then blended towards white according to ``s.Caveat``:
    'T' or 'K' keep 2/3 of each channel's distance from white, 'C' keeps 1/4;
    any other value leaves the base colour unchanged.
    """
    area = s.Area_km2
    # Thresholds are checked from largest to smallest; the first match wins.
    for threshold, colour in ((1e6, (.5, 0, .5)), (1e3, (0, .625, 0))):
        if area > threshold:
            base = colour
            break
    else:
        base = (1, 0, 0)

    caveat = s.Caveat
    if caveat in ('T', 'K'):
        return tuple([1 - ((1 - channel) * 2 / 3) for channel in base])
    if caveat == 'C':
        return tuple([1 - ((1 - channel) / 4) for channel in base])
    return base
# Draw one bar per country, largest area first, on a log-scaled y axis.
fig, ax = plt.subplots(figsize=(12, 5))
# Keep rows that have both a rank and a country name; Kosovo is named
# explicitly so it is plotted even without a rank.
t = table.query('(~Rank.isna() & ~Country.isna()) | Country=="Kosovo"').sort_values('Area_km2', ascending=False)
# One RGB tuple per row, used as the per-bar palette below.
t['Color'] = t.apply(axis=1, func=cmap)
ax = sns.barplot(t, x='Country', y='Area_km2', palette=t['Color'].values, ax=ax, log=True)
# Restyle the tick labels in place instead of re-setting them with
# set_xticklabels/set_yticklabels: those calls freeze the current label
# strings, which can go stale when the ticks are recomputed later (e.g. by
# tight_layout on the auto-located log axis).
ax.tick_params(axis='y', labelsize=12)
plt.setp(ax.get_xticklabels(), rotation=60, ha='right', rotation_mode='anchor', fontsize=12)
ax.set_ylabel("Area (km\u00b2)", fontsize=20)
ax.set_xlabel("Country", fontsize=20)
# Save output
fig.tight_layout()  # prevent truncation of the rotated country labels
fig.savefig('/tmp/Area_in_log_scale_of_European_countries.svg')