The Whole Program

Prev Next

Overview

Below you see the whole program described above.

Whole Program

// Part 1: Import

file download overwrite         ( "https://de.wikipedia.org/wiki/Liste_der_Staaten_der_Erde", countries 1.html );
table load                      ( c1, countries 1.html, HTML, "international gebr" );

file download overwrite         ( "https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)", countries 2.html );
table load                      ( c2, countries 2.html, HTML, "mid-year estimates" );

table initialize                ( country names resolved,
                                { { Country Name Pattern,              Country Name to use },
                                  { 'Gambia*',                         Gambia },
                                  { 'Congo*Democratic*,DR*Congo',      'Congo (DR)' },
                                  { 'Congo,Congo*Brazzaville*',        'Congo (Brazzaville)' },
                                  { 'Lao*',                            Laos },
                                  { 'Korea*Democratic*,North Korea',   'Korea (North)' },
                                  { 'Korea*Republic*,South Korea',     'Korea (South)' },
                                  { 'Micronesia*',                     Micronesia },
                                  { 'Brunei*',                         Brunei },
                                  { 'Türkiye',                         Turkey } } );

// Part 2: Clean

table clean                     ( c1 );
table delete selected rows      ( c1, [0]='',Erde );           // Remove blank and summary rows
table process all cells         ( c1, [.] = left( [.], '[' ) );        // Remove cited references
table delete rows               ( c1, -1 );                    // Delete last row (bottom header row - repeated)

table clean                     ( c2, trim spaces );
table delete selected rows      ( c2, [0]='',World );          // Remove blank and summary rows
table process all cells         ( c2, [.] = left( [.], '[' ) );        // Remove cited references


// Part 3: Validate

table correct headers           ( c1, { 'Einwohner*', 'Fläche*', Englischer Name },
                                     { Inhabitants,  Area,      Country,        } );
table keep columns              ( c1, { Country, Area, Inhabitants } );
table process cells in columns  ( c1, { Area, Inhabitants }, [.] = replace(str([.]) / '.',',','.') );
// Let's ignore messy data in CHINA and DENMARK (Free text on HK, Macao, Greenland, Faroer Is.)

table keep selected rows        ( c1, is numeric( [Area] ) );
table process                   ( c1, [Country] = left([Country],' oder ') ); // Remove country name in other language after ' oder'
table lookup smart ignore case  ( c1, Country, Country, country names resolved, Country Name Pattern, Country Name to use );


most recent population[]        = filter( [c2:..,0], 'Population*' ) { -1 }; // Take right most matching header name from row 0

table delete selected rows      ( c2, [Country]=World );
table correct headers           ( c2, { most recent population[], '*Continent*' }, { Population, Continent } );
table process cells in columns  ( c2, Population, [.] = str([.]) / ',' );
table keep columns              ( c2, { Country, Continent, Population } );
table delete selected rows      ( c2, [Country]='*(*)*' );             // Overseas departements with home countries in parentheses
table lookup smart ignore case  ( c2, Country, Country, country names resolved, Country Name Pattern, Country Name to use );


// Part 4: Merge

table merge extend columns      ( c2, c1, Country );
table delete selected rows      ( c1, [Continent]='' ); // Questionable countries

// Part 5: Augment

[c1:Country,{China,Denmark},Area] = {9597000,42952}; // km2  (Denmark without Greenland, and CN without China South Sea)

table insert columns            ( c1, { Inhabitants Variation, Inhabitants per km2 } );
table process selected rows     ( c1, [Inhabitants] == '', [Inhabitants] = [Population]);
table process                   ( c1, [Inhabitants per km2]   = [Inhabitants] / [Area];
                                      [Inhabitants Variation] = ([Inhabitants] - [Population])/[Inhabitants] );


// Part 6: Analyze

// Create a histogram distribution across number of inhabitants and density

intervals[inhabitants]          
  = {  1000000, 2000000, 5000000, 10000000, 20000000, 50000000, 100000000, 200000000, 500000000, 1000000000 };
intervals[density]              = { 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000 };
intervals[]                     = { intervals[inhabitants], intervals[density] }; // Combine them in 1 set

table histogram                 ( c1, histo, {Population,Inhabitants per km2}, intervals[] );
table process selected rows     ( histo, is numeric([Intervals]), [Intervals] = ">=" + str([Intervals], "#,##0", local ) );
table process selected columns  ( histo, 0, is numeric([.]), [.] = ">=" + str([.], "#,##0", local ) );

// Remove the 'Others' as not needed.

table delete columns           ( histo, Others );
table delete selected rows     ( histo, [Intervals]=Others );

// Provide consolidated info by continent

table copy table                ( c1, c2 );
table consolidate               ( c2, Continent, { Area, Population}, sum); // Consolidate by continent
table process                   ( c2, [Inhabitants per km2]   = [Inhabitants] / [Area];
                                      [Inhabitants Variation] = ([Inhabitants] - [Population])/[Inhabitants];
                                      [Country] = "All countries in " + [Continent] );
for all ( {c1, c2}, table[])   table delete columns( table[], Inhabitants);
table merge                     ( c2, c1 );


// Part 7: Format

table sort selected rows        ( c1, [Country]{0..3}!="All ", Country ); // Ensure ascending order in English

table style auto width          ( c1 );
table style columns             ( c1, Area, sheet, column width, 16 );
table style columns             ( c1, {Area, Population},    table, number format, "#,##0");
table style columns             ( c1, Inhabitants Variation, table, number format, "0.00%");
table style columns             ( c1, Inhabitants per km2,   table, number format, "#,##0.00");
table style table               ( c1, sheet, freeze rows, 1, autofilter, 0 );
table style rows                ( c1, 0, table, boldface, true );

table style table               ( histo, sheet, column width, 10 );
table style columns             ( histo, 0, sheet, column width, 20, boldface, true );
table style rows                ( histo, 0, table, boldface, true, row height, 30 );
table style cells               ( histo, 0, 0, single, wrap text, true );

table process cells             ( histo, if (col()>0)
                                  {
                                      shade[] = limit(255-10*[.],0,255);
                                      table style cells( histo, col(), row(), single, fill color, {shade[],255,shade[]});
                                  } );


// Part 8: Export

table save excel file            ( {c1, histo}, {Countries, Histogram}, Result.xlsx );
echo                             ( "The histogram");
table list                       ( histo );    

Output 01

    Saved table(s) to EXCEL file 'Result.xlsx' containing 2 sheets.

The histogram
    0 : Intervals       | Below | >=10 | >=20 | >=50 | >=100 | >=200 | >=500 | >=1'000 | >=2'000 | >=5'000 | >=10'000
    1 : Below           | 3     | 1    | 7    | 3    | 6     | 13    | 1     | 2       | 1       | 0       | 1       
    2 : >=1'000'000     | 0     | 0    | 3    | 4    | 2     | 1     | 1     | 1       | 0       | 0       | 0       
    3 : >=2'000'000     | 4     | 0    | 3    | 7    | 3     | 4     | 0     | 0       | 0       | 0       | 0       
    4 : >=5'000'000     | 3     | 7    | 4    | 4    | 7     | 3     | 2     | 0       | 0       | 1       | 0       
    5 : >=10'000'000    | 0     | 3    | 5    | 9    | 8     | 5     | 1     | 0       | 0       | 0       | 0       
    6 : >=20'000'000    | 3     | 4    | 6    | 11   | 6     | 3     | 1     | 0       | 0       | 0       | 0       
    7 : >=50'000'000    | 0     | 0    | 3    | 4    | 4     | 2     | 1     | 0       | 0       | 0       | 0       
    8 : >=100'000'000   | 1     | 0    | 1    | 1    | 2     | 3     | 0     | 1       | 0       | 0       | 0       
    9 : >=200'000'000   | 0     | 0    | 2    | 0    | 1     | 2     | 0     | 0       | 0       | 0       | 0       
   10 : >=500'000'000   | 0     | 0    | 0    | 0    | 0     | 0     | 0     | 0       | 0       | 0       | 0       
   11 : >=1'000'000'000 | 0     | 0    | 0    | 0    | 1     | 1     | 0     | 0       | 0       | 0       | 0       

Try it yourself: Open TAB_Features_The_Whole_Program.b4p in B4P_Examples.zip. Decompress before use.