B4P Reference

Overview

Below you see the whole program described above.

Whole Program



// Part 1: Import



file download overwrite         ( "https://de.wikipedia.org/wiki/Liste_der_Staaten_der_Erde", countries 1.html );

table load                      ( c1, countries 1.html, HTML, "international gebr" );



file download overwrite         ( "https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)", countries 2.html );

table load                      ( c2, countries 2.html, HTML, "mid-year estimates" );



table initialize                ( country names resolved,

                                { { Country Name Pattern,              Country Name to use },

                                  { 'Gambia*',                         Gambia },

                                  { 'Congo*Democratic*,DR*Congo',      'Congo (DR)' },

                                  { 'Congo,Congo*Brazzaville*',        'Congo (Brazzaville)' },

                                  { 'Lao*',                            Laos },

                                  { 'Korea*Democratic*,North Korea',   'Korea (North)' },

                                  { 'Korea*Republic*,South Korea',     'Korea (South)' },

                                  { 'Micronesia*',                     Micronesia },

                                  { 'Brunei*',                         Brunei },

                                  { 'Türkiye',                         Turkey } } );



// Part 2: Clean



table clean                     ( c1 );

table delete selected rows      ( c1, [0]='',Erde );           // Remove blank and summary rows

table process all cells         ( c1, [.] = left( [.], '[' ) );        // Remove cited references

table delete rows               ( c1, -1 );                    // Delete last row (bottom header row - repeated)



table clean                     ( c2, trim spaces );

table delete selected rows      ( c2, [0]='',World );          // Remove blank and summary rows

table process all cells         ( c2, [.] = left( [.], '[' ) );        // Remove cited references





// Part 3: Validate



table correct headers           ( c1, { 'Einwohner*', 'Fläche*', Englischer Name }, 

                                     { Inhabitants,  Area,      Country,        } );

table keep columns              ( c1, { Country, Area, Inhabitants } );

table process cells in columns  ( c1, { Area, Inhabitants }, [.] = replace(str([.]) / '.',',','.') );

// Let's ignore messy data in CHINA and DENMARK (Free text on HK, Macao, Greenland, Faroer Is.)



table keep selected rows        ( c1, is numeric( [Area] ) );

table process                   ( c1, [Country] = left([Country],' oder ') ); // Remove country name in other language after ' oder'

table lookup smart ignore case  ( c1, Country, Country, country names resolved, Country Name Pattern, Country Name to use );





most recent population[]        = filter( [c2:..,0], 'Population*' ) { -1 }; // Take right most matching header name from row 0



table delete selected rows      ( c2, [Country]=World );

table correct headers           ( c2, { most recent population[], '*Continent*' }, { Population, Continent } );

table process cells in columns  ( c2, Population, [.] = str([.]) / ',' );

table keep columns              ( c2, { Country, Continent, Population } );

table delete selected rows      ( c2, [Country]='*(*)*' );             // Overseas departements with home countries in parentheses

table lookup smart ignore case  ( c2, Country, Country, country names resolved, Country Name Pattern, Country Name to use );





// Part 4: Merge



table merge extend columns      ( c2, c1, Country );

table delete selected rows      ( c1, [Continent]='' ); // Questionable countries



// Part 5: Augment



[c1:Country,{China,Denmark},Area] = {9597000,42952}; // km2  (Denmark without Greenland, and CN without China South Sea)



table insert columns            ( c1, { Inhabitants Variation, Inhabitants per km2 } );

table process selected rows     ( c1, [Inhabitants] == '', [Inhabitants] = [Population]);

table process                   ( c1, [Inhabitants per km2]   = [Inhabitants] / [Area];

                                      [Inhabitants Variation] = ([Inhabitants] - [Population])/[Inhabitants] );





// Part 6: Analyze



// Create a histogram distribution across number of inhabitants and density



intervals[inhabitants]          

  = {  1000000, 2000000, 5000000, 10000000, 20000000, 50000000, 100000000, 200000000, 500000000, 1000000000 };

intervals[density]              = { 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000 };

intervals[]                     = { intervals[inhabitants], intervals[density] }; // Combine them in 1 set



table histogram                 ( c1, histo, {Population,Inhabitants per km2}, intervals[] );

table process selected rows     ( histo, is numeric([Intervals]), [Intervals] = ">=" + str([Intervals], "#,##0", local ) );

table process selected columns  ( histo, 0, is numeric([.]), [.] = ">=" + str([.], "#,##0", local ) );



// Remove the 'Others' as not needed.



table delete columns           ( histo, Others );

table delete selected rows     ( histo, [Intervals]=Others );



// Provide consolidated info by continent



table copy table                ( c1, c2 );

table consolidate               ( c2, Continent, { Area, Population}, sum); // Consolidate by continent

table process                   ( c2, [Inhabitants per km2]   = [Inhabitants] / [Area];

                                      [Inhabitants Variation] = ([Inhabitants] - [Population])/[Inhabitants];

                                      [Country] = "All countries in " + [Continent] );

for all ( {c1, c2}, table[])   table delete columns( table[], Inhabitants);

table merge                     ( c2, c1 );





// Part 7: Format



table sort selected rows        ( c1, [Country]{0..3}!="All ", Country ); // Ensure ascending order in English



table style auto width          ( c1 );

table style columns             ( c1, Area, sheet, column width, 16 );

table style columns             ( c1, {Area, Population},    table, number format, "#,##0");

table style columns             ( c1, Inhabitants Variation, table, number format, "0.00%");

table style columns             ( c1, Inhabitants per km2,   table, number format, "#,##0.00");

table style table               ( c1, sheet, freeze rows, 1, autofilter, 0 );

table style rows                ( c1, 0, table, boldface, true );



table style table               ( histo, sheet, column width, 10 );

table style columns             ( histo, 0, sheet, column width, 20, boldface, true );

table style rows                ( histo, 0, table, boldface, true, row height, 30 );

table style cells               ( histo, 0, 0, single, wrap text, true );



table process cells             ( histo, if (col()>0)

                                  {

                                      shade[] = limit(255-10*[.],0,255);

                                      table style cells( histo, col(), row(), single, fill color, {shade[],255,shade[]});

                                  } );





// Part 8: Export



table save excel file            ( {c1, histo}, {Countries, Histogram}, Result.xlsx );

echo                             ( "The histogram");

table list                       ( histo );

Output 01

    Saved table(s) to EXCEL file 'Result.xlsx' containing 2 sheets.

The histogram
    0 : Intervals       | Below | >=10 | >=20 | >=50 | >=100 | >=200 | >=500 | >=1'000 | >=2'000 | >=5'000 | >=10'000
    1 : Below           | 3     | 1    | 7    | 3    | 6     | 13    | 1     | 2       | 1       | 0       | 1       
    2 : >=1'000'000     | 0     | 0    | 3    | 4    | 2     | 1     | 1     | 1       | 0       | 0       | 0       
    3 : >=2'000'000     | 4     | 0    | 3    | 7    | 3     | 4     | 0     | 0       | 0       | 0       | 0       
    4 : >=5'000'000     | 3     | 7    | 4    | 4    | 7     | 3     | 2     | 0       | 0       | 1       | 0       
    5 : >=10'000'000    | 0     | 3    | 5    | 9    | 8     | 5     | 1     | 0       | 0       | 0       | 0       
    6 : >=20'000'000    | 3     | 4    | 6    | 11   | 6     | 3     | 1     | 0       | 0       | 0       | 0       
    7 : >=50'000'000    | 0     | 0    | 3    | 4    | 4     | 2     | 1     | 0       | 0       | 0       | 0       
    8 : >=100'000'000   | 1     | 0    | 1    | 1    | 2     | 3     | 0     | 1       | 0       | 0       | 0       
    9 : >=200'000'000   | 0     | 0    | 2    | 0    | 1     | 2     | 0     | 0       | 0       | 0       | 0       
   10 : >=500'000'000   | 0     | 0    | 0    | 0    | 0     | 0     | 0     | 0       | 0       | 0       | 0       
   11 : >=1'000'000'000 | 0     | 0    | 0    | 0    | 1     | 1     | 0     | 0       | 0       | 0       | 0

Try it yourself: Open TAB_Features_The_Whole_Program.b4p in B4P_Examples.zip. Decompress before use.

The Whole Program

Overview

Whole Program

Output 01