#!/usr/bin/env php
<?php

/*
 * PHP Glossary
 *
 * Term Analysis tool for PHP.
 *
 * Copyright (c) 2020 Francesco Bianco <bianco@javanile.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

// Settings used when the working directory has no .glossaryrc file.
// Each value is a comma-separated list stored as a single string.
$defaultConfig = [
    'files' => [
        'exclude' => '',
        'include' => ''
    ],
    'domain' => [
        'terms' => ''
    ],
];

// Words that are never treated as domain terms. The analysis compares the
// raw token against this list, so the match is case-sensitive. Only the
// 'default' set is defined and used.
$languageKeywords = [
    'default' => [
        'php', 'echo', 'default', 'true', 'false', 'function', 'return', 'foreach', 'as', 'if', 'continue', 'elseif',
        'exit', 'break', 'case', 'switch', 'while', 'do', 'for', 'try', 'catch', 'finally', 'throw', 'empty', 'isset',
        'null', 'new', 'class', 'extends', 'implements', 'interface', 'use', 'namespace', 'require', 'require_once',
        'include', 'include_once', 'global', 'static', 'public', 'private', 'protected', 'final', 'abstract', 'const',
        'argv', 'this', 'self', 'parent',
    ]
];

// Command-line switches: each one is enabled simply by being present.
$dumpFiles = in_array('--dump-files', $argv, true);
$dumpDomainTerms = in_array('--dump-domain-terms', $argv, true);

// Settings come from the .glossaryrc file of the current working directory,
// or from the built-in defaults when that file does not exist.
$configFile = getcwd() . '/.glossaryrc';
$config = $defaultConfig;
if (file_exists($configFile)) {
    $rawConfig = file_get_contents($configFile);
    // Lists may be written across several lines ("a\n      , b"); join them
    // so that the INI parser sees a single value.
    $parsedConfig = parse_ini_string(preg_replace('/\s+,\s+/m', ',', (string) $rawConfig), true);
    // parse_ini_string() returns false on a syntax error. Treat that as an
    // empty configuration rather than indexing into a boolean below.
    $config = is_array($parsedConfig) ? $parsedConfig : [];
}

// Make sure both sections exist as arrays before they are read and rewritten.
foreach (['files', 'domain'] as $section) {
    if (!isset($config[$section]) || !is_array($config[$section])) {
        $config[$section] = [];
    }
}

// Returns one setting as a string; missing or non-scalar values become ''.
$readSetting = function ($section, $key) use ($config) {
    $value = isset($config[$section][$key]) ? $config[$section][$key] : '';

    return is_scalar($value) ? (string) $value : '';
};

// Splits a comma-separated value into trimmed, non-empty entries. Trimming is
// needed because the join above only normalises commas that have whitespace
// on both sides, so "a, b" would otherwise yield the entry " b".
$splitList = function ($value) {
    return array_values(array_filter(array_map('trim', explode(',', $value))));
};

$config['files']['include'] = $splitList($readSetting('files', 'include'));
// "vendor" is always excluded. Exclusions are resolved to real paths, and
// those that do not exist (realpath() === false) are dropped.
$config['files']['exclude'] = array_values(array_filter(array_map(
    'realpath',
    array_merge(['vendor'], $splitList($readSetting('files', 'exclude')))
)));
// Empty entries are kept on purpose: later code already skips the '' term.
$config['domain']['terms'] = array_map('sanitize_term', explode(',', $readSetting('domain', 'terms')));

/**
 * Turn an identifier into a lower-case, space-separated term.
 *
 * An underscore is inserted wherever a lower-case letter is directly followed
 * by an upper-case one, the text is lower-cased, every underscore becomes a
 * space and surrounding whitespace is trimmed. "domainTerm" and "domain_term"
 * therefore both give "domain term".
 *
 * @param string $term Raw identifier or configured term.
 *
 * @return string The normalised term.
 */
function sanitize_term($term)
{
    $withBoundaries = preg_replace('/([a-z])([A-Z])/', '$1_$2', $term);

    return trim(strtr(strtolower($withBoundaries), '_', ' '));
}

/**
 * Recursively collect the files with a "php" extension below a directory.
 *
 * Entries whose name starts with a dot are skipped, as is any entry whose
 * resolved path appears in $exclude. Returned paths are built by joining
 * $dir, "/" and the entry name, so they keep the form of the $dir argument.
 *
 * @param string $dir     Directory to scan.
 * @param array  $exclude Resolved paths (as returned by realpath()) to skip.
 *
 * @return array List of paths; empty when $dir cannot be read.
 */
function search_php_files($dir, $exclude)
{
    // scandir() warns and returns false for a missing or unreadable
    // directory, so check first and treat that case as "nothing found".
    if (!is_dir($dir) || !is_readable($dir)) {
        return [];
    }

    $entries = scandir($dir);
    if ($entries === false) {
        return [];
    }

    $result = [];

    foreach ($entries as $filename) {
        // Skips ".", ".." and hidden files or directories.
        if ($filename[0] === '.') {
            continue;
        }

        $filePath = $dir . '/' . $filename;
        $realPath = realpath($filePath);

        if (in_array($realPath, $exclude, true)) {
            continue;
        }

        if ($realPath && is_dir($filePath)) {
            // Append in place rather than re-merging the whole accumulated list.
            foreach (search_php_files($filePath, $exclude) as $nestedFile) {
                $result[] = $nestedFile;
            }
        } elseif (pathinfo($filePath, PATHINFO_EXTENSION) === 'php') {
            $result[] = $filePath;
        }
    }

    return $result;
}

// Register every configured domain term with a zeroed usage record.
$domainTerms = [];
foreach ($config['domain']['terms'] as $configuredTerm) {
    $domainTerms[$configuredTerm] = ['term' => $configuredTerm, 'count' => 0, 'files' => []];
}

$currentLanguageKeywords = $languageKeywords['default'];

// Files to analyse: everything found below the current directory (minus the
// exclusions), followed by the explicitly included files.
$discoveredFiles = search_php_files('.', $config['files']['exclude']);
$phpFiles = array_merge($discoveredFiles, $config['files']['include']);

if (count($phpFiles) === 0) {
    echo "Problem was found: No PHP files found.\n";
    exit(1);
}

sort($phpFiles);

// With --dump-files the sorted list is printed, one path per line, and the
// script stops before any analysis.
if ($dumpFiles) {
    foreach ($phpFiles as $listedFile) {
        echo $listedFile . "\n";
    }
    exit(0);
}

// Walk through every collected file, split its code into words and check
// each word against the glossary. The first unknown word aborts the run.
foreach ($phpFiles as $file) {
    // The list can hold blank entries or paths that do not exist.
    if (!$file || !file_exists($file)) {
        continue;
    }

    // Work on the source without comments and extra whitespace, with the
    // literal two-character sequences "\n" and "\s" removed (presumably so
    // that the escape letter is not glued to the following word).
    $code = str_replace(['\\n', '\\s'], '', php_strip_whitespace($file));
    if (empty($code)) {
        continue;
    }

    preg_match_all('/\\w+/', $code, $matches);
    foreach ($matches[0] as $token) {
        $term = sanitize_term($token);

        // Ignore anything that cannot be a domain term: words shorter than
        // two characters, numbers, names of defined functions or constants,
        // and language keywords.
        $isIgnored = strlen($term) < 2
            || is_numeric($term)
            || function_exists($token)
            || defined($token)
            || in_array($token, $currentLanguageKeywords);
        if ($isIgnored) {
            continue;
        }

        // In dump mode every remaining word is accepted as a new term.
        if ($dumpDomainTerms && empty($domainTerms[$term])) {
            $domainTerms[$term] = ['term' => $term, 'count' => 0, 'files' => []];
        }

        $isKnownTerm = isset($domainTerms[$term]['term']) && $domainTerms[$term]['term'];
        if (!$isKnownTerm) {
            echo "Problem was found: Unexpected domain term '{$token}' on file '{$file}'.\n";
            exit(1);
        }

        $domainTerms[$term]['count']++;
    }
}

// With --dump-domain-terms the collected terms are printed as a ready-to-use
// "[domain]" section, sorted alphabetically, one term per line.
if ($dumpDomainTerms) {
    usort($domainTerms, function ($left, $right) {
        return strcmp(strtolower($left['term']), strtolower($right['term']));
    });

    echo "[domain]\n";

    // Keep only the entries that actually carry a term.
    $printableTerms = [];
    foreach ($domainTerms as $entry) {
        if (!empty($entry['term'])) {
            $printableTerms[] = $entry['term'];
        }
    }

    // The first line starts with "terms =", each following one with a
    // comma aligned under the equals sign.
    if (count($printableTerms) > 0) {
        echo 'terms = ' . implode("\n      , ", $printableTerms) . "\n";
    }

    exit(0);
}

// Longest term seen so far; its length sets the width of the first column.
$longTerm = [
    'term' => '',
    'length' => 0,
];

// Every configured term must occur at least once in the analysed code.
foreach ($domainTerms as $term => $info) {
    if ($term && empty($info['count'])) {
        echo "Problem was found: Domain term '{$term}' seems never used.\n";
        exit(1);
    }
    if (strlen($term) > $longTerm['length']) {
        $longTerm['term'] = $term;
        $longTerm['length'] = strlen($term);
    }
}

// Most used terms first. The comparator has to return an integer: a boolean
// result cannot express "equal" and is deprecated in PHP 8.
usort($domainTerms, function ($a, $b) {
    return $b['count'] - $a['count'];
});

// Print the report as a Markdown table.
$termsColumnSize = max(8, $longTerm['length']);
echo "| ".str_pad('Terms', $termsColumnSize, ' ')." | Count |\n";
echo "|-".str_repeat('-', $termsColumnSize)."-|:-----:|\n";
foreach ($domainTerms as $info) {
    // An empty configuration entry leaves a placeholder with no term. It is
    // skipped here just as the checks above and the dump mode skip it.
    if ((string) $info['term'] === '') {
        continue;
    }
    $term = str_pad($info['term'], $termsColumnSize, ' ');
    $count = str_pad($info['count'], 5, ' ', STR_PAD_LEFT);
    echo "| {$term} | {$count} |\n";
}