@misc{BrzozowskiETAL2023:geniegraphs,
    author = {Lukasz Brzozowski and Grzegorz Siudem and Marek Gagolewski},
    title = {Community detection in complex networks via node similarity, graph representation learning, and hierarchical clustering},
    year = {2023},
    publisher = {arXiv},
    doi = {10.48550/arXiv.2303.12212},
    preprint = {https://arxiv.org/pdf/2303.12212.pdf},
    note = {under review (preprint)}
}

@article{BertoliBarsottiETAL2024:inequality3dsi,
    author = {Lucio Bertoli-Barsotti and Marek Gagolewski and Grzegorz Siudem and Barbara Żogała-Siudem},
    title = {Equivalence of inequality indices in the three-dimensional model of informetric impact},
    year = {2024},
    doi = {10.1016/j.joi.2024.101566},
    journal = {Journal of Informetrics},
    preprint = {https://arxiv.org/pdf/2304.07479.pdf},
    note = {in press},
    abstract = {
        Inequality is an inherent part of our lives: we see it in the distribution of incomes, talents, citations, to name a few. However, its intensity varies across environments: there are systems where the available resources are relatively evenly distributed but also where a small group of items or agents controls the majority of assets. Numerous indices for quantifying the degree of inequality have been proposed but in general, they work quite differently.

        We recently observed (Siudem et al., 2020) that many rank-size distributions might be approximated by a time-dependent agent-based model involving a mixture of preferential (rich-get-richer) and accidental (sheer chance) attachment. In this paper, we point out its relationship to an iterative process that generates rank distributions of any length and a predefined level of inequality, as measured by the Gini index.

        We prove that, under our model, the Gini, Bonferroni, De Vergottini, and Hoover indices are equivalent for samples of similar sizes. Given one of them, we can recreate the value of another measure. Thanks to the obtained formulae, we can also understand how they depend on the sample size. An empirical analysis of a large database of citation records in economics (RePEc) yields a good match with our theoretical derivations.
    },
    keywords = {
        Gini index;
        Bonferroni index;
        Power law;
        Rich-get-richer;
        Inequality;
        Sensitivity
    }
}

@article{GagolewskiETAL2024:cvimst,
    author = {Marek Gagolewski and Anna Cena and Maciej Bartoszuk and Lukasz Brzozowski},
    title = {Clustering with minimum spanning trees: {H}ow good can it be?},
    journal = {Journal of Classification},
    year = {2024},
    preprint = {https://link.springer.com/content/pdf/10.1007/s00357-024-09483-1.pdf},
    doi = {10.1007/s00357-024-09483-1},
    note = {in press},
    abstract = {
        Minimum spanning trees (MSTs) provide a convenient representation of datasets in numerous pattern recognition activities. Moreover, they are relatively fast to compute. In this paper, we quantify the extent to which they are meaningful in low-dimensional partitional data clustering tasks. By identifying the upper bounds for the agreement between the best (oracle) algorithm and the expert labels from a large battery of benchmark data, we discover that MST methods can be very competitive. Next, we review, study, extend, and generalise a few existing, state-of-the-art MST-based partitioning schemes. This leads to some new noteworthy approaches. Overall, the Genie and the information-theoretic methods often outperform the non-MST algorithms such as K-means, Gaussian mixtures, spectral clustering, Birch, density-based, and classical hierarchical agglomerative procedures. Nevertheless, we identify that there is still some room for improvement, and thus the development of novel algorithms is encouraged.
    },
    keywords = {
        hierarchical partitional clustering;
        minimum spanning tree;
        MST;
        cluster validity measure;
        single linkage;
        Genie algorithm;
        mutual information
    }
}

@article{Gagolewski2024:nca,
    author = {Marek Gagolewski},
    title = {Normalised clustering accuracy: {A}n asymmetric external cluster validity measure},
    journal = {Journal of Classification},
    year = {2024},
    preprint = {https://link.springer.com/content/pdf/10.1007/s00357-024-09482-2.pdf},
    doi = {10.1007/s00357-024-09482-2},
    note = {in press},
    abstract = {
        There is no, nor will there ever be, single best clustering algorithm. Nevertheless, we would still like to be able to distinguish between methods that work well on certain task types and those that systematically underperform. Clustering algorithms are traditionally evaluated using either internal or external validity measures. Internal measures quantify different aspects of the obtained partitions, e.g., the average degree of cluster compactness or point separability. However, their validity is questionable because the clusterings they endorse can sometimes be meaningless. External measures, on the other hand, compare the algorithms' outputs to fixed ground truth groupings provided by experts. In this paper, we argue that the commonly used classical partition similarity scores, such as the normalised mutual information, Fowlkes--Mallows, or adjusted Rand index, miss some desirable properties. In particular, they do not identify worst-case scenarios correctly, nor are they easily interpretable. As a consequence, the evaluation of clustering algorithms on diverse benchmark datasets can be difficult. To remedy these issues, we propose and analyse a new measure: a version of the optimal set-matching accuracy, which is normalised, monotonic with respect to some similarity relation, scale-invariant, and corrected for the imbalancedness of cluster sizes (but neither symmetric nor adjusted for chance).
    },
    keywords = {
        clustering;
        external cluster validity;
        optimal set matching;
        normalisation;
        accuracy;
        adjusted Rand index;
        mutual information
    }
}

@article{BertoliBarsottiETAL2024:lorenz,
    author = {Lucio Bertoli-Barsotti and Marek Gagolewski and Grzegorz Siudem and Barbara Żogała-Siudem},
    title = {{G}ini-stable {L}orenz curves and their relation to the generalised {P}areto distribution},
    year = {2024},
    journal = {Journal of Informetrics},
    doi = {10.1016/j.joi.2024.101499},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2024-gini-stable-lorenz-curves.pdf},
    volume = {18},
    number = {2},
    pages = {101499},
    abstract = {
        We introduce an iterative discrete information production process where we can extend ordered normalised vectors by new elements based on a simple affine transformation, while preserving the predefined level of inequality, G, as measured by the Gini index.

        Then, we derive the family of empirical Lorenz curves of the corresponding vectors and prove that it is stochastically ordered with respect to both the sample size and G which plays the role of the uncertainty parameter. We prove that asymptotically, we obtain all, and only, Lorenz curves generated by a new, intuitive parametrisation of the finite-mean Pickands' Generalised Pareto Distribution (GPD) that unifies three other families, namely: the Pareto Type II, exponential, and scaled beta distributions. The family is not only totally ordered with respect to the parameter G, but also, thanks to our derivations, has a nice underlying interpretation. Our result may thus shed a new light on the genesis of this family of distributions.

        Our model fits bibliometric, informetric, socioeconomic, and environmental data reasonably well. It is quite user-friendly for it only depends on the sample size and its Gini index.
    },
    keywords = {
        Gini index; Lorenz ordering; inequality; Generalised Pareto Distributions; information production process; econometrics and scientometrics
    }
}

@article{WuETAL2024:randomfm,
    author = {Jian-Zhang Wu and Gleb Beliakov and Simon James and Marek Gagolewski},
    title = {Random generation of linearly constrained fuzzy measures and domain coverage performance evaluation},
    journal = {Information Sciences},
    year = {2024},
    volume = {659},
    pages = {120080},
    doi = {10.1016/j.ins.2023.120080},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2024-random-linearly-constrained-fuzzy-measures.pdf},
    abstract = {
        The random generation of fuzzy measures under complex linear constraints holds significance in various fields, including optimization solutions, machine learning, decision making, and property investigation. However, most existing random generation methods primarily focus on addressing the monotonicity and normalization conditions inherent in the construction of fuzzy measures, rather than the linear constraints that are crucial for representing special families of fuzzy measures and additional preference information. In this paper, we present two categories of methods to address the generation of linearly constrained fuzzy measures using linear programming models. These methods enable a comprehensive exploration and coverage of the entire feasible convex domain. The first category involves randomly selecting a subset and assigning measure values within the allowable range under given linear constraints. The second category utilizes convex combinations of constrained extreme fuzzy measures and vertex fuzzy measures. Then we employ some indices of fuzzy measures, objective functions, and distances to domain boundaries to evaluate the coverage performance of these methods across the entire feasible domain. We further provide enhancement techniques to improve the coverage ratios. Finally, we discuss and demonstrate potential applications of these generation methods in practical scenarios.
    },
    keywords = {Fuzzy measure; random generation; linear programming; domain coverage; convex combination}
}

@article{GagolewskiETAL2023:owalink,
    author = {Marek Gagolewski and Anna Cena and Simon James and Gleb Beliakov},
    title = {Hierarchical clustering with {OWA}-based linkages, the {L}ance--{W}illiams formula, and dendrogram inversions},
    year = {2023},
    journal = {Fuzzy Sets and Systems},
    preprint = {https://arxiv.org/pdf/2303.05683.pdf},
    doi = {10.1016/j.fss.2023.108740},
    volume = {473},
    pages = {108740},
    abstract = {Agglomerative hierarchical clustering based on Ordered Weighted Averaging (OWA) operators not only generalises the single, complete, and average linkages, but also includes intercluster distances based on a few nearest or farthest neighbours, trimmed and winsorised means of pairwise point similarities, amongst many others. We explore the relationships between the famous Lance-Williams update formula and the extended OWA-based linkages with weights generated via infinite coefficient sequences. Furthermore, we provide some conditions for the weight generators to guarantee the resulting dendrograms to be free from unaesthetic inversions.},
    keywords = {OWA operators; hierarchical clustering; dendrogram; inversion; the Lance-Williams formula}
}

@book{Gagolewski2023:deepr,
    author = {Marek Gagolewski},
    title = {Deep {R} Programming},
    address = {Melbourne},
    doi = {10.5281/zenodo.7490464},
    isbn = {978-0-6455719-2-9},
    edition = {v1.0.0},
    note = {🔓},
    url = {https://deepr.gagolewski.com/},
    year = {2023},
    pages = {456},
    abstract = {
        Deep R Programming is a comprehensive and in-depth introductory course
        on one of the most popular languages for data science. It equips
        ambitious students, professionals, and researchers with the knowledge
        and skills to become independent users of this potent environment
        so that they can tackle any problem related to data wrangling and
        analytics, numerical computing, statistics, and machine learning.
        This textbook is a non-profit project. Its online and PDF versions
        are freely available at <https://deepr.gagolewski.com/>.
    },
    keywords = {R; S; programming; data wrangling; data science; statistics;
        machine learning; data frames; matrices; vectors; tensors;
        data cleansing; text processing; graphics},
    preprint = {https://deepr.gagolewski.com/deepr.pdf}
}

@article{BoczekETAL2023:benchmarkint,
    author = {Michał Boczek and Marek Gagolewski and Marek Kaluszka and Andrzej Okolewski},
    title = {A benchmark-type generalization of the {S}ugeno integral with applications in bibliometrics},
    journal = {Fuzzy Sets and Systems},
    year = {2023},
    doi = {10.1016/j.fss.2023.01.014},
    volume = {466},
    pages = {108479},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2023benchmark-sugeno.pdf},
    keywords = {scientometric indices; h-index; Sugeno integral;
        subadditivity; Jensen's inequality; monotone measure},
    abstract = {
        We propose a new generalization of the classical Sugeno integral
        motivated by the Hirsch, Woeginger, and other geometrically-inspired
        indices of scientific impact. The new integral adapts to the rank-size
        curve better as it allows for putting more emphasis on highly-valued
        items and/or the tail of the distribution (level measure). We study
        its fundamental properties and give the conditions guaranteeing the
        fulfillment of subadditivity as well as the Jensen, Liapunov, Hardy,
        Markov, and Paley-Zygmund type inequalities. We discuss its
        applications in scientometrics.
    }
}

@misc{Gagolewski2022:clustering-data-v1.1.0,
    author = {Marek Gagolewski and others},
    title = {A benchmark suite for clustering algorithms: {V}ersion 1.1.0},
    year = {2022},
    url = {https://github.com/gagolews/clustering-data-v1/releases/tag/v1.1.0},
    doi = {10.5281/zenodo.7088171}
}

@article{Gagolewski2022:clustering-benchmarks,
    author = {Marek Gagolewski},
    title = {A framework for benchmarking clustering algorithms},
    journal = {SoftwareX},
    year = {2022},
    doi = {10.1016/j.softx.2022.101270},
    volume = {20},
    pages = {101270},
    url = {https://clustering-benchmarks.gagolewski.com/},
    abstract = {
        The evaluation of clustering algorithms can involve running them
        on a variety of benchmark problems, and comparing their outputs
        to the reference, ground-truth groupings provided by experts.
        Unfortunately, many research papers and graduate theses consider
        only a small number of datasets. Also, the fact that there can be many
        equally valid ways to cluster a given problem set is rarely taken into
        account. In order to overcome these limitations, we have developed
        a framework whose aim is to introduce a consistent methodology for
        testing clustering algorithms. Furthermore, we have aggregated,
        polished, and standardised many clustering benchmark dataset
        collections referred to across the machine learning and data mining
        literature, and included new datasets of different dimensionalities,
        sizes, and cluster types. An interactive datasets explorer, the
        documentation of the Python API, a description of the ways to
        interact with the framework from other programming languages such
        as R or MATLAB, and other details are all provided at
        <https://clustering-benchmarks.gagolewski.com>.
    },
    keywords = {
        clustering; machine learning; benchmark data; noise points;
        external cluster validity; partition similarity score
    },
    preprint = {https://arxiv.org/pdf/2209.09493.pdf}
}

@article{ZogalaETAL2023:interpretable-citation-models,
    author = {Barbara Żogała-Siudem and Anna Cena and Grzegorz Siudem
        and Marek Gagolewski},
    title = {Interpretable reparameterisations of citation models},
    journal = {Journal of Informetrics},
    year = {2023},
    doi = {10.1016/j.joi.2022.101355},
    pages = {101355},
    volume = {17},
    number = {1},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2023interpretable-citation-models.pdf},
    abstract = {
        This paper aims to find the reasons why some citation models can
        predict a set of specific bibliometric indices extremely well.
        We show why fitting a model that preserves the total sum of a vector
        can be beneficial in the case of heavy-tailed data that are frequently
        observed in informetrics and similar disciplines. Based on this
        observation, we introduce the reparameterised versions of the discrete
        generalised beta distribution (DGBD) and power law models that preserve
        the total sum of elements in a citation vector and, as a byproduct,
        they enjoy much better predictive power when predicting many
        bibliometric indices as well as partial cumulative sums. This also
        results in the underlying model parameters' being easier to fit
        numerically. Moreover, they are also more interpretable. Namely,
        just like in our recently-introduced 3DSI (three dimensions of
        scientific impact) model, we have a clear distinction between the
        coefficients determining the total productivity (size), total impact
        (sum), and those that affect the shape of the resulting theoretical
        curve.
    },
    keywords = {science of science; bibliometric indices; informetrics;
        citation models; interpretability
    },
}

@book{Gagolewski2021:lmlcr,
    author = {Marek Gagolewski},
    doi = {10.5281/zenodo.3679976},
    note = {draft:v0.2.3 🔓},
    year = {2022},
    address = {Melbourne},
    title = {Lightweight Machine Learning Classics with {R}},
    url = {https://lmlcr.gagolewski.com/},
    abstract = {
        Explore some of the most fundamental algorithms which have stood the
        test of time and provide the basis for innovative solutions in
        data-driven AI. Learn how to use the R language for implementing
        various stages of data processing and modelling activities.
        Appreciate mathematics as the universal language for formalising
        data-intense problems and communicating their solutions.
        The book is for you if you're yet to be fluent with university-level
        linear algebra, calculus and probability theory or you've forgotten
        all the maths you've ever learned, and are seeking a gentle,
        albeit thorough, introduction to the topic.
        This textbook is a non-profit project. Its online and PDF versions
        are freely available at <https://lmlcr.gagolewski.com/>.
    },
    keywords = {machine learning; classification; regression; clustering;
        recommender systems; optimisation; R},
    preprint = {https://lmlcr.gagolewski.com/lmlcr.pdf}
}

@article{BeliakovGagolewskiJames2022:antibuoyant,
    author = {Gleb Beliakov and Marek Gagolewski and Simon James},
    title = {Reduction of variables and constraints in fitting antibuoyant fuzzy
        measures to data using linear programming},
    journal = {Fuzzy Sets and Systems},
    year = {2022},
    volume = {451},
    pages = {266--284},
    doi = {10.1016/j.fss.2022.06.025},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2022antibuoyant.pdf},
    keywords = {fuzzy measures; Choquet integral; supermodularity;
        capacities; progressive transfers},
    abstract = {
        The discrete Choquet integral with respect to various types
        of fuzzy measures serves as an important aggregation function which
        accounts for mutual dependencies between the inputs. The Choquet
        integral can be used as an objective (or constraint) in optimisation
        problems, and the type of fuzzy measure used determines its complexity.
        This paper examines the class of antibuoyant fuzzy measures, which
        restrict the supermodular (convex) measures and satisfy the Pigou–Dalton
        progressive transfers principle. We determine subsets of extreme points
        of the set of antibuoyant fuzzy measures, whose convex combinations form
        a basis of three proposed algorithms for random generation of fuzzy
        measures from that class, and also for fitting fuzzy measures to
        empirical data or solving best approximation problems. Potential
        applications of the proposed methods are envisaged in social
        welfare, ecology, and optimisation.
    }
}

@article{GerasETAL2022:timetovote,
    author = {Agnieszka Geras and Grzegorz Siudem and Marek Gagolewski},
    journal = {Journal of the Association for Information Science and Technology},
    title = {Time to vote: {T}emporal clustering of user activity
        on {S}tack {O}verflow},
    year = {2022},
    volume = {73},
    number = {12},
    pages = {1681--1691},
    doi = {10.1002/asi.24658},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2022timetovote.pdf},
    abstract = {
        Question-and-answer (Q\&A) sites improve access to information
        and ease transfer of knowledge. In recent years, they have grown
        in popularity and importance, enabling research on behavioural
        patterns of their users. We study the dynamics related to the casting
        of 7M votes across a sample of 700k posts on Stack Overflow,
        a large community of professional software developers. We employ
        log-Gaussian mixture modelling and Markov chains to formulate
        a simple yet elegant description of the considered phenomena.
        We indicate that the inter-event times can naturally be clustered
        into 3 typical time scales: those which occur within hours, weeks,
        and months and show how the events become rarer and rarer as time
        passes. It turns out that the posts' popularity in a short period
        after publication is a weak predictor of its overall success,
        contrary to what was observed, e.g., in case of YouTube clips.
        Nonetheless, the sleeping beauties sometimes awake and can receive
        bursts of votes following each other relatively quickly.
    },
    keywords = {clustering; inter-event times; log-normal mixtures;
        Q\&A networks; Stack Overflow; burstiness}
}

@article{SiudemNowakGagolewski2022:pricepareto2,
    title = {Power laws, the {P}rice Model, and the {P}areto type-2 distribution},
    author = {Grzegorz Siudem and Przemysław Nowak and Marek Gagolewski},
    journal = {Physica A: Statistical Mechanics and its Applications},
    doi = {10.1016/j.physa.2022.128059},
    year = {2022},
    volume = {606},
    pages = {128059},
    abstract = {
        We consider a version of D. Price's model for the growth
        of a bibliographic network, where in each iteration, a constant number
        of citations is randomly allocated according to a weighted combination
        of the accidental (uniformly distributed) and the preferential
        (rich-get-richer) rule. Instead of relying on the typical master
        equation approach, we formulate and solve this problem in terms of
        the rank-size distribution. We show that, asymptotically, such a process
        leads to a Pareto-type 2 distribution with a new, appealingly
        interpretable parametrisation. We prove that the solution to the Price
        model expressed in terms of the rank-size distribution coincides with
        the expected values of order statistics in an independent Paretian
        sample. An empirical analysis of a large repository of academic papers
        yields a good fit not only in the tail of the distribution (as it is
        usually the case in the power law-like framework), but also across
        a significantly larger fraction of the data domain.
    },
    keywords = {Price model; Pareto distribution; power laws;
        rich get richer; complex networks; citations
    },
    preprint = {https://arxiv.org/pdf/2201.11456.pdf}
}

@book{Gagolewski2022:datawranglingpy,
    author = {Marek Gagolewski},
    title = {Minimalist Data Wrangling with {P}ython},
    doi = {10.5281/zenodo.6451068},
    isbn = {978-0-6455719-1-2},
    address = {Melbourne},
    edition = {v1.0.3},
    note = {🔓},
    pages = {442},
    url = {https://datawranglingpy.gagolewski.com/},
    year = {2023},
    abstract = {
        Minimalist Data Wrangling with Python is envisaged as a student's
        first introduction to data science, providing a high-level overview
        as well as discussing key concepts in detail. We explore methods
        for cleaning data gathered from different sources, transforming,
        selecting, and extracting features, performing exploratory data
        analysis and dimensionality reduction, identifying naturally
        occurring data clusters, modelling patterns in data, comparing
        data between groups, and reporting the results.
        This textbook is a non-profit project. Its online and PDF versions
        are freely available at <https://datawranglingpy.gagolewski.com/>.
    },
    keywords = {data wrangling; data science; Python; numpy; scipy; pandas;
        matplotlib; regression; classification; clustering; scikit-learn;
        time series; text processing; data frames; matrices; vectors;
        data cleansing; missing values; outliers},
    preprint = {https://datawranglingpy.gagolewski.com/datawranglingpy.pdf}
}

@book{Gagolewski2022:aipp,
    author = {Marek Gagolewski},
    title = {Algorytmy i podstawy programowania w języku {C++}
        (Introduction to Algorithms and Programming in {C++})},
    url = {https://github.com/gagolews/aipp},
    address = {Melbourne},
    doi = {10.5281/zenodo.6451054},
    isbn = {978-0-6455719-0-5},
    year = {2022},
    pages = {209},
    edition = {v1.2.0},
    note = {🇵🇱 🔓},
    abstract = {
        Skrypt do wykładu z Algorytmów i podstaw programowania
        w języku C++, prowadzonego w latach 2010–2016 na Wydziale Matematyki
        i Nauk Informacyjnych Politechniki Warszawskiej dla studentów I roku
        kierunku Matematyka. Zawiera wiele przykładowych zadań na ćwiczenia
        i laboratoria. Książka dystrybuowana jest bezpłatnie.
    },
    keywords = {algorytmy; programowanie; C++},
    preprint = {https://raw.githubusercontent.com/gagolews/aipp/master/aipp.pdf}
}

@article{GagolewskiETAL2022:ockham,
    author = {Marek Gagolewski and Barbara Żogała-Siudem
        and Grzegorz Siudem and Anna Cena},
    journal = {Scientometrics},
    title = {{O}ckham's index of citation impact},
    year = {2022},
    doi = {10.1007/s11192-022-04345-2},
    volume = {127},
    pages = {2829--2845},
    abstract = {
        We demonstrate that by using a triple of simple numerical
        summaries: an author's productivity, their overall impact, and a single
        other bibliometric index that aims to capture the shape
        of the citation distribution, we can reconstruct other popular metrics
        of bibliometric impact with a sufficient degree of precision.
        We thus conclude that the use of many indices may be unnecessary –
        entities should not be multiplied beyond
        necessity. Such a study was possible thanks to our new agent-based model
        (Siudem, Żogała-Siudem, Cena, Gagolewski; PNAS 117; 2020), which
        not only assumes that citations are distributed according to a mixture
        of the rich-get-richer rule and sheer chance, but also fits real
        bibliometric data quite well. We investigate which bibliometric
        indices have good discriminative power, which measures can be easily
        predicted as functions of other ones, and what implications to the
        research evaluation practice our findings have.},
    keywords = {3DSI model; h-index; g-index; w-index;
        equivalence of bibliometric indices},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2022ockham.pdf}
}

@article{MrowinskiETAL2022:journals,
    author = {Maciej J. Mrowiński and Marek Gagolewski and Grzegorz Siudem},
    journal = {Journal of Informetrics},
    title = {Accidentality in journal citation patterns},
    year = {2022},
    volume = {16},
    number = {4},
    pages = {101341},
    doi = {10.1016/j.joi.2022.101341},
    abstract = {
        We study an agent-based model for generating citation distributions
        in complex networks of scientific papers, where a fraction of citations
        is allotted according to the preferential attachment rule
        (rich get richer) and the remainder is allocated accidentally
        (purely at random, uniformly). Previously, we derived and analysed
        such a process in the context of describing individual authors,
        but now we apply it to scientific journals in computer and
        information sciences. Based on the large DBLP dataset as well as the
        CORE (Computing Research and Education Association of Australasia)
        journal ranking, we find that the impact of journals is correlated
        with the degree of accidentality of their citation distribution.
        Citations to impactful journals tend to be more preferential,
        while citations to lower-ranked journals are distributed in a more
        accidental manner. Further, applied fields of research such as
        artificial intelligence seem to be driven by a stronger preferential
        component – and hence have a higher degree of inequality – than the more
        theoretical ones, e.g., mathematics and computation theory.
    },
    keywords = {complex networks; DBLP; CORE; rich get richer; Pareto principle},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2022journals.pdf}
}

@article{CenaETAL2022:validcit,
    author = {Anna Cena and Marek Gagolewski
        and Grzegorz Siudem and Barbara Żogała-Siudem},
    journal = {Journal of Informetrics},
    title = {Validating citation models by proxy indices},
    year = {2022},
    doi = {10.1016/j.joi.2022.101267},
    volume = {16},
    number = {2},
    pages = {101267},
    abstract = {
        There are many approaches to the modelling of citation vectors
        of individual authors. Models may serve different purposes, but usually
        they are evaluated with regards to how well they align to citation
        distributions in large networks of papers. Here we compare a few
        leading models in terms of their ability to correctly reproduce
        the values of selected bibliometric indices of individual authors.
        Our recently-proposed three-dimensional model of scientific impact
        serves this purpose equally well as the discrete generalised beta
        distribution and the log-normal models, but has fewer parameters which
        additionally are all easy to interpret. We also indicate which indices
        can be predicted with high accuracy and which are more difficult to
        model.},
    keywords = {science of science; bibliometric indices; scientometrics;
        citation models; power law},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2022validcit.pdf}
}

@article{Gagolewski2022:stringi,
    author = {Marek Gagolewski},
    title = {{stringi}: {F}ast and portable character string processing in {R}},
    journal = {Journal of Statistical Software},
    year = {2022},
    doi = {10.18637/jss.v103.i02},
    volume = {103},
    number = {2},
    pages = {1--59},
    url = {https://stringi.gagolewski.com/},
    keywords = {stringi; character strings; text; ICU; Unicode;
        regular expressions; data cleansing; natural language processing; R},
    abstract = {
        Effective processing of character strings is required at various
        stages of data analysis pipelines: from data cleansing and preparation,
        through information extraction, to report generation. Pattern searching,
        string collation and sorting, normalisation, transliteration,
        and formatting are ubiquitous in text mining, natural language
        processing, and bioinformatics. This paper discusses and demonstrates
        how and why stringi, a mature R package for fast and
        portable handling of string data based on the ICU library
        (International Components for Unicode), should be included in each
        statistician's or data scientist's repertoire to complement their
        numerical computing and data wrangling skills.},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2022stringi.pdf}
}

@article{BeliakovGagolewskiJames2022:mobius,
    author = {Gleb Beliakov and Marek Gagolewski and Simon James},
    title = {Hierarchical data fusion processes involving
        the {M}öbius representation of capacities},
    journal = {Fuzzy Sets and Systems},
    year = {2022},
    volume = {433},
    pages = {1--21},
    doi = {10.1016/j.fss.2021.02.006},
    abstract = {
        The use of the Choquet integral in data fusion processes allows
        for the effective modelling of interactions and dependencies between
        data features or criteria. Its application requires identification
        of the defining capacity (also known as fuzzy measure) values.
        The main limiting factor is the complexity of the underlying parameter
        learning problem, which grows exponentially in the number of variables.
        However, in practice we may have expert knowledge regarding which of
        the subsets of criteria interact with each other, and which groups are
        independent. In this paper we study hierarchical aggregation processes,
        architecturally similar to feed-forward neural networks, but which
        allow for the simplification of the fitting problem both in terms
        of the number of variables and monotonicity constraints. We note that
        the Möbius representation lets us identify a number of relationships
        between the overall fuzzy measure and the data pipeline structure.
        Included in our findings are simplified fuzzy measures that generalise
        both k-intolerant and k-interactive capacities.},
    keywords = {non-additive measures; capacities; fuzzy measures;
        2-step Choquet integral; aggregation operators; high dimensional data},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2022mobius.pdf}
}

@article{GagolewskiBartoszukCena2021:cvi,
    author = {Marek Gagolewski and Maciej Bartoszuk and Anna Cena},
    title = {Are cluster validity measures (in)valid?},
    journal = {Information Sciences},
    year = {2021},
    volume = {581},
    pages = {620--636},
    doi = {10.1016/j.ins.2021.10.004},
    url = {https://github.com/gagolews/optim_cvi},
    abstract = {
        Internal cluster validity measures (such as the Caliński--Harabasz,
        Dunn, or Davies--Bouldin indices) are frequently used for selecting
        the appropriate number of partitions a dataset should be split into.
        In this paper we consider what happens if we treat
        such indices as objective functions in unsupervised learning activities.
        Is the optimal grouping with regards to, say, the Silhouette index
        really meaningful?
        It turns out that many cluster (in)validity indices
        promote clusterings that match expert knowledge quite poorly.
        We also introduce a new, well-performing variant of the Dunn index that
        is built upon OWA operators and the near-neighbour graph
        so that subspaces of higher density,
        regardless of their shapes, can be separated from each other better.
    },
    keywords = {clustering methodology;
        cluster validity index; Dunn index; nearest neighbours (NNs);
        ordered weighted averaging (OWA) operator; no free lunch
    },
    preprint = {https://arxiv.org/pdf/2208.01261}
}

@article{BartoszukGagolewski2021:tnormsimilar,
    author = {Maciej Bartoszuk and Marek Gagolewski},
    title = {T-norms or t-conorms? {H}ow to aggregate similarity
        degrees for plagiarism detection},
    journal = {Knowledge-Based Systems},
    year = {2021},
    volume = {231},
    pages = {107427},
    doi = {10.1016/j.knosys.2021.107427},
    abstract = {
        Making correct decisions as to whether code chunks should be
        considered similar becomes increasingly important in software design
        and education and not only can improve the quality of computer programs,
        but also help assure the integrity of student assessments. In this paper
        we test numerous source code similarity detection tools on pairs of code
        fragments written in the data science-oriented functional programming
        language R. Contrary to mainstream approaches, instead of considering
        symmetric measures of “how much code chunks A and B are similar to
        each other”, we propose and study the nonsymmetric degrees of inclusion
        “to what extent A is a subset of B” and “to what degree B is included
        in A”. Overall, t-norms yield better precision (how many suspicious
        pairs are actually similar), t-conorms maximise recall (how many
        similar pairs are successfully retrieved), and custom aggregation
        functions fitted to training data provide a good balance between
        the two. Also, we find that program dependence graph-based methods
        tend to outperform those relying on normalised source code text,
        tokens, and names of functions invoked.
    },
    keywords = {fuzzy logic connectives; similarity aggregation;
        decision making; data-driven optimisation; R language},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2021tnormsimilar.pdf}
}

@article{LasekGagolewski2021:iteratings,
    title = {Interpretable sports team rating models based
        on the gradient descent algorithm},
    author = {Jan Lasek and Marek Gagolewski},
    journal = {International Journal of Forecasting},
    doi = {10.1016/j.ijforecast.2020.11.008},
    year = {2021},
    volume = {37},
    number = {3},
    pages = {1061--1071},
    abstract = {
        We introduce several new sport team rating models based upon
        the gradient descent algorithm. More precisely, the models can be
        formulated by maximising the likelihood of match results observed
        using a single step of this optimisation heuristic.
        The framework proposed, inspired by the prominent Elo rating system,
        yields an iterative version of the ordinal logistic regression
        as well as different variants of the Poisson regression-based models.
        This construction makes the update equations easy to interpret
        as well as adjusts ratings once new match results are observed.
        Thus, it naturally handles temporal changes in team strength. Moreover,
        a study of association football data indicates that the new models yield
        more accurate forecasts and are less computationally demanding than
        corresponding methods that jointly optimise likelihood for the whole
        set of matches.
    },
    keywords = {rating systems; association football;
        match outcome forecasting; gradient descent; Poisson regression;
        ordinal logistic regression; Elo rating system},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2021iteratings.pdf}
}

@article{Gagolewski2021:genieclust,
    author = {Marek Gagolewski},
    title = {{genieclust}: {F}ast and robust hierarchical clustering},
    journal = {SoftwareX},
    year = {2021},
    volume = {15},
    pages = {100722},
    doi = {10.1016/j.softx.2021.100722},
    url = {https://genieclust.gagolewski.com/},
    abstract = {
        genieclust is an open source Python and R package that
        implements the hierarchical clustering algorithm called Genie.
        This method frequently outperforms other state-of-the-art approaches
        in terms of clustering quality and speed, supports various distances over
        dense, sparse, and string data domains, and can be robustified even
        further with the built-in noise point detector. As domain-independent
        software, it can be used for solving problems arising in all data-driven
        research and development activities, including environmental, health,
        biological, physical, decision, and social sciences as well as
        technology and engineering. The Python version provides a
        scikit-learn-compliant API, whereas the R variant is compatible with the
        classic hclust(). Numerous tutorials, use cases, non-trivial examples,
        documentation, installation instructions, benchmark results and timings
        can be found at https://genieclust.gagolewski.com/.
    },
    keywords = {hierarchical clustering; robust methods;
        noise points; Python; R},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2021genieclust.pdf}
}

@article{PerezGagolewskiDeBaets2021:compositional,
    author = {Raúl Pérez-Fernández and Marek Gagolewski and De~Baets, Bernard},
    title = {On the aggregation of compositional data},
    journal = {Information Fusion},
    year = {2021},
    volume = {73},
    pages = {103--110},
    doi = {10.1016/j.inffus.2021.02.021},
    abstract = {
        Compositional data naturally appear in many fields of
        application. For instance, in chemistry, the relative contributions
        of different chemical substances to a product are typically described
        in terms of a compositional data vector.  Although the aggregation
        of compositional data frequently arises in practice, the functions
        formalizing this process do not fit the standard order-based
        aggregation framework. This is due to the fact that there is
        no intuitive order that carries the semantics of the set of
        compositional data vectors (referred to as the standard simplex).
        In this paper, we consider the more general betweenness-based
        aggregation framework that yields a natural definition of an
        aggregation function for compositional data.
        The weighted centroid is proved to fit within this definition and
        discussed to be linked to a very tangible interpretation. Other
        functions for the aggregation of compositional data are presented
        and their fit within the proposed definition is discussed.
    },
    keywords = {aggregation; compositional data; beset; centroid},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2021compositional.pdf}
}

@article{BeliakovGagolewskiJames2020:dcsugeno,
    author = {Gleb Beliakov and Marek Gagolewski and Simon James},
    title = {{DC} optimization for constructing discrete {S}ugeno
        integrals and learning nonadditive measures},
    journal = {Optimization},
    year = {2020},
    volume = {69},
    number = {12},
    pages = {2515--2534},
    doi = {10.1080/02331934.2019.1705300},
    abstract = {
        Defined solely by means of order-theoretic operations meet
        (min) and join (max), weighted lattice polynomial functions
        are particularly useful for modelling data on an ordinal scale.
        A special case, the discrete Sugeno integral, defined with respect
        to a nonadditive measure (a capacity), enables accounting for the
        interdependencies between input variables. However, until recently
        the problem of identifying the fuzzy measure values with respect to
        various objectives and requirements has not received a great deal of
        attention. By expressing the learning problem as the difference of
        convex functions, we are able to apply DC (difference of convex)
        optimization methods. Here we formulate one of the global optimization
        steps as a local linear programming problem and investigate the
        improvement under different conditions.
    },
    keywords = {aggregation functions; nonadditive measures; Sugeno integral;
        capacities; DC optimization},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2020dcsugeno.pdf}
}

@article{BartoszukGagolewski2020:similar,
    author = {Maciej Bartoszuk and Marek Gagolewski},
    title = {{SimilaR}: {R} Code Clone and Plagiarism Detection},
    journal = {R Journal},
    year = {2020},
    volume = {12},
    number = {1},
    pages = {367--385},
    doi = {10.32614/RJ-2020-017},
    url = {https://CRAN.R-project.org/package=SimilaR},
    abstract = {
        Third-party software for assuring source code quality
        is becoming increasingly popular. Tools that evaluate the coverage
        of unit tests, perform static code analysis, or inspect run-time memory
        use are crucial in the software development life cycle. More
        sophisticated methods allow for performing meta-analyses of large
        software repositories, e.g., to discover abstract topics they relate
        to or common design patterns applied by their developers. They may be
        useful in gaining a better understanding of the component
        interdependencies, avoiding cloned code as well as detecting plagiarism
        in programming classes. A meaningful
        measure of similarity of computer programs often forms the basis of
        such tools. While there are a few noteworthy instruments for
        similarity assessment, none of them turns out particularly suitable
        for analysing R code chunks. Existing solutions rely on rather
        simple techniques and heuristics and fail to provide a user with
        the kind of sensitivity and specificity required for working with
        R scripts. In order to fill this gap, we propose a new algorithm
        based on a Program Dependence Graph, implemented in the SimilaR package.
        It can serve as a tool not only for improving R code quality but also
        for detecting plagiarism, even when it has been masked by applying some
        obfuscation techniques or imputing dead code. We demonstrate its
        accuracy and efficiency in a real-world case study.
    },
    keywords = {plagiarism detection; R; code clones},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2020similar.pdf}
}

@article{SiudemZogalaCenaGagolewski2020:pnas3d,
    author = {Grzegorz Siudem and Barbara Żogała-Siudem
        and Anna Cena and Marek Gagolewski},
    title = {Three dimensions of scientific impact},
    journal = {Proceedings of the National Academy of Sciences
        of the United States of America (PNAS)},
    doi = {10.1073/pnas.2001064117},
    number = {25},
    pages = {13896--13900},
    volume = {117},
    year = {2020},
    abstract = {
        The growing popularity of bibliometric indexes
        (whose most famous example is the h index by J. E. Hirsch
        [J. E. Hirsch, Proc. Natl. Acad. Sci. U.S.A. 102, 16569--16572 (2005)])
        is opposed by those claiming that one's scientific impact cannot be reduced
        to a single number. Some even believe that our complex reality fails
        to submit to any quantitative description. We argue that neither of
        the two controversial extremes is true. By assuming that some citations
        are distributed according to the rich get richer rule (success breeds
        success, preferential attachment) while some others are assigned totally
        at random (all in all, a paper needs a bibliography), we have crafted
        a model that accurately summarizes citation records with merely
        three easily interpretable parameters: productivity, total impact,
        and how lucky an author has been so far.
    },
    keywords = {science of science; scientometrics; bibliometric indexes; rich get richer},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2020pnas3d.pdf}
}

@article{CoroianuETAL2019:owacomonotone,
    author = {Lucian Coroianu and Robert Fullér
        and Marek Gagolewski and Simon James},
    title = {Constrained ordered weighted averaging aggregation
        with multiple comonotone constraints},
    journal = {Fuzzy Sets and Systems},
    year = {2020},
    volume = {395},
    pages = {21--39},
    doi = {10.1016/j.fss.2019.09.006},
    abstract = {
        The constrained ordered weighted averaging (OWA) aggregation
        problem arises when we aim to maximize or minimize a convex combination
        of order statistics under linear inequality constraints that act on
        the variables with respect to their original sources. The standalone
        approach to optimizing the OWA under constraints is to consider all
        permutations of the inputs, which becomes quickly infeasible when
        there are more than a few variables, however in certain cases we can
        take advantage of the relationships amongst the constraints and
        the corresponding solution structures. For example, we can consider
        a land-use allocation satisfaction problem with an auxiliary aim of
        balancing land-types, whereby the response curves for each species
        are non-decreasing with respect to the land-types. This results in
        comonotone constraints, which allow us to drastically reduce
        the complexity of the problem.

        In this paper, we show that if we have an arbitrary number of
        constraints that are comonotone (i.e., they share the same ordering
        permutation of the coefficients), then the optimal solution occurs
        for decreasing components of the solution. After investigating the
        form of the solution in some special cases and providing theoretical
        results that shed light on the form of the solution, we detail
        practical approaches to solving and give real-world examples.
    },
    keywords = {multiple criteria evaluation; ordered weighted averaging;
        constrained OWA aggregation; ecology; work allocation},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2019owacomonotone.pdf}
}

@article{CenaGagolewski2020:genieowa,
    author = {Anna Cena and Marek Gagolewski},
    title = {{Genie+OWA}: {R}obustifying hierarchical clustering with {OWA}-based linkages},
    journal = {Information Sciences},
    year = {2020},
    volume = {520},
    pages = {324--336},
    doi = {10.1016/j.ins.2020.02.025},
    abstract = {
        We investigate the application of the Ordered Weighted
        Averaging (OWA) data fusion operator in agglomerative hierarchical
        clustering. The examined setting generalises the well-known single,
        complete and average linkage schemes. It allows to embody expert
        knowledge in the cluster merge process and to provide a much wider
        range of possible linkages. We analyse various families of weighting
        functions on numerous benchmark data sets in order to assess their
        influence on the resulting cluster structure. Moreover, we inspect
        the correction for the inequality of cluster size distribution --
        similar to the one in the Genie algorithm. Our results demonstrate
        that by robustifying the procedure with the Genie correction,
        we can obtain a significant performance boost in terms of clustering
        quality. This is particularly beneficial in the case of the linkages
        based on the closest distances between clusters, including the single
        linkage and its "smoothed" counterparts. To explain this behaviour,
        we propose a new linkage process called three-stage OWA which yields
        further improvements. This way we confirm the intuition that
        hierarchical cluster analysis should rather take into account
        a few nearest neighbours of each point, instead of trying to adapt
        to their non-local neighbourhood.
    },
    keywords = {hierarchical clustering; OWA; data fusion; aggregation; Genie},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2020genieowa.pdf}
}

@article{GagolewskiPerezDeBaets2020:inherent,
    author = {Marek Gagolewski and Raúl Pérez-Fernández and De~Baets, Bernard},
    title = {An inherent difficulty in the aggregation of multidimensional data},
    journal = {IEEE Transactions on Fuzzy Systems},
    doi = {10.1109/TFUZZ.2019.2908135},
    number = {3},
    pages = {602--606},
    volume = {28},
    year = {2020},
    abstract = {In the field of information fusion, the problem of data
        aggregation has been formalized as an order-preserving process that
        builds upon the property of monotonicity. However, fields such as
        computational statistics, data analysis and geometry, usually emphasize
        the role of equivariances to various geometrical transformations in
        aggregation processes. Admittedly, if we consider a unidimensional
        data fusion task, both requirements are often compatible with each
        other. Nevertheless, in this paper we show that, in the
        multidimensional setting, the only idempotent functions that are
        monotone and orthogonal equivariant are the over-simplistic weighted
        centroids. Even more, this result still holds after replacing
        monotonicity and orthogonal equivariance by the weaker property of
        orthomonotonicity. This implies that the aforementioned approaches
        to the aggregation of multidimensional data are irreconcilable,
        and that, if a weighted centroid is to be avoided, we must choose
        between monotonicity and a desirable behaviour with regard to
        orthogonal transformations.
    },
    keywords = {multidimensional data aggregation; monotonicity; orthogonal equivariance; centroid}
}

@article{BeliakovGagolewskiJames2019:SugenoRobustGeneral,
    author = {Gleb Beliakov and Marek Gagolewski and Simon James},
    title = {Robust fitting for the {S}ugeno integral
        with respect to general fuzzy measures},
    journal = {Information Sciences},
    year = {2020},
    volume = {514},
    pages = {449--461},
    doi = {10.1016/j.ins.2019.11.024},
    abstract = {
        The Sugeno integral is an expressive aggregation function with
        potential applications across a range of decision contexts.
        Its calculation requires only the lattice minimum and maximum
        operations, making it particularly suited to ordinal data and robust
        to scale transformations. However, for practical use in data analysis
        and prediction, we require efficient methods for learning the associated
        fuzzy measure. While such methods are well developed for the Choquet
        integral, the fitting problem is more difficult for the Sugeno integral
        because it is not amenable to being expressed as a linear combination
        of weights, and more generally due to plateaus and non-differentiability
        in the objective function. Previous research has hence focused on
        heuristic approaches or simplified fuzzy measures. Here we show
        that the problem of fitting the Sugeno integral to data such that
        the maximum absolute error is minimized can be solved using an
        efficient bilevel program. This method can be incorporated into
        algorithms that learn fuzzy measures with the aim of minimizing
        the median residual. This equips us with tools that make the Sugeno
        integral a feasible option in robust data regression and analysis.
        We provide experimental comparison with a genetic algorithms approach
        and an example in data analysis.
    },
    keywords = {Sugeno integral; fuzzy measure; parameter learning;
        aggregation functions}
}

@article{GerasETAL2020:dislike,
    author = {Agnieszka Geras and Grzegorz Siudem and Marek Gagolewski},
    title = {Should we introduce a dislike button for academic papers?},
    journal = {Journal of the Association for Information Science and Technology},
    year = {2020},
    volume = {71},
    number = {2},
    pages = {221--229},
    doi = {10.1002/ASI.24231},
    abstract = {
        On the grounds of the revealed, mutual resemblance between the behaviour of users of the Stack Exchange and the dynamics of the citations accumulation process in the scientific community, we tackled an outwardly intractable problem of assessing the impact of introducing  negative citations.

        Although the most frequent reason to cite a paper is to highlight the connection between the two publications, researchers sometimes mention an earlier work to cast a negative light. While computing  citation-based scores, for instance the h-index, information about the reason why a paper was mentioned is neglected. Therefore it can be questioned whether these indices describe scientific achievements accurately. In this contribution we shed insight into the problem of negative citations, analysing data from Stack Exchange and, to draw more universal conclusions, we derive an approximation of citations scores. Here we show that the quantified influence of introducing negative citations is  of lesser importance and that they could be used as an indicator of where attention of scientific community is allocated.
    },
    keywords = {citation analysis; the Hirsch index; negative citations; research evaluation; science of science},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2019dislike.pdf}
}

@article{PerezDeBaetsGagolewski2019:taxonomy,
    author = {Raúl Pérez-Fernández and De~Baets, Bernard and Marek Gagolewski},
    title = {A taxonomy of monotonicity properties for the
        aggregation of multidimensional data},
    journal = {Information Fusion},
    year = {2019},
    volume = {52},
    pages = {322--334},
    doi = {10.1016/j.inffus.2019.05.006},
    abstract = {
        The property of monotonicity, which requires a function to preserve
        a given order, has been considered the standard in the aggregation
        of real numbers for decades. In this paper, we argue that, for the
        case of multidimensional data, an order-based definition of monotonicity
        is far too restrictive. We propose several meaningful alternatives to
        this property not involving the preservation of a given order by
        returning to its early origins stemming from the field of calculus.
        Numerous aggregation methods for multidimensional data commonly
        used by practitioners are studied within our new framework.
    },
    keywords = {
        Monotonicity; Aggregation; Multidimensional data;
        Centroid; Spatial median
    },
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2019taxonomymonotonicity.pdf}
}

@article{GagolewskiJamesBeliakov2019:fitsugenol1,
    author = {Marek Gagolewski and Simon James and Gleb Beliakov},
    title = {Supervised learning to aggregate data with the {S}ugeno integral},
    journal = {IEEE Transactions on Fuzzy Systems},
    year = {2019},
    volume = {27},
    number = {4},
    pages = {810--815},
    doi = {10.1109/TFUZZ.2019.2895565},
    abstract = {
        The problem of learning symmetric capacities (or fuzzy measures)
        from data is investigated toward applications in data analysis and
        prediction as well as decision making. Theoretical results regarding
        the solution minimizing the mean absolute error are exploited to develop
        an exact branch-refine-and-bound-type algorithm for fitting Sugeno
        integrals (weighted lattice polynomial functions, max-min operators)
        with respect to symmetric capacities. The proposed method turns out
        to be particularly suitable for acting on ordinal data. In addition
        to providing a model that can be used for the general data regression
        task, the results can be used, among others, to calibrate generalized
        h-indices to bibliometric data.
    },
    keywords = {Fuzzy measures; h-index; lattice polynomials;
        ordinal data fitting; Sugeno integral; weight learning},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2019fitsugenol1.pdf}
}

@article{CoroianuGagolewskiGrzegorzewski2019:piecewise,
    author = {Lucian Coroianu and Marek Gagolewski and Przemysław Grzegorzewski},
    title = {Piecewise linear approximation of fuzzy numbers:
        {A}lgorithms, arithmetic operations and stability of characteristics},
    journal = {Soft Computing},
    year = {2019},
    volume = {23},
    number = {19},
    pages = {9491--9505},
    doi = {10.1007/s00500-019-03800-2},
    url = {https://CRAN.R-project.org/package=FuzzyNumbers}
}

@article{BeliakovGagolewskiJames2019:SugenoBiomed,
    author = {Gleb Beliakov and Marek Gagolewski and Simon James},
    title = {Aggregation on ordinal scales with the {S}ugeno integral
        for biomedical applications},
    journal = {Information Sciences},
    year = {2019},
    volume = {501},
    pages = {377--387},
    doi = {10.1016/j.ins.2019.06.023}
}

@incollection{CoroianuGagolewski2019:penaltyvector,
    author = {Lucian Coroianu and Marek Gagolewski},
    title = {Penalty-based data aggregation in real normed vector spaces},
    booktitle = {New Trends in Aggregation Theory},
    editor = {Radomír Halaš and others},
    series = {Advances in Intelligent Systems and Computing},
    volume = {981},
    publisher = {Springer},
    year = {2019},
    pages = {160--171},
    doi = {10.1007/978-3-030-19494-9_15}
}

@article{LasekGagolewski2018:leagues,
    author = {Jan Lasek and Marek Gagolewski},
    title = {The efficacy of league formats in ranking teams},
    journal = {Statistical Modelling},
    year = {2018},
    volume = {18},
    number = {5--6},
    pages = {411--435},
    doi = {10.1177/1471082X18798426},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2018leagues.pdf}
}

@article{BeliakovETAL2019:traffic,
    author = {Gleb Beliakov and Marek Gagolewski and Simon James
        and Shannon Pace and Nicola Pastorello and Elodie Thilliez and Rajesh Vasa},
    doi = {10.1016/j.asoc.2017.07.014},
    journal = {Applied Soft Computing},
    pages = {910--919},
    title = {Measuring traffic congestion: {A}n approach based on learning
        weighted inequality, spread and aggregation indices from comparison data},
    volume = {67},
    year = {2018},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2018traffic.pdf}
}

@incollection{BeliakovETAL2018:lmslts,
    author = {Gleb Beliakov and Marek Gagolewski and Simon James},
    title = {Least median of squares ({LMS}) and least trimmed squares ({LTS})
        fitting for the weighted arithmetic mean},
    booktitle = {Information Processing and Management of Uncertainty
        in Knowledge-Based Systems. Theory and Foundations},
    editor = {Jesus Medina and others},
    publisher = {Springer},
    year = {2018},
    pages = {367--378},
    doi = {10.1007/978-3-319-91476-3_31}
}

@incollection{BartoszukGagolewski2017:binagopplagiarism,
    author = {Maciej Bartoszuk and Marek Gagolewski},
    title = {Binary aggregation functions in software plagiarism detection},
    booktitle = {Proc. FUZZ-IEEE'17},
    publisher = {IEEE},
    year = {2017},
    doi = {10.1109/FUZZ-IEEE.2017.8015582},
    note = {no.~8015582}
}

@incollection{CenaGagolewski2017:owagenie,
    author = {Anna Cena and Marek Gagolewski},
    title = {{OWA}-based linkage and the {G}enie correction for hierarchical clustering},
    booktitle = {Proc. FUZZ-IEEE'17},
    publisher = {IEEE},
    year = {2017},
    doi = {10.1109/FUZZ-IEEE.2017.8015652},
    note = {no.~8015652},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2017genieowatest.pdf}
}

@incollection{GagolewskiJames2017:eusflat,
    author = {Marek Gagolewski and Simon James},
    title = {Fitting symmetric fuzzy measures for discrete {S}ugeno integration},
    booktitle = {Advances in Fuzzy Logic and Technology 2017},
    editor = {Janusz Kacprzyk and others},
    series = {Advances in Intelligent Systems and Computing},
    volume = {642},
    publisher = {Springer},
    year = {2018},
    pages = {104--116},
    doi = {10.1007/978-3-319-66824-6_10}
}

@article{Gagolewski2017:pbamultidim,
    author = {Marek Gagolewski},
    title = {Penalty-based aggregation of multidimensional data},
    journal = {Fuzzy Sets and Systems},
    volume = {325},
    pages = {4--20},
    year = {2017},
    doi = {10.1016/j.fss.2016.12.009},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2017penaltymultidim.pdf}
}

@article{MesiarGagolewski2016:defects,
    author = {Radko Mesiar and Marek Gagolewski},
    title = {{H}-index and other {S}ugeno integrals: {S}ome defects and their compensation},
    journal = {IEEE Transactions on Fuzzy Systems},
    volume = {24},
    number = {6},
    pages = {1668--1672},
    year = {2016},
    doi = {10.1109/TFUZZ.2016.2516579},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2016hdeffects.pdf}
}

@article{BeliakovETAL2016:penaltyinequality,
    author = {Gleb Beliakov and Marek Gagolewski and Simon James},
    doi = {10.1142/S0218488516400018},
    journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems},
    number = {Suppl.~1},
    pages = {1--23},
    title = {Penalty-based and other representations of economic inequality},
    volume = {24},
    year = {2016},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2016penaltyeconomic.pdf}
}

@incollection{GagolewskiETAL2016:genie2,
    author = {Marek Gagolewski and Anna Cena and Maciej Bartoszuk},
    editor = {Vicenç Torra and others},
    title = {Hierarchical clustering via penalty-based aggregation and the {G}enie approach},
    booktitle = {Modeling Decisions for Artificial Intelligence},
    series = {Lecture Notes in Artificial Intelligence},
    volume = {9880},
    pages = {191--202},
    publisher = {Springer},
    year = {2016},
    doi = {10.1007/978-3-319-45656-0_16},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2016genie2.pdf}
}

@article{GagolewskiETAL2016:genie,
    author = {Marek Gagolewski and Maciej Bartoszuk and Anna Cena},
    title = {Genie: {A} new, fast, and outlier-resistant hierarchical clustering algorithm},
    journal = {Information Sciences},
    volume = {363},
    pages = {8--23},
    year = {2016},
    doi = {10.1016/j.ins.2016.05.003},
    url = {https://genieclust.gagolewski.com/},
    preprint = {https://arxiv.org/pdf/2209.05757}
}

@incollection{CenaGagolewski2016:generickmeans,
    author = {Anna Cena and Marek Gagolewski},
    booktitle = {Information Processing and Management of Uncertainty in Knowledge-Based Systems, Part II},
    doi = {10.1007/978-3-319-40581-0_36},
    editor = {J. P. Carvalho and others},
    pages = {445--456},
    publisher = {Springer},
    series = {Communications in Computer and Information Science},
    title = {Fuzzy k-minpen clustering and k-nearest-minpen classification procedures incorporating generic distance-based penalty minimizers},
    volume = {611},
    year = {2016}
}

@incollection{BartoszukETAL2016:fitagop1,
    author = {Maciej Bartoszuk and Gleb Beliakov and Marek Gagolewski and Simon James},
    booktitle = {Information Processing and Management of Uncertainty in Knowledge-Based Systems, Part II},
    doi = {10.1007/978-3-319-40581-0_62},
    editor = {J. P. Carvalho and others},
    pages = {767--779},
    publisher = {Springer},
    series = {Communications in Computer and Information Science},
    title = {Fitting aggregation functions to data: {Part} {I} -- {L}inearization and regularization},
    volume = {611},
    year = {2016},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2016fitagop1.pdf}
}

@incollection{BartoszukETAL2016:fitagop2,
    author = {Maciej Bartoszuk and Gleb Beliakov and Marek Gagolewski and Simon James},
    booktitle = {Information Processing and Management of Uncertainty in Knowledge-Based Systems, Part II},
    doi = {10.1007/978-3-319-40581-0_63},
    editor = {J. P. Carvalho and others},
    pages = {780--789},
    publisher = {Springer},
    series = {Communications in Computer and Information Science},
    title = {Fitting aggregation functions to data: {Part} {II} -- {I}dempotization},
    volume = {611},
    year = {2016},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2016fitagop2.pdf}
}

@article{LasekETAL2016:fifa,
    author = {Jan Lasek and Zoltan Szlavik and Marek Gagolewski and Sandjai Bhulai},
    title = {How to improve a team's position in the {FIFA} ranking -- {A} simulation study},
    journal = {Journal of Applied Statistics},
    volume = {43},
    number = {7},
    pages = {1349--1368},
    year = {2016},
    doi = {10.1080/02664763.2015.1100593},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2016improvefifa.pdf}
}

@article{ZogalaETAL2016:agenth,
    author = {Barbara Żogała-Siudem and Grzegorz Siudem and Anna Cena and Marek Gagolewski},
    title = {Agent-based model for the bibliometric h-index -- {E}xact solution},
    journal = {European Physical Journal B},
    volume = {89},
    number = {21},
    year = {2016},
    doi = {10.1140/epjb/e2015-60757-1},
    preprint = {https://arxiv.org/pdf/1509.05798}
}

@incollection{BartoszukGagolewski2014:fuzzyrsimilar,
    author = {Maciej Bartoszuk and Marek Gagolewski},
    editor = {A. Laurent and others},
    title = {A fuzzy {R} code similarity detection algorithm},
    booktitle = {Information Processing and Management of Uncertainty in Knowledge-Based Systems, Part III},
    series = {Communications in Computer and Information Science},
    volume = {444},
    pages = {21--30},
    publisher = {Springer},
    year = {2014},
    doi = {10.1007/978-3-319-08852-5_3}
}

@incollection{BartoszukGagolewski2015:similar2,
    author = {Maciej Bartoszuk and Marek Gagolewski},
    booktitle = {Proc. IFSA/EUSFLAT'15},
    doi = {10.2991/ifsa-eusflat-15.2015.61},
    editor = {J. M. Alonso and H. Bustince and M. Reformat},
    pages = {419--426},
    publisher = {Atlantis Press},
    title = {Detecting similarity of {R} functions via a fusion of multiple heuristic methods},
    year = {2015}
}

@article{CenaETAL2015:prodclust,
    author = {Anna Cena and Marek Gagolewski and Radko Mesiar},
    doi = {10.1016/j.joi.2015.02.005},
    journal = {Journal of Informetrics},
    number = {2},
    pages = {273--284},
    title = {Problems and challenges of information resources producers' clustering},
    volume = {9},
    year = {2015}
}

@incollection{CenaGagolewski2013:om31,
    author = {Anna Cena and Marek Gagolewski},
    editor = {Humberto Bustince and others},
    title = {{OM3}: {O}rdered maxitive, minitive, and modular aggregation operators -- {P}art {I}: {A}xiomatic analysis under arity-dependence},
    booktitle = {Aggregation Functions in Theory and in Practise},
    series = {Advances in Intelligent Systems and Computing},
    volume = {228},
    pages = {93--103},
    publisher = {Springer},
    year = {2013},
    doi = {10.1007/978-3-642-39165-1_13},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2013om31.pdf}
}

@incollection{CenaGagolewski2013:om32,
    author = {Anna Cena and Marek Gagolewski},
    editor = {Humberto Bustince and others},
    title = {{OM3}: {O}rdered maxitive, minitive, and modular aggregation operators -- {P}art {II}: {A}~simulation study},
    booktitle = {Aggregation Functions in Theory and in Practise},
    series = {Advances in Intelligent Systems and Computing},
    volume = {228},
    pages = {105--115},
    publisher = {Springer},
    year = {2013},
    doi = {10.1007/978-3-642-39165-1_14},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2013om32.pdf}
}

@incollection{CenaGagolewski2015:kmeansinformetric,
    author = {Anna Cena and Marek Gagolewski},
    booktitle = {Proc. IFSA/EUSFLAT'15},
    doi = {10.2991/ifsa-eusflat-15.2015.77},
    editor = {J. M. Alonso and H. Bustince and M. Reformat},
    pages = {536--543},
    publisher = {Atlantis Press},
    title = {A {K}-means-like algorithm for informetric data clustering},
    year = {2015}
}

@article{CenaGagolewski2015:om3fss,
    author = {Anna Cena and Marek Gagolewski},
    title = {{OM3}: {O}rdered maxitive, minitive, and modular aggregation operators -- {A}xiomatic and probabilistic properties in an arity-monotonic setting},
    journal = {Fuzzy Sets and Systems},
    volume = {264},
    pages = {138--159},
    year = {2015},
    doi = {10.1016/j.fss.2014.04.001},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2015om3fss.pdf}
}

@article{CoroianuETAL2013:piecewise1,
    author = {Lucian Coroianu and Marek Gagolewski and Przemysław Grzegorzewski},
    title = {Nearest piecewise linear approximation of fuzzy numbers},
    journal = {Fuzzy Sets and Systems},
    volume = {233},
    pages = {26--51},
    year = {2013},
    doi = {10.1016/j.fss.2013.02.005},
    url = {https://CRAN.R-project.org/package=FuzzyNumbers},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2013plfnknot1.pdf}
}

@incollection{CoroianuETAL2014:piecewise1suppcore,
    author = {Lucian Coroianu and Marek Gagolewski and Przemysław Grzegorzewski and Adabitabar Firozja, M. and Tahereh Houlari},
    editor = {A. Laurent and others},
    title = {Piecewise linear approximation of fuzzy numbers preserving the support and core},
    booktitle = {Information Processing and Management of Uncertainty in Knowledge-Based Systems, Part II},
    series = {Communications in Computer and Information Science},
    volume = {443},
    pages = {244--254},
    publisher = {Springer},
    year = {2014},
    doi = {10.1007/978-3-319-08855-6_25}
}

@article{Gagolewski2011:CITAN,
    author = {Marek Gagolewski},
    title = {Bibliometric impact assessment with {R} and the {CITAN} package},
    journal = {Journal of Informetrics},
    volume = {5},
    number = {4},
    pages = {678--692},
    year = {2011},
    doi = {10.1016/j.joi.2011.06.006},
    url = {https://CRAN.R-project.org/package=CITAN},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2011citan.pdf}
}

@incollection{Gagolewski2012:effortdom,
    author = {Marek Gagolewski},
    editor = {Salvatore Greco and others},
    title = {On the relation between effort-dominating and symmetric minitive aggregation operators},
    booktitle = {Advances in Computational Intelligence, Part III},
    series = {Communications in Computer and Information Science},
    volume = {299},
    pages = {276--285},
    publisher = {Springer},
    year = {2012},
    doi = {10.1007/978-3-642-31718-7_29},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2012effortdom.pdf}
}

@incollection{Gagolewski2012:smps,
    author = {Marek Gagolewski},
    editor = {Rudolf Kruse and others},
    title = {Statistical hypothesis test for the difference between {H}irsch
        indices of two {P}areto-distributed random samples},
    booktitle = {Synergies of Soft Computing and Statistics for Intelligent Data Analysis},
    series = {Advances in Intelligent Systems and Computing},
    volume = {190},
    pages = {359--367},
    publisher = {Springer},
    year = {2013},
    doi = {10.1007/978-3-642-33042-1_39},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2013htest.pdf}
}

@article{Gagolewski2013:fair,
    author = {Marek Gagolewski},
    title = {Scientific impact assessment cannot be fair},
    journal = {Journal of Informetrics},
    volume = {7},
    number = {4},
    pages = {792--802},
    year = {2013},
    doi = {10.1016/j.joi.2013.07.001},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2013fair.pdf}
}

@article{Gagolewski2013:om3,
    author = {Marek Gagolewski},
    title = {On the relationship between symmetric maxitive, minitive, and modular aggregation operators},
    journal = {Information Sciences},
    volume = {221},
    pages = {170--180},
    year = {2013},
    doi = {10.1016/j.ins.2012.09.005},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2013rel3.pdf}
}

@incollection{Gagolewski2015:hconfint,
    author = {Marek Gagolewski},
    editor = {P. Grzegorzewski and others},
    title = {Sugeno integral-based confidence intervals for the theoretical h-index},
    booktitle = {Strengthening Links Between Data Analysis and Soft Computing},
    series = {Advances in Intelligent Systems and Computing},
    volume = {315},
    pages = {233--240},
    publisher = {Springer},
    year = {2015},
    doi = {10.1007/978-3-319-10765-3_28},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2015hconfint.pdf}
}

@incollection{Gagolewski2015:normalizedspread,
    author = {Marek Gagolewski},
    booktitle = {Proc. IFSA/EUSFLAT'15},
    doi = {10.2991/ifsa-eusflat-15.2015.32},
    editor = {J. M. Alonso and H. Bustince and M. Reformat},
    pages = {210--216},
    publisher = {Atlantis Press},
    title = {Normalized {WD$_p$WAM} and {WD$_p$OWA} spread measures},
    year = {2015},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2015normalizedspread.pdf}
}

@article{Gagolewski2015:spread,
    author = {Marek Gagolewski},
    title = {Spread measures and their relation to aggregation functions},
    journal = {European Journal of Operational Research},
    volume = {241},
    number = {2},
    pages = {469--477},
    year = {2015},
    doi = {10.1016/j.ejor.2014.08.034},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2015spread.pdf}
}

@article{GagolewskiGrzegorzewski2009:geometricapproach,
    author = {Marek Gagolewski and Przemysław Grzegorzewski},
    title = {A geometric approach to the construction of scientific impact indices},
    journal = {Scientometrics},
    volume = {81},
    number = {3},
    pages = {617--634},
    year = {2009},
    doi = {10.1007/s11192-008-2253-y},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2009geometricapproach.pdf}
}

@incollection{GagolewskiGrzegorzewski2010:ipmu,
    author = {Marek Gagolewski and Przemysław Grzegorzewski},
    editor = {E. Hüllermeier and others},
    title = {Arity-monotonic extended aggregation operators},
    booktitle = {Information Processing and Management of Uncertainty in Knowledge-Based Systems},
    series = {Communications in Computer and Information Science},
    volume = {80},
    pages = {693--702},
    publisher = {Springer},
    year = {2010},
    doi = {10.1007/978-3-642-14055-6_73},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2010aritymonotonic.pdf}
}

@incollection{GagolewskiGrzegorzewski2010:smps,
    author = {Marek Gagolewski and Przemysław Grzegorzewski},
    editor = {Christian Borgelt and others},
    title = {{S}-statistics and their basic properties},
    booktitle = {Combining Soft Computing and Statistical Methods in Data Analysis},
    series = {Advances in Intelligent and Soft Computing},
    volume = {77},
    pages = {281--288},
    publisher = {Springer},
    year = {2010},
    doi = {10.1007/978-3-642-14746-3_35},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2010sstatprop.pdf}
}

@incollection{GagolewskiGrzegorzewski2011:axcharquasils,
    author = {Marek Gagolewski and Przemysław Grzegorzewski},
    editor = {Sylvie Galichet and others},
    title = {Axiomatic characterizations of {(quasi-)} {L}-statistics and {S}-statistics and the {P}roducer {A}ssessment {P}roblem},
    booktitle = {Proc. EUSFLAT/LFA'11},
    pages = {53--58},
    publisher = {Atlantis Press},
    location = {Aix-Les-Bains, France},
    year = {2011},
    doi = {10.2991/eusflat.2011.112},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2011axcharquasils.pdf}
}

@article{GagolewskiGrzegorzewski2011:possibilistic,
    author = {Marek Gagolewski and Przemysław Grzegorzewski},
    title = {Possibilistic analysis of arity-monotonic aggregation operators and its relation to bibliometric impact assessment of individuals},
    journal = {International Journal of Approximate Reasoning},
    volume = {52},
    number = {9},
    pages = {1312--1324},
    year = {2011},
    doi = {10.1016/j.ijar.2011.01.010},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2011possibilistic.pdf}
}

@incollection{GagolewskiLasek2015:fuzzrelpap,
    author = {Marek Gagolewski and Jan Lasek},
    title = {The use of fuzzy relations in the assessment of information resources producers' performance},
    booktitle = {Proc. 7th IEEE International Conference Intelligent Systems IS'2014, Vol. 2: Tools, Architectures, Systems, Applications},
    series = {Advances in Intelligent Systems and Computing},
    volume = {323},
    pages = {289--300},
    publisher = {Springer},
    year = {2015},
    doi = {10.1007/978-3-319-11310-4_25}
}

@incollection{GagolewskiLasek2015:preflearn,
    author = {Marek Gagolewski and Jan Lasek},
    booktitle = {Proc. IFSA/EUSFLAT'15},
    doi = {10.2991/ifsa-eusflat-15.2015.70},
    editor = {J. M. Alonso and H. Bustince and M. Reformat},
    pages = {484--491},
    publisher = {Atlantis Press},
    title = {Learning experts' preferences from informetric data},
    year = {2015}
}

@article{GagolewskiMesiar2012:pqm,
    author = {Marek Gagolewski and Radko Mesiar},
    title = {Aggregating different paper quality measures with a generalized {h}-index},
    journal = {Journal of Informetrics},
    volume = {6},
    number = {4},
    pages = {566--579},
    year = {2012},
    doi = {10.1016/j.joi.2012.05.001},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2012pqm.pdf}
}

@article{GagolewskiMesiar2014:integrals,
    author = {Marek Gagolewski and Radko Mesiar},
    title = {Monotone measures and universal integrals in a uniform framework for the scientific impact assessment problem},
    journal = {Information Sciences},
    volume = {263},
    pages = {166--174},
    year = {2014},
    doi = {10.1016/j.ins.2013.12.004},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2014integrals.pdf}
}

@incollection{LasekGagolewski2015:fireman,
    author = {Jan Lasek and Marek Gagolewski},
    editor = {M. Ganzha and L. Maciaszek and M. Paprzycki},
    title = {The winning solution to the {AAIA'15} {D}ata {M}ining {C}ompetition: {T}agging firefighter activities at a fire scene},
    booktitle = {Proc. FedCSIS'15},
    pages = {375--380},
    publisher = {IEEE},
    year = {2015},
    doi = {10.15439/2015F418},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2015fireman.pdf}
}

@incollection{GagolewskiDebskiNowakiewicz2013:graphintegrals,
    author = {Marek Gagolewski and Michał Dębski and Michał Nowakiewicz},
    booktitle = {Proc. Uncertainty Modeling},
    editor = {Radko Mesiar and Tomas Bacigal},
    isbn = {978-80-227-4067-8},
    pages = {17--23},
    publisher = {STU Bratislava},
    title = {Efficient algorithm for computing certain graph-based monotone integrals: {T}he {$l_p$}-indices},
    year = {2013},
    abstract = {
        The Choquet, Sugeno, and Shilkret integrals with respect to monotone
        measures are useful as tools in decision support systems.
        In this paper we propose a new class of graph-based integrals that
        generalize these three operations. Then, an efficient linear-time
        algorithm for computing their special case, that is lp-indices,
        1 ≤ p < ∞, is presented. The algorithm is based on R.L. Graham's
        routine for determining the convex hull of a finite planar set.
    },
    keywords = {monotone measures; Choquet, Sugeno, and Shilkret integral;
        lp-index; convex hull; Graham's scan; scientific impact indices},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2013graphintegrals.pdf}
}

@incollection{GagolewskiGrzegorzewski2009:possibleh,
    author = {Marek Gagolewski and Przemysław Grzegorzewski},
    editor = {J. P. Carvalho and others},
    title = {Possible and necessary {h}-indices},
    booktitle = {Proc. IFSA/EUSFLAT'09},
    pages = {1691--1695},
    publisher = {IFSA},
    location = {Lisbon, Portugal},
    year = {2009}
}

@incollection{LasekGagolewski2015:esttourmet,
    author = {Jan Lasek and Marek Gagolewski},
    title = {Estimation of tournament metrics for association football league formats},
    booktitle = {Selected problems in information technologies (Proc. ITRIA'15 vol. 2)},
    pages = {67--78},
    publisher = {Institute of Computer Science, Polish Academy of Sciences},
    year = {2015}
}

@incollection{CenaGagolewski2015:clustaginf,
    author = {Anna Cena and Marek Gagolewski},
    title = {Clustering and aggregation of informetric data sets},
    booktitle = {Computational methods in data analysis (Proc. ITRIA'15 vol. 1)},
    pages = {5--26},
    publisher = {Institute of Computer Science, Polish Academy of Sciences},
    year = {2015}
}

@incollection{Gagolewski2015:issuesmultidim,
    author = {Marek Gagolewski},
    editor = {Michał Baczyński and De~Baets, Bernard and Radko Mesiar},
    title = {Some issues in aggregation of multidimensional data},
    booktitle = {Proc. 8th International Summer School on Aggregation Operators (AGOP 2015)},
    pages = {127--132},
    publisher = {University of Silesia},
    year = {2015},
    isbn = {978-83-8012-519-3}
}

@incollection{CenaGagolewski2015:fuzzycmeansinformetric,
    author = {Anna Cena and Marek Gagolewski},
    editor = {Michał Baczyński and De~Baets, Bernard and Radko Mesiar},
    title = {Aggregation and soft clustering of informetric data},
    booktitle = {Proc. 8th International Summer School on Aggregation Operators (AGOP 2015)},
    pages = {79--84},
    publisher = {University of Silesia},
    year = {2015},
    isbn = {978-83-8012-519-3}
}

@incollection{GagolewskiGrzegorzewski2010:metodyiproblemy,
    author = {Marek Gagolewski and Przemysław Grzegorzewski},
    editor = {Tomasz Rowiński and Ryszard Tadeusiewicz},
    title = {Metody i~problemy naukometrii (Methods and problems of scientometrics)},
    booktitle = {Psychologia i~informatyka. Synergia i~kontradykcje},
    pages = {103--125},
    publisher = {Wyd.~UKSW},
    address = {Warsaw},
    year = {2010},
    isbn = {978-83-707-2679-9},
    note = {in Polish}
}

@incollection{GagolewskiGrzegorzewski2009:uogolnienieh,
    address = {Lublin},
    author = {Marek Gagolewski and Przemysław Grzegorzewski},
    booktitle = {Kadry i infrastruktura nowoczesnej nauki: Teoria i praktyka, Proc. 1st Intl. Conf. Zarządzanie Nauką},
    editor = {P. Kawalec and P. Lipski},
    isbn = {978-83-61671-12-1},
    note = {in Polish},
    pages = {15--29},
    publisher = {Wydawnictwo Lubelskiej Szkoły Biznesu},
    title = {O pewnym uogólnieniu indeksu {H}irscha},
    volume = {2},
    year = {2009}
}

@incollection{RowinskiGagolewski2011:internetkryzys,
    author = {Tomasz Rowiński and Marek Gagolewski},
    editor = {M. Jankowska and M. Starzomska},
    title = {Internet a kryzys},
    booktitle = {Kryzys: Pułapka czy szansa?},
    pages = {211--224},
    publisher = {WN Akapit},
    address = {Warsaw},
    year = {2011},
    isbn = {978-83-609-5885-8},
    note = {in Polish}
}

@article{RowinskiGagolewski2007:online,
    author = {Tomasz Rowiński and Marek Gagolewski},
    title = {Preferencje i postawy wobec pomocy online (Attitudes towards online counselling and psychotherapy)},
    journal = {Studia Psychologica UKSW},
    volume = {7},
    pages = {195--210},
    year = {2007},
    note = {in Polish}
}

@phdthesis{Gagolewski2011:PhD,
    author = {Marek Gagolewski},
    title = {Aggregation operators and their application in a formal model for quality evaluation system of scientific research (Wybrane operatory agregacji i~ich zastosowanie w~modelu formalnym systemu jakości w~nauce)},
    school = {Systems Research Institute, Polish Academy of Sciences},
    year = {2011},
    note = {in Polish}
}

@proceedings{HalasETAL2019:agop2019,
    editor = {Radomír Halaš and Marek Gagolewski and Radko Mesiar},
    title = {New Trends in Aggregation Theory},
    series = {Advances in Intelligent Systems and Computing},
    volume = {981},
    pages = {348},
    publisher = {Springer},
    year = {2019},
    isbn = {978-3-030-19493-2},
    doi = {10.1007/978-3-030-19494-9}
}

@proceedings{GrzegorzewskiETAL2015:smps2014,
    editor = {Przemysław Grzegorzewski and Marek Gagolewski and Olgierd Hryniewicz and María Ángeles Gil},
    title = {Strengthening Links Between Data Analysis and Soft Computing},
    series = {Advances in Intelligent Systems and Computing},
    volume = {315},
    pages = {294},
    publisher = {Springer},
    year = {2015},
    isbn = {978-3-319-10764-6},
    doi = {10.1007/978-3-319-10765-3}
}

@proceedings{FerraroETAL2017:smps2016,
    editor = {Maria Brigida Ferraro and Paulo Giordani and Barbara Vantaggi
        and Marek Gagolewski and María Ángeles Gil and Przemysław Grzegorzewski
        and Olgierd Hryniewicz},
    title = {Soft Methods for Data Science},
    series = {Advances in Intelligent Systems and Computing},
    volume = {456},
    pages = {535},
    publisher = {Springer},
    year = {2017},
    isbn = {978-3-319-42971-7},
    doi = {10.1007/978-3-319-42972-4}
}

@book{GagolewskiETAL2016:Pythonksiazka,
    address = {Warsaw},
    author = {Marek Gagolewski and Maciej Bartoszuk and Anna Cena},
    isbn = {978-83-01-18940-2},
    note = {🇵🇱},
    pages = {369},
    publisher = {Wydawnictwo Naukowe PWN},
    title = {Przetwarzanie i analiza danych w języku {Python} (Data Processing and Analysis in {Python})},
    year = {2016},
    url = {https://github.com/gagolews/Analiza_danych_w_jezyku_Python}
}

@book{Gagolewski2016:Rksiazka,
    address = {Warsaw},
    author = {Marek Gagolewski},
    edition = {2nd},
    isbn = {978-83-01-18939-6},
    note = {🇵🇱},
    pages = {550},
    publisher = {Wydawnictwo Naukowe PWN},
    title = {Programowanie w języku {R}. {A}naliza danych, obliczenia, symulacje ({R} Programming. Data Analysis, Computing, Simulations)},
    year = {2016},
    url = {https://github.com/gagolews/Programowanie_w_jezyku_R}
}

@book{Gagolewski2015:datafusionbook,
    author = {Marek Gagolewski},
    title = {Data Fusion: {T}heory, Methods, and Applications},
    pages = {290},
    publisher = {Institute of Computer Science, Polish Academy of Sciences},
    address = {Warsaw},
    year = {2015},
    isbn = {978-83-63159-20-7},
    url = {https://github.com/gagolews/datafusion},
    preprint = {https://raw.githubusercontent.com/gagolews/datafusion/master/datafusion.pdf},
    note = {🔓},
    keywords = {data aggregation, data fusion, means, t-norms, spread measures,
        multidimensional data, strings},
    abstract = {
        A proper fusion of complex data is of interest to many researchers
        in diverse fields, including computational statistics, computational
        geometry, bioinformatics, machine learning, pattern recognition,
        quality management, engineering, statistics, finance, economics, etc.
        It plays a crucial role in: synthetic description of data processes
        or whole domains, creation of rule bases for approximate reasoning
        tasks, reaching consensus and selection of the optimal strategy in
        decision support systems, imputation of missing values, data
        deduplication and consolidation, record linkage across heterogeneous
        databases, and clustering. This open-access research monograph
        integrates the spread-out results from different domains using the
        methodology of the well-established classical aggregation framework,
        introduces researchers and practitioners to Aggregation 2.0,
        as well as points out the challenges and interesting directions
        for further research.
    }
}

@book{Gagolewski2014:Rksiazka,
    address = {Warsaw},
    author = {Marek Gagolewski},
    edition = {1st},
    isbn = {978-83-01-17461-3},
    note = {🇵🇱},
    pages = {494},
    publisher = {Wydawnictwo Naukowe PWN},
    title = {Programowanie w języku {R}. {A}naliza danych, obliczenia, symulacje ({R} Programming. Data Analysis, Computing, Simulations)},
    year = {2014}
}

@book{GrzegorzewskiETAL2014:wnioskowaniestatystyczne,
    address = {Warsaw},
    author = {Przemysław Grzegorzewski and Marek Gagolewski and Konstancja Bobecka-Wesołowska},
    isbn = {978-83-93-72601-1},
    note = {🇵🇱 🔓},
    pages = {183},
    publisher = {Politechnika Warszawska},
    title = {Wnioskowanie statystyczne z wykorzystaniem środowiska {R} (Statistical Inference with {R})},
    year = {2014},
    preprint = {https://raw.githubusercontent.com/gagolews/bibliography/master/preprints/2014wnioskowaniestatystyczne.pdf}
}