@inproceedings{bhargava-etal-2021-generalization,
    title = "Generalization in {NLI}: Ways (Not) To Go Beyond Simple Heuristics",
    author = "Bhargava, Prajjwal and
      Drozd, Aleksandr and
      Rogers, Anna",
    booktitle = "Proceedings of the Second Workshop on Insights from Negative Results in NLP",
    month = nov,
    year = "2021",
    address = "Online and Punta Cana, Dominican Republic",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.insights-1.18",
    doi = "10.18653/v1/2021.insights-1.18",
    pages = "125--135",
    abstract = "Much of recent progress in NLU was shown to be due to models{'} learning dataset-specific heuristics. We conduct a case study of generalization in NLI (from MNLI to the adversarially constructed HANS dataset) in a range of BERT-based architectures (adapters, Siamese Transformers, HEX debiasing), as well as with subsampling the data and increasing the model size. We report 2 successful and 3 unsuccessful strategies, all providing insights into how Transformer-based models learn to generalize.",
}